Starter examples for using Next.js and the Vercel AI SDK with llama.cpp and ModelFusion.
This starter example shows how to use Next.js, the Vercel AI SDK, llama.cpp, and ModelFusion to create a ChatGPT-like, AI-powered streaming chatbot.
```sh
git clone https://github.com/lgrammel/modelfusion-llamacpp-nextjs-starter.git
cd modelfusion-llamacpp-nextjs-starter
npm install
npm run dev
```

Then open http://localhost:3000 (the Next.js dev server default) in your browser.
For each example, you also need to download the corresponding GGUF model and start the llama.cpp server with it (a download sketch follows the list). Adjust the model path to wherever you stored the file:

- Llama 2 (route: `app/api/llama/route.ts`):
  `./server -m models/llama-2-7b-chat.Q4_K_M.gguf`
- Mistral (route: `app/api/mistral/route.ts`):
  `./server -m models/mistral-7b-instruct-v0.2.Q4_K_M.gguf`
- Mixtral (route: `app/api/mixtral/route.ts`):
  `./server -m models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf`
- OpenHermes 2.5 (route: `app/api/openhermes/route.ts`):
  `./server -m models/openhermes-2.5-mistral-7b.Q4_K_M.gguf`
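As a sketch of the download step: assuming you use the Q4_K_M GGUF files that TheBloke publishes on Hugging Face (the repo and file name below are assumptions; any other GGUF source works the same way), fetching the Llama 2 model could look like this:

```sh
# Create the models directory and download the Llama 2 7B Chat GGUF file.
# Repo and file name are assumptions; swap in the quantization you actually want.
mkdir -p models
curl -L -o models/llama-2-7b-chat.Q4_K_M.gguf \
  https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
```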
The Llama 2 chat route (`app/api/llama/route.ts`) calls the llama.cpp server through ModelFusion and streams the answer back to the client:

```ts
import { ModelFusionTextStream, asChatMessages } from "@modelfusion/vercel-ai";
import { Message, StreamingTextResponse } from "ai";
import { llamacpp, streamText, trimChatPrompt } from "modelfusion";

export const runtime = "edge"; // run this route on the Edge runtime

export async function POST(req: Request) {
  const { messages }: { messages: Message[] } = await req.json();

  const model = llamacpp
    .CompletionTextGenerator({
      promptTemplate: llamacpp.prompt.Llama2, // choose the prompt template that matches the model
      temperature: 0,
      cachePrompt: true, // reuse the llama.cpp prompt cache across requests
      contextWindowSize: 4096, // Llama 2 context window size
      maxGenerationTokens: 512, // room for the answer
    })
    .withChatPrompt();

  // Use ModelFusion to call llama.cpp:
  const textStream = await streamText({
    model,
    // reduce the chat prompt length to fit the context window:
    prompt: await trimChatPrompt({
      model,
      prompt: {
        system:
          "You are an AI chat bot. " +
          "Follow the user's instructions carefully.",

        // map Vercel AI SDK Message to ModelFusion ChatMessage:
        messages: asChatMessages(messages),
      },
    }),
  });

  // Return the result using the Vercel AI SDK:
  return new StreamingTextResponse(
    ModelFusionTextStream(
      textStream,
      // optional callbacks:
      {
        onStart() {
          console.log("onStart");
        },
        onToken(token) {
          console.log("onToken", token);
        },
        onCompletion() {
          console.log("onCompletion");
        },
        onFinal(completion) {
          console.log("onFinal", completion);
        },
      }
    )
  );
}
```
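For reference, a minimal client page that consumes this route could use the Vercel AI SDK's `useChat` hook. This is a sketch rather than the starter's own page code; the `api` path assumes the Llama 2 route above:

```tsx
"use client";

import { useChat } from "ai/react";

export default function Chat() {
  // useChat POSTs { messages } to the given API route and streams the reply
  // into the messages array as tokens arrive.
  const { messages, input, handleInputChange, handleSubmit } = useChat({
    api: "/api/llama",
  });

  return (
    <div>
      {messages.map((m) => (
        <div key={m.id}>
          {m.role === "user" ? "User: " : "AI: "}
          {m.content}
        </div>
      ))}
      <form onSubmit={handleSubmit}>
        <input
          value={input}
          onChange={handleInputChange}
          placeholder="Say something..."
        />
      </form>
    </div>
  );
}
```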