Description
I am trying to get output from a multimodal model with the code below, but everything it returns appears to be garbled. Are there any related use cases or examples?
using LLama.Common;
using LLama;
using LLama.Sampling;

// 1. Set the model paths
string modelPath = "H:\\workspace\\gpt\\models\\Qwen2.5-VL-3B\\Qwen2.5-VL-3B-Instruct-UD-Q5_K_XL.gguf"; // replace with your model file path
string mmProjPath = "H:\\workspace\\gpt\\models\\Qwen2.5-VL-3B\\mmproj-BF16.gguf"; // replace with your multimodal projection (mmproj) file path (if needed)
string imagePath = "test.jpeg"; // replace with the path of the image to describe
// 2. Configure the model parameters
var modelParams = new ModelParams(modelPath)
{
    ContextSize = 128000, // set a sufficiently large context window
    GpuLayerCount = 36    // can be set higher if a GPU is available
};
// 3. Load the LLM weights and the multimodal projection weights
using var weights = await LLamaWeights.LoadFromFileAsync(modelParams);
using var llavaWeights = await LLavaWeights.LoadFromFileAsync(mmProjPath);

// 4. Create the context and the executor
using var context = weights.CreateContext(modelParams);
var executor = new InteractiveExecutor(context, llavaWeights);
// 5. Prepare the prompt containing the image placeholder
string prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|><image>\nDescribe this image in chinese.<|im_end|>\n<|im_start|>assistant\n";

InferenceParams inferenceParams = new InferenceParams()
{
    MaxTokens = 512, // maximum number of tokens to generate
    SamplingPipeline = new DefaultSamplingPipeline()
    {
        Temperature = 0.001f, // temperature
    },
    AntiPrompts = new List<string>()
    {
        "<|im_start|>user",
        "<|im_end|>",
    },
};
// 6. Load the image
byte[] imageData = File.ReadAllBytes(imagePath);
executor.Images.Add(imageData);

// 7. Generate the description
Console.WriteLine("Analyzing the image...");
await foreach (var text in executor.InferAsync(prompt, inferenceParams))
{
    Console.Write(text);
}
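For comparison, the llava-style sample I found in the LLamaSharp repo relies only on the plain <image> placeholder in the prompt, without the Qwen-specific vision tokens. A variant of the prompt from step 5 along those lines (whether Qwen2.5-VL actually works with this layout instead of its own <|vision_start|>/<|image_pad|>/<|vision_end|> tokens is only my assumption) would look like this:

// Alternative prompt for step 5, modeled on the llava-style sample
// (assumption: the executor substitutes the image embedding at the
// "<image>" placeholder, so the Qwen vision tokens may not be needed)
string prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
              + "<|im_start|>user\n<image>\nDescribe this image in chinese.<|im_end|>\n"
              + "<|im_start|>assistant\n";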