Fix images not being seen when using sliced images on the llama engine

A recent update to llama.cpp, the engine used by Ollama, caused all models that require a slice schema to stop displaying images. The cause is that the value of numTokens isn't always the length of the sliced image embedding, but rather the length of the last chunk of the schema. As a result, the image embeddings are not correctly included when processing all of the slices.
2025-10-09 22:53:26 +02:00 · 2025-09-13 07:25:12 +08:00
parent 44a6792873
commit 053092185e
1 changed files with 17 additions and 16 deletions
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -515,33 +515,34 @@ func (c *MtmdContext) NewEmbed(llamaContext *Context, data []byte) ([][]float32,
 	}
 	nChunks := C.mtmd_input_chunks_size(ic)
 	numEmbed := llamaContext.Model().NEmbd()
-	lastChunkSize := 0
+	embed := make([][]float32, 0)
 	for i := range int(nChunks) {
 		chunk := C.mtmd_input_chunks_get(ic, C.size_t(i))
 		numTokens := int(C.mtmd_input_chunk_get_n_tokens(chunk))
-		lastChunkSize = numTokens
+		slog.Debug("chunk tokens", "index", i, "numTokens", numTokens)
 		// Encode the chunk
 		if C.int32_t(0) != C.mtmd_encode_chunk(c.c, chunk) {
 			return nil, errors.New("unable to encode mtmd image chunk")
 		}
 	}
-	// Get the embeddings
+		// Get the embeddings for this chunk
-	embed := make([][]float32, lastChunkSize)
+		chunkEmbed := make([][]float32, numTokens)
-	embd := C.mtmd_get_output_embd(c.c)
+		chunkEmbd := C.mtmd_get_output_embd(c.c)
-	if nil == embd {
+		if nil == chunkEmbd {
-		return nil, errors.New("failed to get image embedding")
+			continue
 		}
 		// Extend the embedding array for each token
-	s := unsafe.Slice((*float32)(embd), numEmbed*lastChunkSize)
+		s := unsafe.Slice((*float32)(chunkEmbd), numTokens*numEmbed)
 		rows := make([]float32, len(s))
 		copy(rows, s)
-	for i := range lastChunkSize {
+		for i := range numTokens {
-		embed[i] = rows[i*numEmbed : (i+1)*numEmbed]
+			chunkEmbed[i] = rows[i*numEmbed : (i+1)*numEmbed]
 		}
-
+		embed = append(embed, chunkEmbed...)
 	}
 	slog.Debug("image embeddings", "totalEmbeddings", len(embed))
 	return embed, nil
 }