mirror of
https://github.com/ollama/ollama.git
synced 2025-09-27 18:26:59 +02:00
Fix image cannot be seen with slice image on llama engine
Ollama's recent engine update, llama.cpp, caused all models requiring a slice schema to not display images. As a result, the value of numTokens isn't always the length of the sliced image embed, but rather the end length of the schema. This causes the image embed to not be correctly included during all slice processing.
This commit is contained in:
@@ -515,33 +515,34 @@ func (c *MtmdContext) NewEmbed(llamaContext *Context, data []byte) ([][]float32,
|
||||
}
|
||||
nChunks := C.mtmd_input_chunks_size(ic)
|
||||
numEmbed := llamaContext.Model().NEmbd()
|
||||
lastChunkSize := 0
|
||||
embed := make([][]float32, 0)
|
||||
for i := range int(nChunks) {
|
||||
chunk := C.mtmd_input_chunks_get(ic, C.size_t(i))
|
||||
numTokens := int(C.mtmd_input_chunk_get_n_tokens(chunk))
|
||||
lastChunkSize = numTokens
|
||||
slog.Debug("chunk tokens", "index", i, "numTokens", numTokens)
|
||||
|
||||
// Encode the chunk
|
||||
if C.int32_t(0) != C.mtmd_encode_chunk(c.c, chunk) {
|
||||
return nil, errors.New("unable to encode mtmd image chunk")
|
||||
}
|
||||
}
|
||||
|
||||
// Get the embeddings
|
||||
embed := make([][]float32, lastChunkSize)
|
||||
embd := C.mtmd_get_output_embd(c.c)
|
||||
if nil == embd {
|
||||
return nil, errors.New("failed to get image embedding")
|
||||
}
|
||||
// Get the embeddings for this chunk
|
||||
chunkEmbed := make([][]float32, numTokens)
|
||||
chunkEmbd := C.mtmd_get_output_embd(c.c)
|
||||
if nil == chunkEmbd {
|
||||
continue
|
||||
}
|
||||
|
||||
// Extend the embedding array for each token
|
||||
s := unsafe.Slice((*float32)(embd), numEmbed*lastChunkSize)
|
||||
rows := make([]float32, len(s))
|
||||
copy(rows, s)
|
||||
for i := range lastChunkSize {
|
||||
embed[i] = rows[i*numEmbed : (i+1)*numEmbed]
|
||||
// Extend the embedding array for each token
|
||||
s := unsafe.Slice((*float32)(chunkEmbd), numTokens*numEmbed)
|
||||
rows := make([]float32, len(s))
|
||||
copy(rows, s)
|
||||
for i := range numTokens {
|
||||
chunkEmbed[i] = rows[i*numEmbed : (i+1)*numEmbed]
|
||||
}
|
||||
embed = append(embed, chunkEmbed...)
|
||||
}
|
||||
|
||||
slog.Debug("image embeddings", "totalEmbeddings", len(embed))
|
||||
return embed, nil
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user