mirror of
https://github.com/ollama/ollama.git
synced 2025-10-09 22:53:26 +02:00
Fix images not being seen when using sliced images on the llama engine
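A recent update to llama.cpp, the engine used by Ollama, caused images to go unseen by every model that requires a slice schema. The cause is that the value of numTokens is not always the length of the full sliced image embedding; after the chunk loop it holds only the length of the last chunk in the schema. As a result, the image embeddings for the earlier slices were not correctly included during slice processing. This commit collects the embeddings of each chunk as it is encoded and appends them to the result.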
This commit is contained in:
@@ -515,33 +515,34 @@ func (c *MtmdContext) NewEmbed(llamaContext *Context, data []byte) ([][]float32,
|
|||||||
}
|
}
|
||||||
nChunks := C.mtmd_input_chunks_size(ic)
|
nChunks := C.mtmd_input_chunks_size(ic)
|
||||||
numEmbed := llamaContext.Model().NEmbd()
|
numEmbed := llamaContext.Model().NEmbd()
|
||||||
lastChunkSize := 0
|
embed := make([][]float32, 0)
|
||||||
for i := range int(nChunks) {
|
for i := range int(nChunks) {
|
||||||
chunk := C.mtmd_input_chunks_get(ic, C.size_t(i))
|
chunk := C.mtmd_input_chunks_get(ic, C.size_t(i))
|
||||||
numTokens := int(C.mtmd_input_chunk_get_n_tokens(chunk))
|
numTokens := int(C.mtmd_input_chunk_get_n_tokens(chunk))
|
||||||
lastChunkSize = numTokens
|
slog.Debug("chunk tokens", "index", i, "numTokens", numTokens)
|
||||||
|
|
||||||
// Encode the chunk
|
// Encode the chunk
|
||||||
if C.int32_t(0) != C.mtmd_encode_chunk(c.c, chunk) {
|
if C.int32_t(0) != C.mtmd_encode_chunk(c.c, chunk) {
|
||||||
return nil, errors.New("unable to encode mtmd image chunk")
|
return nil, errors.New("unable to encode mtmd image chunk")
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Get the embeddings
|
// Get the embeddings for this chunk
|
||||||
embed := make([][]float32, lastChunkSize)
|
chunkEmbed := make([][]float32, numTokens)
|
||||||
embd := C.mtmd_get_output_embd(c.c)
|
chunkEmbd := C.mtmd_get_output_embd(c.c)
|
||||||
if nil == embd {
|
if nil == chunkEmbd {
|
||||||
return nil, errors.New("failed to get image embedding")
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extend the embedding array for each token
|
// Extend the embedding array for each token
|
||||||
s := unsafe.Slice((*float32)(embd), numEmbed*lastChunkSize)
|
s := unsafe.Slice((*float32)(chunkEmbd), numTokens*numEmbed)
|
||||||
rows := make([]float32, len(s))
|
rows := make([]float32, len(s))
|
||||||
copy(rows, s)
|
copy(rows, s)
|
||||||
for i := range lastChunkSize {
|
for i := range numTokens {
|
||||||
embed[i] = rows[i*numEmbed : (i+1)*numEmbed]
|
chunkEmbed[i] = rows[i*numEmbed : (i+1)*numEmbed]
|
||||||
}
|
}
|
||||||
|
embed = append(embed, chunkEmbed...)
|
||||||
|
}
|
||||||
|
slog.Debug("image embeddings", "totalEmbeddings", len(embed))
|
||||||
return embed, nil
|
return embed, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user