skip tokenizer.model if possible (#11050)

if tokenizer.json is already copied, skip tokenizer.model
This commit is contained in:
Michael Yang
2025-06-11 12:10:35 -07:00
committed by GitHub
parent 2e77aa1ae7
commit 0dabb4ef6a

View File

@ -292,13 +292,18 @@ func filesForModel(path string) ([]string, error) {
} }
files = append(files, js...) files = append(files, js...)
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { // only include tokenizer.model if tokenizer.json is not present
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob if !slices.ContainsFunc(files, func(s string) bool {
// tokenizer.model might be an unresolved git lfs reference; error if it is return slices.Contains(strings.Split(s, string(os.PathSeparator)), "tokenizer.json")
files = append(files, tks...) }) {
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 { if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// sometimes tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B) // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
files = append(files, tks...) // tokenizer.model might be an unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// sometimes tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
}
} }
return files, nil return files, nil