From 0dabb4ef6a1aab240a59b6bb4ef82372d335e3a9 Mon Sep 17 00:00:00 2001 From: Michael Yang Date: Wed, 11 Jun 2025 12:10:35 -0700 Subject: [PATCH] skip tokenizer.model if possible (#11050) if tokenizer.json is already copied, skip tokenizer.model --- parser/parser.go | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 96eae9c04..d40a79c29 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -292,13 +292,18 @@ func filesForModel(path string) ([]string, error) { } files = append(files, js...) - if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { - // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob - // tokenizer.model might be a unresolved git lfs reference; error if it is - files = append(files, tks...) - } else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 { - // some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B) - files = append(files, tks...) + // only include tokenizer.model if tokenizer.json is not present + if !slices.ContainsFunc(files, func(s string) bool { + return slices.Contains(strings.Split(s, string(os.PathSeparator)), "tokenizer.json") + }) { + if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { + // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob + // tokenizer.model might be a unresolved git lfs reference; error if it is + files = append(files, tks...) + } else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 { + // some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B) + files = append(files, tks...) + } } return files, nil