diff --git a/server/internal/handler/skill.go b/server/internal/handler/skill.go index d64b4d671..60ada4ca7 100644 --- a/server/internal/handler/skill.go +++ b/server/internal/handler/skill.go @@ -19,11 +19,18 @@ import ( "github.com/multica-ai/multica/server/pkg/protocol" ) -// sanitizeNullBytes removes null bytes (0x00) from strings. -// PostgreSQL rejects null bytes in text columns with -// "invalid byte sequence for encoding UTF8: 0x00 (SQLSTATE 22021)". +// sanitizeNullBytes makes a string safe for a PostgreSQL TEXT column. +// +// Two failure modes covered: +// - Embedded NUL (0x00) — PG rejects with SQLSTATE 22021. Removed. +// - Other invalid-UTF-8 byte sequences (e.g. 0x91 = Windows-1252 smart +// quote, which crashed agent-template import of skills containing +// Windows-encoded prose). `strings.ToValidUTF8` drops them. +// +// Name is kept for compatibility with the many call sites; the behaviour +// is a strict superset of the original. func sanitizeNullBytes(s string) string { - return strings.ReplaceAll(s, "\x00", "") + return strings.ToValidUTF8(strings.ReplaceAll(s, "\x00", ""), "") } // --- Response structs --- @@ -481,7 +488,16 @@ func isCapError(err error) bool { // addFile appends a supporting file while enforcing the per-bundle caps. It // returns an error when either the file count or aggregate byte budget would // be exceeded so the caller fails the import instead of silently truncating. +// +// Binary files (images, fonts, archives) are silently skipped: their bytes +// can't survive a PG TEXT column (SQLSTATE 22021), and they're reference +// assets the agent never reads as text anyway. Logging the skip leaves a +// breadcrumb if a user expected one of these to import. func (s *importedSkill) addFile(path, content string) error { + if isLikelyBinaryFilePath(path) { + slog.Info("skill import: skipping binary file", "path", path, "size", len(content)) + return nil + } if len(s.files) >= maxImportFileCount { return fmt.Errorf("%w: import bundle exceeds %d file limit", errImportCapExceeded, maxImportFileCount) } @@ -493,6 +509,34 @@ func (s *importedSkill) addFile(path, content string) error { return nil } +// isLikelyBinaryFilePath reports whether the file's extension indicates a +// non-text payload. Conservative blacklist — extensions not on the list +// are assumed text and pass through. `sanitizeNullBytes` (called at PG +// insert time) is the second-line defence against any text file that +// turns out to have stray invalid-UTF-8 bytes. +func isLikelyBinaryFilePath(path string) bool { + ext := strings.ToLower(filepath.Ext(path)) + switch ext { + case + // images + ".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff", ".ico", ".heic", + // fonts + ".ttf", ".otf", ".woff", ".woff2", ".eot", + // archives + ".zip", ".gz", ".tar", ".bz2", ".7z", ".rar", + // documents (binary office) + ".pdf", ".docx", ".xlsx", ".pptx", ".doc", ".xls", ".ppt", + // media + ".mp3", ".mp4", ".wav", ".avi", ".mov", ".webm", ".m4a", ".flac", + // compiled / executable + ".exe", ".dll", ".so", ".dylib", ".class", ".jar", ".wasm", + // db / cache + ".db", ".sqlite", ".sqlite3", ".pyc": + return true + } + return false +} + // --- ClawHub types --- type clawhubGetSkillResponse struct {