gpt-oss (#11672)

* bf16 * tests * gpt-oss * enable gptoss for engine * rough estimate * convert to mxfp4 * handle safetensors U8 * clamp glu/linear * update tokenizer * MXFP4 support This implements the Open Compute Microscaling (MX) FP4 format as a tensor type with backend implementations focusing on mulmat and mulmatid on CPU, CUDA, and Metal. * Unit tests for MXFP4 support This exercises various operations and shapes on both CPU and GPU (if detected on the system) * cuda graph * unit test adjustments * cuda: optimize memory access Read 4 bytes at a time (8 elements) when performing mul_mat_vec_mxfp4 * mac: fix crash on old macos versions cblas_sgemm is only supported on v13.3 and up, however bf16 is only supported on v14+ so we were falling back to ggml-blas and crashing on bf16 tensors. Checking for the function being null seems to be the simplest way to condittionally avoid registering the backend. * server: Minimum context length for gptoss This model requires a minimum context length of 8192 to function effectively. Users can set higher values through all normal mechanisms but lower values will be silently reset. * ggml: Multiply by numParallel for gptoss sliding window When computing the graph size estimate, the context size is already multiplied by numParallel so estimates reflect that. However, since sliding window models use a smaller, fixed context size, they need to manually take numParallel into account. * gpt-oss integration includes harmony parser and thinking levels, etc. * fix sync * fix tests * fix lint --------- Co-authored-by: Daniel Hiltgen <daniel@ollama.com> Co-authored-by: Jesse Gross <jesse@ollama.com> Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
2025-12-08 17:31:23 +01:00 · 2025-08-05 12:21:16 -07:00
parent 0d38b66502
commit fa7776fd24
56 changed files with 6670 additions and 328 deletions
--- a/api/types.go
+++ b/api/types.go
@@ -85,10 +85,11 @@ type GenerateRequest struct {
 	Options map[string]any `json:"options"`

 	// Think controls whether thinking/reasoning models will think before
-	// responding. Needs to be a pointer so we can distinguish between false
+	// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
+	// for supported models. Needs to be a pointer so we can distinguish between false
 	// (request that thinking _not_ be used) and unset (use the old behavior
 	// before this option was introduced)
-	Think *bool `json:"think,omitempty"`
+	Think *ThinkValue `json:"think,omitempty"`
 }

 // ChatRequest describes a request sent by [Client.Chat].
@@ -116,8 +117,9 @@ type ChatRequest struct {
 	Options map[string]any `json:"options"`

 	// Think controls whether thinking/reasoning models will think before
-	// responding
-	Think *bool `json:"think,omitempty"`
+	// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
+	// for supported models.
+	Think *ThinkValue `json:"think,omitempty"`
 }

 type Tools []Tool
@@ -508,6 +510,8 @@ type GenerateResponse struct {
 	Context []int `json:"context,omitempty"`

 	Metrics
+
+	ToolCalls []ToolCall `json:"tool_calls,omitempty"`
 }

 // ModelDetails provides details about a model.
@@ -677,6 +681,113 @@ func DefaultOptions() Options {
 	}
 }

+// ThinkValue represents a value that can be a boolean or a string ("high", "medium", "low")
+type ThinkValue struct {
+	// Value can be a bool or string
+	Value interface{}
+}
+
+// IsValid checks if the ThinkValue is valid
+func (t *ThinkValue) IsValid() bool {
+	if t == nil || t.Value == nil {
+		return true // nil is valid (means not set)
+	}
+
+	switch v := t.Value.(type) {
+	case bool:
+		return true
+	case string:
+		return v == "high" || v == "medium" || v == "low"
+	default:
+		return false
+	}
+}
+
+// IsBool returns true if the value is a boolean
+func (t *ThinkValue) IsBool() bool {
+	if t == nil || t.Value == nil {
+		return false
+	}
+	_, ok := t.Value.(bool)
+	return ok
+}
+
+// IsString returns true if the value is a string
+func (t *ThinkValue) IsString() bool {
+	if t == nil || t.Value == nil {
+		return false
+	}
+	_, ok := t.Value.(string)
+	return ok
+}
+
+// AsBool returns the value as a bool (true if enabled in any way)
+func (t *ThinkValue) AsBool() bool {
+	if t == nil || t.Value == nil {
+		return false
+	}
+
+	switch v := t.Value.(type) {
+	case bool:
+		return v
+	case string:
+		// Any string value ("high", "medium", "low") means thinking is enabled
+		return v == "high" || v == "medium" || v == "low"
+	default:
+		return false
+	}
+}
+
+// AsString returns the value as a string
+func (t *ThinkValue) AsString() string {
+	if t == nil || t.Value == nil {
+		return ""
+	}
+
+	switch v := t.Value.(type) {
+	case string:
+		return v
+	case bool:
+		if v {
+			return "medium" // Default level when just true
+		}
+		return ""
+	default:
+		return ""
+	}
+}
+
+// UnmarshalJSON implements json.Unmarshaler
+func (t *ThinkValue) UnmarshalJSON(data []byte) error {
+	// Try to unmarshal as bool first
+	var b bool
+	if err := json.Unmarshal(data, &b); err == nil {
+		t.Value = b
+		return nil
+	}
+
+	// Try to unmarshal as string
+	var s string
+	if err := json.Unmarshal(data, &s); err == nil {
+		// Validate string values
+		if s != "high" && s != "medium" && s != "low" {
+			return fmt.Errorf("invalid think value: %q (must be \"high\", \"medium\", \"low\", true, or false)", s)
+		}
+		t.Value = s
+		return nil
+	}
+
+	return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
+}
+
+// MarshalJSON implements json.Marshaler
+func (t *ThinkValue) MarshalJSON() ([]byte, error) {
+	if t == nil || t.Value == nil {
+		return []byte("null"), nil
+	}
+	return json.Marshal(t.Value)
+}
+
 type Duration struct {
 	time.Duration
 }