mirror of
https://github.com/ollama/ollama.git
synced 2025-11-10 20:17:59 +01:00
- Both `/api/generate` and `/api/chat` now accept a `"think"` option that allows specifying whether thinking mode should be on or not - Templates get passed this new option so, e.g., qwen3's template can put `/think` or `/no_think` in the system prompt depending on the value of the setting - Models' thinking support is inferred by inspecting model templates. The prefix and suffix the parser uses to identify thinking support are also automatically inferred from templates - Thinking control & parsing are opt-in via the API to prevent breaking existing API consumers. If the `"think"` option is not specified, the behavior is unchanged from previous versions of ollama - Add parsing for thinking blocks in both streaming/non-streaming mode in both `/generate` and `/chat` - Update the CLI to make use of these changes. Users can pass `--think` or `--think=false` to control thinking, or during an interactive session they can use the commands `/set think` or `/set nothink` - A `--hidethinking` option has also been added to the CLI. This makes it easy to use thinking in scripting scenarios like `ollama run qwen3 --think --hidethinking "my question here"` where you just want to see the answer but still want the benefits of thinking models
64 lines
1.6 KiB
Go
64 lines
1.6 KiB
Go
package cmd
|
|
|
|
import (
|
|
"encoding/json"
|
|
"io"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"os"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/types/model"
|
|
)
|
|
|
|
// Test that a warning is printed when thinking is requested but not supported.
|
|
func TestWarnMissingThinking(t *testing.T) {
|
|
cases := []struct {
|
|
capabilities []model.Capability
|
|
expectWarn bool
|
|
}{
|
|
{capabilities: []model.Capability{model.CapabilityThinking}, expectWarn: false},
|
|
{capabilities: []model.Capability{}, expectWarn: true},
|
|
}
|
|
|
|
for _, tc := range cases {
|
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/api/show" || r.Method != http.MethodPost {
|
|
t.Fatalf("unexpected request to %s %s", r.URL.Path, r.Method)
|
|
}
|
|
var req api.ShowRequest
|
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
t.Fatalf("decode request: %v", err)
|
|
}
|
|
resp := api.ShowResponse{Capabilities: tc.capabilities}
|
|
if err := json.NewEncoder(w).Encode(resp); err != nil {
|
|
t.Fatalf("encode response: %v", err)
|
|
}
|
|
}))
|
|
defer srv.Close()
|
|
|
|
t.Setenv("OLLAMA_HOST", srv.URL)
|
|
client, err := api.ClientFromEnvironment()
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
oldStderr := os.Stderr
|
|
r, w, _ := os.Pipe()
|
|
os.Stderr = w
|
|
ensureThinkingSupport(t.Context(), client, "m")
|
|
w.Close()
|
|
os.Stderr = oldStderr
|
|
out, _ := io.ReadAll(r)
|
|
|
|
warned := strings.Contains(string(out), "warning:")
|
|
if tc.expectWarn && !warned {
|
|
t.Errorf("expected warning, got none")
|
|
}
|
|
if !tc.expectWarn && warned {
|
|
t.Errorf("did not expect warning, got: %s", string(out))
|
|
}
|
|
}
|
|
}
|