remove context shifting with max tokens and update docs

ParthSareen 2025-01-30 13:48:24 -08:00
parent 5c2f35d846
commit 16abd181a9
3 changed files with 36 additions and 32 deletions

View File

@@ -94,6 +94,20 @@ except Exception as e:
print(f"Error: {e}")
```
#### Experimental
- The `num_ctx` parameter can be used to set the context window for the model
- The OpenAI Python SDK does not support setting the context window size; however, it can be set for Ollama through the `extra_body` parameter
- The recommended way to control this is through the [Ollama Python SDK](https://github.com/ollama/ollama-python) with the `options` parameter (a sketch follows the example below)
```py
completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    extra_body={"num_ctx": 4096},
)
```
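For comparison, a minimal sketch of the Ollama Python SDK approach recommended above, assuming the `ollama` package is installed and the model has been pulled locally (model name and `num_ctx` value are illustrative):
```py
# Sketch: set the context window via the Ollama Python SDK's `options` parameter.
# Assumes `pip install ollama` and a running Ollama server with the model available.
from ollama import chat

response = chat(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    options={"num_ctx": 4096},  # context window size, passed natively (no extra_body needed)
)
print(response["message"]["content"])
```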
### OpenAI JavaScript library
```javascript
@@ -142,6 +156,21 @@ const embedding = await openai.embeddings.create({
})
```
#### Experimental
- The `num_ctx` parameter can be used to set the context window for the model
- The OpenAI JS SDK does not support setting the context window size; however, it can be set for Ollama by passing `num_ctx` directly as an [undocumented parameter](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests) with a `@ts-expect-error` comment
- The recommended way to control this is through the [Ollama JS SDK](https://github.com/ollama/ollama-js) with the `options` parameter (a sketch follows the example below)
```js
const chatCompletion = await openai.chat.completions.create({
  messages: [{ role: 'user', content: 'Say this is a test' }],
  model: 'llama3.2',
  // @ts-expect-error num_ctx is not officially supported
  num_ctx: 4096,
})
```
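Likewise, a minimal sketch of the Ollama JS SDK approach recommended above, assuming the `ollama` npm package is installed and the model has been pulled locally (model name and `num_ctx` value are illustrative):
```js
// Sketch: set the context window via the Ollama JS SDK's `options` parameter.
// Assumes `npm install ollama` and a running Ollama server with the model available.
import ollama from 'ollama'

const response = await ollama.chat({
  model: 'llama3.2',
  messages: [{ role: 'user', content: 'Say this is a test' }],
  options: { num_ctx: 4096 }, // context window size, passed natively (no @ts-expect-error needed)
})
console.log(response.message.content)
```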
### `curl`
``` shell
@@ -213,6 +242,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] Chat completions
- [x] Streaming
- [x] JSON mode
- [x] Structured outputs
- [x] Reproducible outputs
- [x] Vision
- [x] Tools

View File

@@ -477,24 +477,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}
 
+	if r.NumCtx != nil {
+		options["num_ctx"] = *r.NumCtx
+	}
+
 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
 	if r.MaxTokens != nil {
 		r.MaxCompletionTokens = r.MaxTokens
 	}
 
-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
-	}
-
-	DEFAULT_NUM_CTX := 2048
-	// set num_ctx to max_completion_tokens if it's greater than num_ctx
 	if r.MaxCompletionTokens != nil {
 		options["num_predict"] = *r.MaxCompletionTokens
-		if r.NumCtx != nil && *r.MaxCompletionTokens > *r.NumCtx {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		} else if *r.MaxCompletionTokens > DEFAULT_NUM_CTX {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		}
 	}
 
 	if r.Temperature != nil {
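For readability, here is a hedged reconstruction of what the option mapping looks like after this change. Field and option names come from the hunk above; the standalone wrapper, struct definition, and exact placement of the relocated `num_ctx` block are simplified or inferred, so treat it as a sketch rather than the actual Ollama source:
```go
// Sketch: simplified reconstruction of the post-change mapping in fromChatRequest.
// With the DEFAULT_NUM_CTX promotion removed, max_completion_tokens now only limits
// generation length (num_predict); num_ctx is set only when the client sends it.
package main

import "fmt"

type chatCompletionRequest struct {
	NumCtx              *int
	MaxTokens           *int // Deprecated: use MaxCompletionTokens instead
	MaxCompletionTokens *int
}

func toOptions(r chatCompletionRequest) map[string]any {
	options := make(map[string]any)

	if r.NumCtx != nil {
		options["num_ctx"] = *r.NumCtx // only set when passed explicitly (e.g. via extra_body)
	}

	if r.MaxTokens != nil {
		r.MaxCompletionTokens = r.MaxTokens // fold the deprecated alias into the newer field
	}

	if r.MaxCompletionTokens != nil {
		options["num_predict"] = *r.MaxCompletionTokens // no longer bumps num_ctx
	}

	return options
}

func main() {
	n := 4096
	// Before this commit, such a request would also have forced num_ctx up to 4096.
	fmt.Println(toOptions(chatCompletionRequest{MaxCompletionTokens: &n})) // map[num_predict:4096]
}
```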

View File

@@ -81,7 +81,7 @@ func TestChatMiddleware(t *testing.T) {
 				{"role": "user", "content": "Hello"}
 			],
 			"stream": true,
-			"max_completion_tokens": 999,
+			"max_tokens": 999,
 			"seed": 123,
 			"stop": ["\n", "stop"],
 			"temperature": 3.0,
@@ -333,7 +333,7 @@ func TestChatMiddleware(t *testing.T) {
 			},
 		},
 		{
-			name: "chat handler with max_completion_tokens < num_ctx",
+			name: "chat handler with max_completion_tokens",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
@@ -350,25 +350,6 @@ func TestChatMiddleware(t *testing.T) {
 				Stream: &False,
 			},
 		},
-		{
-			name: "chat handler with max_completion_tokens > num_ctx",
-			body: `{
-				"model": "test-model",
-				"messages": [{"role": "user", "content": "Hello"}],
-				"max_completion_tokens": 4096
-			}`,
-			req: api.ChatRequest{
-				Model:    "test-model",
-				Messages: []api.Message{{Role: "user", Content: "Hello"}},
-				Options: map[string]any{
-					"num_predict": 4096.0, // float because JSON doesn't distinguish between float and int
-					"num_ctx":     4096.0,
-					"temperature": 1.0,
-					"top_p":       1.0,
-				},
-				Stream: &False,
-			},
-		},
 		{
 			name: "chat handler error forwarding",
 			body: `{