remove context shifting with max tokens and update docs

ParthSareen 2025-01-30 13:48:24 -08:00
parent 5c2f35d846
commit 16abd181a9
3 changed files with 36 additions and 32 deletions

View File

@@ -94,6 +94,20 @@ except Exception as e:
print(f"Error: {e}")
```
#### Experimental
- The `num_ctx` parameter can be used to set the context window for the model
- The OpenAI Python SDK does not support setting the context window size; however, it can be set for Ollama through the `extra_body` parameter
- The recommended way to control this is through the [Ollama Python SDK](https://github.com/ollama/ollama-python) with the `options` parameter (a sketch follows the example below)
```py
completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    extra_body={"num_ctx": 4096},
)
```
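For comparison, a minimal sketch of the Ollama Python SDK approach recommended above, assuming the `ollama` package is installed and the model has been pulled locally (model name and `num_ctx` value are illustrative):
```py
# Sketch: set the context window via the Ollama Python SDK's `options` parameter.
# Assumes `pip install ollama` and a running Ollama server with the model available.
from ollama import chat

response = chat(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    options={"num_ctx": 4096},  # context window size, passed natively (no extra_body needed)
)
print(response["message"]["content"])
```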
### OpenAI JavaScript library
```javascript
@@ -142,6 +156,21 @@ const embedding = await openai.embeddings.create({
})
```
#### Experimental
- The `num_ctx` parameter can be used to set the context window for the model
- The OpenAI JS SDK does not support setting the context window size; however, it can be set for Ollama by passing `num_ctx` directly as an [undocumented parameter](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests) with a `@ts-expect-error` comment
- The recommended way to control this is through the [Ollama JS SDK](https://github.com/ollama/ollama-js) with the `options` parameter (a sketch follows the example below)
```js
const chatCompletion = await openai.chat.completions.create({
  messages: [{ role: 'user', content: 'Say this is a test' }],
  model: 'llama3.2',
  // @ts-expect-error num_ctx is not officially supported
  num_ctx: 4096,
})
```
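Likewise, a minimal sketch of the Ollama JS SDK approach recommended above, assuming the `ollama` npm package is installed and the model has been pulled locally (model name and `num_ctx` value are illustrative):
```js
// Sketch: set the context window via the Ollama JS SDK's `options` parameter.
// Assumes `npm install ollama` and a running Ollama server with the model available.
import ollama from 'ollama'

const response = await ollama.chat({
  model: 'llama3.2',
  messages: [{ role: 'user', content: 'Say this is a test' }],
  options: { num_ctx: 4096 }, // context window size, passed natively (no @ts-expect-error needed)
})
console.log(response.message.content)
```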
### `curl`
``` shell
@@ -213,6 +242,7 @@ curl http://localhost:11434/v1/embeddings \
- [x] Chat completions
- [x] Streaming
- [x] JSON mode
- [x] Structured outputs
- [x] Reproducible outputs
- [x] Vision
- [x] Tools

View File

@@ -477,24 +477,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}
 
+	if r.NumCtx != nil {
+		options["num_ctx"] = *r.NumCtx
+	}
+
 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
 	if r.MaxTokens != nil {
 		r.MaxCompletionTokens = r.MaxTokens
 	}
 
-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
-	}
-
-	DEFAULT_NUM_CTX := 2048
-	// set num_ctx to max_completion_tokens if it's greater than num_ctx
 	if r.MaxCompletionTokens != nil {
 		options["num_predict"] = *r.MaxCompletionTokens
-		if r.NumCtx != nil && *r.MaxCompletionTokens > *r.NumCtx {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		} else if *r.MaxCompletionTokens > DEFAULT_NUM_CTX {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		}
 	}
 
 	if r.Temperature != nil {
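For readability, here is a hedged reconstruction of what the option mapping looks like after this change. Field and option names come from the hunk above; the standalone wrapper, struct definition, and exact placement of the relocated `num_ctx` block are simplified or inferred, so treat it as a sketch rather than the actual Ollama source:
```go
// Sketch: simplified reconstruction of the post-change mapping in fromChatRequest.
// With the DEFAULT_NUM_CTX promotion removed, max_completion_tokens now only limits
// generation length (num_predict); num_ctx is set only when the client sends it.
package main

import "fmt"

type chatCompletionRequest struct {
	NumCtx              *int
	MaxTokens           *int // Deprecated: use MaxCompletionTokens instead
	MaxCompletionTokens *int
}

func toOptions(r chatCompletionRequest) map[string]any {
	options := make(map[string]any)

	if r.NumCtx != nil {
		options["num_ctx"] = *r.NumCtx // only set when passed explicitly (e.g. via extra_body)
	}

	if r.MaxTokens != nil {
		r.MaxCompletionTokens = r.MaxTokens // fold the deprecated alias into the newer field
	}

	if r.MaxCompletionTokens != nil {
		options["num_predict"] = *r.MaxCompletionTokens // no longer bumps num_ctx
	}

	return options
}

func main() {
	n := 4096
	// Before this commit, such a request would also have forced num_ctx up to 4096.
	fmt.Println(toOptions(chatCompletionRequest{MaxCompletionTokens: &n})) // map[num_predict:4096]
}
```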

View File

@@ -81,7 +81,7 @@ func TestChatMiddleware(t *testing.T) {
 				{"role": "user", "content": "Hello"}
 			],
 			"stream": true,
-			"max_completion_tokens": 999,
+			"max_tokens": 999,
 			"seed": 123,
 			"stop": ["\n", "stop"],
 			"temperature": 3.0,
@@ -333,7 +333,7 @@ func TestChatMiddleware(t *testing.T) {
 			},
 		},
 		{
-			name: "chat handler with max_completion_tokens < num_ctx",
+			name: "chat handler with max_completion_tokens",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
@@ -350,25 +350,6 @@ func TestChatMiddleware(t *testing.T) {
 				Stream: &False,
 			},
 		},
-		{
-			name: "chat handler with max_completion_tokens > num_ctx",
-			body: `{
-				"model": "test-model",
-				"messages": [{"role": "user", "content": "Hello"}],
-				"max_completion_tokens": 4096
-			}`,
-			req: api.ChatRequest{
-				Model:    "test-model",
-				Messages: []api.Message{{Role: "user", Content: "Hello"}},
-				Options: map[string]any{
-					"num_predict": 4096.0, // float because JSON doesn't distinguish between float and int
-					"num_ctx":     4096.0,
-					"temperature": 1.0,
-					"top_p":       1.0,
-				},
-				Stream: &False,
-			},
-		},
 		{
 			name: "chat handler error forwarding",
 			body: `{