Mirror of https://github.com/ollama/ollama.git (synced 2025-04-07 03:18:24 +02:00)
remove context shifting with max tokens and update docs

parent 5c2f35d846
commit 16abd181a9
docs/openai.md

@@ -94,6 +94,20 @@ except Exception as e:
     print(f"Error: {e}")
 ```
+
+#### Experimental
+
+- The `num_ctx` parameter can be used to set the context window for the model
+- The OpenAI Python SDK does not support setting the context window size; however, this can be set for Ollama through the `extra_body` parameter
+
+- The recommended way to control this is through the [Ollama Python SDK](https://github.com/ollama/ollama-python) with the `options` parameter
+```py
+completion = client.beta.chat.completions.create(
+    model="llama3.1:8b",
+    messages=[{"role": "user", "content": "Say this is a test"}],
+    extra_body={"num_ctx": 4096},
+)
+```
 
 ### OpenAI JavaScript library
 
 ```javascript
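The bullets above recommend the [Ollama Python SDK](https://github.com/ollama/ollama-python) `options` route, but the hunk only shows the OpenAI-SDK `extra_body` workaround. A minimal sketch of the recommended route, assuming the `ollama` package is installed and `llama3.1:8b` has been pulled locally:

```py
import ollama

# options maps directly onto Ollama's model options, including num_ctx.
response = ollama.chat(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Say this is a test"}],
    options={"num_ctx": 4096},
)
print(response["message"]["content"])
```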
@@ -142,6 +156,21 @@ const embedding = await openai.embeddings.create({
 })
 ```
+
+#### Experimental
+
+- The `num_ctx` parameter can be used to set the context window for the model
+- The OpenAI JS SDK does not support setting the context window size; however, this can be set for Ollama by passing `num_ctx` directly, with a `@ts-expect-error`, as an undocumented parameter in the [OpenAI JS SDK](https://github.com/openai/openai-node?tab=readme-ov-file#making-customundocumented-requests)
+
+- The recommended way to control this is through the [Ollama JS SDK](https://github.com/ollama/ollama-js) with the `options` parameter
+```js
+const chatCompletion = await openai.chat.completions.create({
+    messages: [{ role: 'user', content: 'Say this is a test' }],
+    model: 'llama3.2',
+    // @ts-expect-error num_ctx is not officially supported
+    num_ctx: 4096,
+})
+```
 
 ### `curl`
 
 ``` shell
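Likewise, the recommended [Ollama JS SDK](https://github.com/ollama/ollama-js) route from the bullets above is not shown in the hunk. A minimal sketch, assuming the `ollama` npm package is installed and `llama3.2` is available locally:

```js
import ollama from 'ollama'

// options maps directly onto Ollama's model options; no @ts-expect-error needed.
const response = await ollama.chat({
  model: 'llama3.2',
  messages: [{ role: 'user', content: 'Say this is a test' }],
  options: { num_ctx: 4096 },
})
console.log(response.message.content)
```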
@@ -213,6 +242,7 @@ curl http://localhost:11434/v1/embeddings \
 - [x] Chat completions
 - [x] Streaming
 - [x] JSON mode
+- [x] Structured outputs
 - [x] Reproducible outputs
 - [x] Vision
 - [x] Tools
openai/openai.go

@@ -477,24 +477,17 @@ func fromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
 		options["stop"] = stops
 	}
 
+	if r.NumCtx != nil {
+		options["num_ctx"] = *r.NumCtx
+	}
+
 	// Deprecated: MaxTokens is deprecated, use MaxCompletionTokens instead
 	if r.MaxTokens != nil {
 		r.MaxCompletionTokens = r.MaxTokens
 	}
 
-	if r.NumCtx != nil {
-		options["num_ctx"] = *r.NumCtx
-	}
-
-	DEFAULT_NUM_CTX := 2048
-	// set num_ctx to max_completion_tokens if it's greater than num_ctx
 	if r.MaxCompletionTokens != nil {
 		options["num_predict"] = *r.MaxCompletionTokens
-		if r.NumCtx != nil && *r.MaxCompletionTokens > *r.NumCtx {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		} else if *r.MaxCompletionTokens > DEFAULT_NUM_CTX {
-			options["num_ctx"] = *r.MaxCompletionTokens
-		}
 	}
 
 	if r.Temperature != nil {
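Net effect of the hunk above: previously, a `max_completion_tokens` larger than the context window (the request's `num_ctx`, or the 2048 default) silently grew `num_ctx` to match; now `num_ctx` is set only when the client supplies it. A minimal sketch of the new mapping in isolation (hypothetical `mapTokenOptions` helper, not code from the repo):

```go
package main

import "fmt"

// mapTokenOptions mirrors the post-commit logic: num_ctx comes only from
// the request, and max_completion_tokens maps straight to num_predict.
func mapTokenOptions(numCtx, maxCompletionTokens *int) map[string]any {
	options := map[string]any{}
	if numCtx != nil {
		options["num_ctx"] = *numCtx
	}
	if maxCompletionTokens != nil {
		options["num_predict"] = *maxCompletionTokens
	}
	return options
}

func main() {
	maxTokens := 4096
	// Before this commit, num_ctx would also have been forced to 4096 here;
	// now only num_predict is set and the server's default context applies.
	fmt.Println(mapTokenOptions(nil, &maxTokens)) // map[num_predict:4096]
}
```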
openai/openai_test.go

@@ -81,7 +81,7 @@ func TestChatMiddleware(t *testing.T) {
 				{"role": "user", "content": "Hello"}
 			],
 			"stream": true,
-			"max_completion_tokens": 999,
+			"max_tokens": 999,
 			"seed": 123,
 			"stop": ["\n", "stop"],
 			"temperature": 3.0,
@@ -333,7 +333,7 @@ func TestChatMiddleware(t *testing.T) {
 			},
 		},
 		{
-			name: "chat handler with max_completion_tokens < num_ctx",
+			name: "chat handler with max_completion_tokens",
 			body: `{
 				"model": "test-model",
 				"messages": [{"role": "user", "content": "Hello"}],
@@ -350,25 +350,6 @@ func TestChatMiddleware(t *testing.T) {
 				Stream: &False,
 			},
 		},
-		{
-			name: "chat handler with max_completion_tokens > num_ctx",
-			body: `{
-				"model": "test-model",
-				"messages": [{"role": "user", "content": "Hello"}],
-				"max_completion_tokens": 4096
-			}`,
-			req: api.ChatRequest{
-				Model:    "test-model",
-				Messages: []api.Message{{Role: "user", Content: "Hello"}},
-				Options: map[string]any{
-					"num_predict": 4096.0, // float because JSON doesn't distinguish between float and int
-					"num_ctx":     4096.0,
-					"temperature": 1.0,
-					"top_p":       1.0,
-				},
-				Stream: &False,
-			},
-		},
 		{
 			name: "chat handler error forwarding",
 			body: `{