mirror of
https://github.com/ollama/ollama.git
synced 2025-07-12 22:32:52 +02:00
do not reload the running llm when runtime params change (#840)
- only reload the running llm if the model has changed, or the options for loading the running model have changed - rename loaded llm to runner to differentiate from loaded model image - remove logic which keeps the first system prompt in the generation context
This commit is contained in:
52
api/types.go
52
api/types.go
@ -161,15 +161,10 @@ func (r *GenerateResponse) Summary() {
|
||||
}
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
Seed int `json:"seed,omitempty"`
|
||||
|
||||
// Backend options
|
||||
UseNUMA bool `json:"numa,omitempty"`
|
||||
|
||||
// Model options
|
||||
// Runner options which must be set when the model is loaded into memory
|
||||
type Runner struct {
|
||||
UseNUMA bool `json:"numa,omitempty"`
|
||||
NumCtx int `json:"num_ctx,omitempty"`
|
||||
NumKeep int `json:"num_keep,omitempty"`
|
||||
NumBatch int `json:"num_batch,omitempty"`
|
||||
NumGQA int `json:"num_gqa,omitempty"`
|
||||
NumGPU int `json:"num_gpu,omitempty"`
|
||||
@ -183,8 +178,15 @@ type Options struct {
|
||||
EmbeddingOnly bool `json:"embedding_only,omitempty"`
|
||||
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
|
||||
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
|
||||
NumThread int `json:"num_thread,omitempty"`
|
||||
}
|
||||
|
||||
// Predict options
|
||||
type Options struct {
|
||||
Runner
|
||||
|
||||
// Predict options used at runtime
|
||||
NumKeep int `json:"num_keep,omitempty"`
|
||||
Seed int `json:"seed,omitempty"`
|
||||
NumPredict int `json:"num_predict,omitempty"`
|
||||
TopK int `json:"top_k,omitempty"`
|
||||
TopP float32 `json:"top_p,omitempty"`
|
||||
@ -200,8 +202,6 @@ type Options struct {
|
||||
MirostatEta float32 `json:"mirostat_eta,omitempty"`
|
||||
PenalizeNewline bool `json:"penalize_newline,omitempty"`
|
||||
Stop []string `json:"stop,omitempty"`
|
||||
|
||||
NumThread int `json:"num_thread,omitempty"`
|
||||
}
|
||||
|
||||
var ErrInvalidOpts = fmt.Errorf("invalid options")
|
||||
@ -309,20 +309,22 @@ func DefaultOptions() Options {
|
||||
PenalizeNewline: true,
|
||||
Seed: -1,
|
||||
|
||||
// options set when the model is loaded
|
||||
NumCtx: 2048,
|
||||
RopeFrequencyBase: 10000.0,
|
||||
RopeFrequencyScale: 1.0,
|
||||
NumBatch: 512,
|
||||
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
||||
NumGQA: 1,
|
||||
NumThread: 0, // let the runtime decide
|
||||
LowVRAM: false,
|
||||
F16KV: true,
|
||||
UseMLock: false,
|
||||
UseMMap: true,
|
||||
UseNUMA: false,
|
||||
EmbeddingOnly: true,
|
||||
Runner: Runner{
|
||||
// options set when the model is loaded
|
||||
NumCtx: 2048,
|
||||
RopeFrequencyBase: 10000.0,
|
||||
RopeFrequencyScale: 1.0,
|
||||
NumBatch: 512,
|
||||
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
|
||||
NumGQA: 1,
|
||||
NumThread: 0, // let the runtime decide
|
||||
LowVRAM: false,
|
||||
F16KV: true,
|
||||
UseMLock: false,
|
||||
UseMMap: true,
|
||||
UseNUMA: false,
|
||||
EmbeddingOnly: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user