From be61a81758ecd7990e0bd4cdd77093ffc4ecb1ca Mon Sep 17 00:00:00 2001 From: Purinda Gunasekara Date: Tue, 21 Nov 2023 02:52:52 +1100 Subject: [PATCH] Fix main-gpu argument not being passed through to llama.cpp (#1192) --- llm/llama.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llm/llama.go b/llm/llama.go index 4481e97de..4eab751d6 100644 --- a/llm/llama.go +++ b/llm/llama.go @@ -339,6 +339,7 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers "--model", model, "--ctx-size", fmt.Sprintf("%d", opts.NumCtx), "--batch-size", fmt.Sprintf("%d", opts.NumBatch), + "--main-gpu", fmt.Sprintf("%d", opts.MainGPU), "--n-gpu-layers", fmt.Sprintf("%d", numGPU), "--embedding", } @@ -544,6 +545,7 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, "stream": true, "n_predict": llm.NumPredict, "n_keep": llm.NumKeep, + "main_gpu": llm.MainGPU, "temperature": llm.Temperature, "top_k": llm.TopK, "top_p": llm.TopP,