mirror of https://github.com/ollama/ollama.git
synced 2025-04-08 11:58:07 +02:00

Bump llama.cpp to b1662 and set n_parallel=1

parent 89bbaafa64
commit 9adca7f711
@@ -160,7 +160,7 @@ func newExtServer(server extServer, model string, adapters, projectors []string,
     sparams.n_batch = C.uint(opts.NumBatch)
     sparams.n_gpu_layers = C.int(numGPU)
     sparams.main_gpu = C.int(opts.MainGPU)
-    sparams.n_parallel = 2 // TODO - wire up concurrency
+    sparams.n_parallel = 1 // TODO - wire up concurrency

     // Always use the value encoded in the model
     sparams.rope_freq_base = 0.0
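In the llama.cpp server, n_parallel sets how many request slots are created, and the context window is divided across them, so two slots leave each request with only part of the model's context. Dropping the value from 2 to 1 keeps the full context for the single active request until concurrency is actually wired up, and rope_freq_base stays at 0.0 so the frequency base encoded in the model is used. A minimal C++ sketch of the idea, where ServerParams and makeServerParams are illustrative names rather than the cgo bindings used above:

// Minimal sketch, assuming a hypothetical ServerParams struct; the real code
// populates llama.cpp's server params through cgo.
#include <cstdint>

struct ServerParams {
    uint32_t n_batch        = 512;   // prompt-processing batch size
    int      n_gpu_layers   = 0;     // layers to offload to the GPU
    int      main_gpu       = 0;     // primary GPU index
    int      n_parallel     = 1;     // request slots; pinned to 1 until concurrency is wired up
    float    rope_freq_base = 0.0f;  // 0.0 = use the value encoded in the model
};

// Build the launch parameters from user options, keeping a single slot so the
// whole context window stays available to the one active request.
ServerParams makeServerParams(uint32_t numBatch, int numGPU, int mainGPU) {
    ServerParams p;
    p.n_batch        = numBatch;
    p.n_gpu_layers   = numGPU;
    p.main_gpu       = mainGPU;
    p.n_parallel     = 1;
    p.rope_freq_base = 0.0f;
    return p;
}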
@@ -1 +1 @@
-Subproject commit a7aee47b98e45539d491071b25778b833b77e387
+Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1
@@ -1,4 +1,4 @@
-From 087cf3300e973d7790db8f7cad01d2a790de38be Mon Sep 17 00:00:00 2001
+From b5e195803e2a989e57eef0010adce778df1e2d01 Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Mon, 13 Nov 2023 12:25:58 -0800
 Subject: [PATCH] Expose callable API for server
@@ -46,7 +46,7 @@ index 859cd12..4ea47a7 100644
 +endif()
 \ No newline at end of file
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index d0cd8e1..5f5d4c5 100644
+index 0403853..2084fd8 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
 @@ -5,6 +5,9 @@
@@ -59,15 +59,15 @@ index d0cd8e1..5f5d4c5 100644
 #ifndef NDEBUG
 // crash the server in debug mode, otherwise send an http 500 error
-@@ -2632,6 +2635,7 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
+@@ -2643,6 +2646,7 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
     }
 }

 +#ifndef LLAMA_SERVER_LIBRARY
 int main(int argc, char **argv)
 {
     // own arguments required by this example
-@@ -3066,3 +3070,273 @@ int main(int argc, char **argv)
+@@ -3123,3 +3127,273 @@ int main(int argc, char **argv)
 #if SERVER_VERBOSE != 1
     llama_backend_free();
     return 0;
 }
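The +#ifndef LLAMA_SERVER_LIBRARY context line above is the core of the patch being rebased here: it guards server.cpp's main() so the same translation unit can either build as the standalone server binary or, with LLAMA_SERVER_LIBRARY defined, be compiled into a library that exposes a callable API for the Go side to drive. A rough C++ sketch of that guard pattern follows; it is not the actual patch contents, and the exported name llama_server_start is illustrative, not the function the patch adds.

// Rough sketch of the preprocessor-guard pattern, under the assumptions above.
static int run_server(int argc, char **argv) {
    // parse arguments, load the model, run the serving loop...
    (void)argc;
    (void)argv;
    return 0;
}

#ifndef LLAMA_SERVER_LIBRARY
// Standalone build: the usual executable entry point.
int main(int argc, char **argv) {
    return run_server(argc, argv);
}
#else
// Library build: no main(); expose a C-callable entry point instead so a
// host process (for example, a cgo caller) can start the server itself.
extern "C" int llama_server_start(int argc, char **argv) {
    return run_server(argc, argv);
}
#endif // LLAMA_SERVER_LIBRARY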
@@ -439,10 +439,10 @@ index 0000000..d22f1b6
 +#endif // LLAMA_SERVER_LIBRARY
 \ No newline at end of file
 diff --git a/ggml-cuda.cu b/ggml-cuda.cu
-index 9e1acd3..ea64b55 100644
+index f20846f..9640cf3 100644
 --- a/ggml-cuda.cu
 +++ b/ggml-cuda.cu
-@@ -6505,6 +6505,7 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
+@@ -6757,6 +6757,7 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
     CUDA_CHECK(cudaGetDevice(&id));
     src_ptr = (char *) extra->data_device[id];
 } else {