diff --git a/llama/ggml-cuda.cu b/llama/ggml-cuda.cu index e41ed50f3..439f9777d 100644 --- a/llama/ggml-cuda.cu +++ b/llama/ggml-cuda.cu @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-cuda.h b/llama/ggml-cuda.h index 27aae6dc4..2c481bbed 100644 --- a/llama/ggml-cuda.h +++ b/llama/ggml-cuda.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-metal.h b/llama/ggml-metal.h index 57264feb6..bb93cde3b 100644 --- a/llama/ggml-metal.h +++ b/llama/ggml-metal.h @@ -1,7 +1,7 @@ //go:build darwin /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-metal.m b/llama/ggml-metal.m index 709feea47..5fbf52545 100644 --- a/llama/ggml-metal.m +++ b/llama/ggml-metal.m @@ -1,7 +1,7 @@ //go:build darwin /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-metal.metal b/llama/ggml-metal.metal index b8397d9fc..0009f091e 100644 --- a/llama/ggml-metal.metal +++ b/llama/ggml-metal.metal @@ -1,7 +1,7 @@ //go:build darwin /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-mpi.c b/llama/ggml-mpi.c index 82b93bb3a..2ba3727a5 100644 --- a/llama/ggml-mpi.c +++ b/llama/ggml-mpi.c @@ -1,7 +1,7 @@ //go:build mpi /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-mpi.h b/llama/ggml-mpi.h index 618627409..414fa1f73 100644 --- a/llama/ggml-mpi.h +++ b/llama/ggml-mpi.h @@ -1,7 +1,7 @@ //go:build mpi /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-opencl.cpp b/llama/ggml-opencl.cpp index b392f0b33..56650fceb 100644 --- a/llama/ggml-opencl.cpp +++ b/llama/ggml-opencl.cpp @@ -1,7 +1,7 @@ //go:build opencl /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml-opencl.h b/llama/ggml-opencl.h index 94043893c..b05ac6f59 100644 --- a/llama/ggml-opencl.h +++ b/llama/ggml-opencl.h @@ -1,7 +1,7 @@ //go:build opencl /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml.c b/llama/ggml.c index cc9c594f1..854606c6d 100644 --- a/llama/ggml.c +++ b/llama/ggml.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/ggml.h b/llama/ggml.h index 3069ae585..0c2c6e918 100644 --- a/llama/ggml.h +++ b/llama/ggml.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/k_quants.c b/llama/k_quants.c index e6bd7889e..856497552 100644 --- a/llama/k_quants.c +++ b/llama/k_quants.c @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/k_quants.h b/llama/k_quants.h index 177de2d31..88a9d4394 100644 --- a/llama/k_quants.h +++ b/llama/k_quants.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/llama-util.h b/llama/llama-util.h index 0424ed652..0f910783f 100644 --- a/llama/llama-util.h +++ b/llama/llama-util.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/llama.cpp b/llama/llama.cpp index 25c29e79f..c5f15d9ac 100644 --- a/llama/llama.cpp +++ b/llama/llama.cpp @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * @@ -3689,7 +3689,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) { const auto & kv_self = ctx->kv_self; const auto & hparams = ctx->model.hparams; const int n_layer = hparams.n_layer; - const int n_embd = hparams.n_embd; + const int n_embd = hparams.n_embd_gqa(); const int n_ctx = hparams.n_ctx; const size_t kv_size = kv_self.buf.size; @@ -3792,7 +3792,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const auto & kv_self = ctx->kv_self; const auto & hparams = ctx->model.hparams; const int n_layer = hparams.n_layer; - const int n_embd = hparams.n_embd; + const int n_embd = hparams.n_embd_gqa(); const int n_ctx = hparams.n_ctx; size_t kv_size; diff --git a/llama/llama.h b/llama/llama.h index 4a92566fd..d3de28f30 100644 --- a/llama/llama.h +++ b/llama/llama.h @@ -1,5 +1,5 @@ /** - * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b + * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2 * * MIT License * diff --git a/llama/update-llama-cpp.sh b/llama/update-llama-cpp.sh old mode 100644 new mode 100755