diff --git a/llama/ggml-cuda.cu b/llama/ggml-cuda.cu
index e41ed50f3..439f9777d 100644
--- a/llama/ggml-cuda.cu
+++ b/llama/ggml-cuda.cu
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-cuda.h b/llama/ggml-cuda.h
index 27aae6dc4..2c481bbed 100644
--- a/llama/ggml-cuda.h
+++ b/llama/ggml-cuda.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-metal.h b/llama/ggml-metal.h
index 57264feb6..bb93cde3b 100644
--- a/llama/ggml-metal.h
+++ b/llama/ggml-metal.h
@@ -1,7 +1,7 @@
 //go:build darwin
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-metal.m b/llama/ggml-metal.m
index 709feea47..5fbf52545 100644
--- a/llama/ggml-metal.m
+++ b/llama/ggml-metal.m
@@ -1,7 +1,7 @@
 //go:build darwin
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-metal.metal b/llama/ggml-metal.metal
index b8397d9fc..0009f091e 100644
--- a/llama/ggml-metal.metal
+++ b/llama/ggml-metal.metal
@@ -1,7 +1,7 @@
 //go:build darwin
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-mpi.c b/llama/ggml-mpi.c
index 82b93bb3a..2ba3727a5 100644
--- a/llama/ggml-mpi.c
+++ b/llama/ggml-mpi.c
@@ -1,7 +1,7 @@
 //go:build mpi
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-mpi.h b/llama/ggml-mpi.h
index 618627409..414fa1f73 100644
--- a/llama/ggml-mpi.h
+++ b/llama/ggml-mpi.h
@@ -1,7 +1,7 @@
 //go:build mpi
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-opencl.cpp b/llama/ggml-opencl.cpp
index b392f0b33..56650fceb 100644
--- a/llama/ggml-opencl.cpp
+++ b/llama/ggml-opencl.cpp
@@ -1,7 +1,7 @@
 //go:build opencl
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml-opencl.h b/llama/ggml-opencl.h
index 94043893c..b05ac6f59 100644
--- a/llama/ggml-opencl.h
+++ b/llama/ggml-opencl.h
@@ -1,7 +1,7 @@
 //go:build opencl
 
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml.c b/llama/ggml.c
index cc9c594f1..854606c6d 100644
--- a/llama/ggml.c
+++ b/llama/ggml.c
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/ggml.h b/llama/ggml.h
index 3069ae585..0c2c6e918 100644
--- a/llama/ggml.h
+++ b/llama/ggml.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/k_quants.c b/llama/k_quants.c
index e6bd7889e..856497552 100644
--- a/llama/k_quants.c
+++ b/llama/k_quants.c
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/k_quants.h b/llama/k_quants.h
index 177de2d31..88a9d4394 100644
--- a/llama/k_quants.h
+++ b/llama/k_quants.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/llama-util.h b/llama/llama-util.h
index 0424ed652..0f910783f 100644
--- a/llama/llama-util.h
+++ b/llama/llama-util.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 25c29e79f..c5f15d9ac 100644
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
@@ -3689,7 +3689,7 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
         const auto & kv_self = ctx->kv_self;
         const auto & hparams = ctx->model.hparams;
         const int    n_layer = hparams.n_layer;
-        const int    n_embd  = hparams.n_embd;
+        const int    n_embd  = hparams.n_embd_gqa();
         const int    n_ctx   = hparams.n_ctx;
 
         const size_t kv_size = kv_self.buf.size;
@@ -3792,7 +3792,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
         const auto & kv_self = ctx->kv_self;
         const auto & hparams = ctx->model.hparams;
         const int    n_layer = hparams.n_layer;
-        const int    n_embd  = hparams.n_embd;
+        const int    n_embd  = hparams.n_embd_gqa();
         const int    n_ctx   = hparams.n_ctx;
 
         size_t kv_size;
diff --git a/llama/llama.h b/llama/llama.h
index 4a92566fd..d3de28f30 100644
--- a/llama/llama.h
+++ b/llama/llama.h
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - git 7c529cede6e84054e77a3eceab31c53de7b2f55b
+ * llama.cpp - git d91f3f0c55663719ea03b76311e8c36ed55eb0e2
  *
  * MIT License
  *
diff --git a/llama/update-llama-cpp.sh b/llama/update-llama-cpp.sh
old mode 100644
new mode 100755