Update GGML to b6646 (#12245)

Notable EOLs with this change:
- MacOS v12 and v13 are no longer supported (v14+ required)
- AMD gfx900 and gfx906 are no longer supported
This commit is contained in:
Daniel Hiltgen
2025-10-02 14:47:10 -07:00
committed by GitHub
parent fdb109469f
commit c68f367ef6
326 changed files with 30615 additions and 20624 deletions

View File

@@ -14,6 +14,7 @@
#include <climits>
#include <cmath>
#include <codecvt>
#include <chrono>
#include <cstdarg>
#include <cstring>
#include <ctime>
@@ -41,6 +42,7 @@
#endif
#include <locale>
#include <windows.h>
#include <string.h>
#include <fcntl.h>
#include <io.h>
#else
@@ -49,6 +51,11 @@
#include <unistd.h>
#endif
#if defined(__linux__)
#include <sys/types.h>
#include <pwd.h>
#endif
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
@@ -557,13 +564,6 @@ std::string string_from(const struct llama_context * ctx, const std::vector<llam
auto detokenized = common_token_to_piece(ctx, token);
detokenized.erase(
std::remove_if(
detokenized.begin(),
detokenized.end(),
[](const unsigned char c) { return !std::isprint(c); }),
detokenized.end());
buf << "'" << detokenized << "'"
<< ":" << std::to_string(token);
}
@@ -588,13 +588,6 @@ std::string string_from(const struct llama_context * ctx, const struct llama_bat
auto detokenized = common_token_to_piece(ctx, batch.token[i]);
detokenized.erase(
std::remove_if(
detokenized.begin(),
detokenized.end(),
[](const unsigned char c) { return !std::isprint(c); }),
detokenized.end());
buf << "\n" << std::to_string(i)
<< ", token '" << detokenized << "'"
<< ", pos " << std::to_string(batch.pos[i])
@@ -877,8 +870,20 @@ std::string fs_get_cache_directory() {
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
if (std::getenv("XDG_CACHE_HOME")) {
cache_directory = std::getenv("XDG_CACHE_HOME");
} else {
} else if (std::getenv("HOME")) {
cache_directory = std::getenv("HOME") + std::string("/.cache/");
} else {
#if defined(__linux__)
/* no $HOME is defined, fallback to getpwuid */
struct passwd *pw = getpwuid(getuid());
if ((!pw) || (!pw->pw_dir)) {
throw std::runtime_error("Failed to find $HOME directory");
}
cache_directory = std::string(pw->pw_dir) + std::string("/.cache/");
#else /* defined(__linux__) */
throw std::runtime_error("Failed to find $HOME directory");
#endif /* defined(__linux__) */
}
#elif defined(__APPLE__)
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
@@ -914,7 +919,8 @@ struct common_init_result common_init_from_params(common_params & params) {
llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
if (model == NULL) {
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
LOG_ERR("%s: failed to load model '%s', try reducing --n-gpu-layers if you're running out of VRAM\n",
__func__, params.model.path.c_str());
return iparams;
}
@@ -924,7 +930,8 @@ struct common_init_result common_init_from_params(common_params & params) {
llama_context * lctx = llama_init_from_model(model, cparams);
if (lctx == NULL) {
LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
LOG_ERR("%s: failed to create context with model '%s', try reducing --n-gpu-layers if you're running out of VRAM\n",
__func__, params.model.path.c_str());
llama_model_free(model);
return iparams;
}
@@ -971,15 +978,13 @@ struct common_init_result common_init_from_params(common_params & params) {
bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
bool has_rerank_prompt = llama_model_chat_template(model, "rerank") != NULL;
if (!has_eos && !has_sep) {
LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
if (!has_eos && !has_sep && !has_rerank_prompt) {
LOG_WRN("%s: warning: vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n", __func__);
ok = false;
} else if (!has_eos) {
LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
} else if (!has_sep) {
LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
ok = false;
}
if (!ok) {
@@ -1001,7 +1006,12 @@ struct common_init_result common_init_from_params(common_params & params) {
return iparams;
}
char buf[1024];
la.ptr = lora.get();
llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
la.task_name = buf;
llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
la.prompt_prefix = buf;
iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
}
@@ -1165,11 +1175,10 @@ struct llama_context_params common_context_params_to_llama(const common_params &
cparams.yarn_orig_ctx = params.yarn_orig_ctx;
cparams.pooling_type = params.pooling_type;
cparams.attention_type = params.attention_type;
cparams.defrag_thold = params.defrag_thold;
cparams.flash_attn_type = params.flash_attn_type;
cparams.cb_eval = params.cb_eval;
cparams.cb_eval_user_data = params.cb_eval_user_data;
cparams.offload_kqv = !params.no_kv_offload;
cparams.flash_attn = params.flash_attn;
cparams.no_perf = params.no_perf;
cparams.op_offload = !params.no_op_offload;
cparams.swa_full = params.swa_full;
@@ -1565,3 +1574,56 @@ ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std
return result;
}
ggml_opt_optimizer_params common_opt_lr_pars(void * userdata) {
ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(nullptr);
const lr_opt & d = *(lr_opt *) userdata;
result.adamw.alpha = result.sgd.alpha = d.get_lr(d.epoch);
result.sgd.wd = result.adamw.wd = d.wd;
return result;
}
// TODO make all command line args case-insensitive
static inline bool eq_case_insensitive(char const* a, char const* b) {
return !
#if defined(_MSC_VER)
_stricmp
#else
strcasecmp
#endif // defined(_MSC_VER)
(a, b);
}
enum ggml_opt_optimizer_type common_opt_get_optimizer(const char * n) {
if (eq_case_insensitive("adamw", n)) {
return GGML_OPT_OPTIMIZER_TYPE_ADAMW;
}
if (eq_case_insensitive("sgd", n)) {
return GGML_OPT_OPTIMIZER_TYPE_SGD;
}
return GGML_OPT_OPTIMIZER_TYPE_COUNT;
}
// TODO simplify to use just log and exp
static float const k_log_2 = std::log(2.f);
void lr_opt::init() {
if (lr_min > 0 && lr_min < lr0) {
float nhalf = std::log(lr0 / lr_min) / k_log_2;
float e = epochs;
if (decay_epochs > 0 && decay_epochs < e) {
e = decay_epochs;
} else {
decay_epochs = e;
}
scale_epoch = nhalf / e;
}
}
float lr_opt::get_lr(float epoch) const {
float r = lr_min <= 0 ? lr0 :
epoch >= decay_epochs ? lr_min :
lr0 * std::pow(0.5f, epoch * scale_epoch);
LOG_INF("epoch %.2g lr=%.2g\n", epoch, r);
return r;
}