chore: update mllama to use ollama engine (#10637)

2025-11-12 10:28:00 +01:00 · 2025-05-13 17:36:02 -07:00
parent 0478d440f0
commit 23125648b8
67 changed files with 785 additions and 4354 deletions
--- a/llama/llama.cpp/src/llama-graph.h
+++ b/llama/llama.cpp/src/llama-graph.h
@@ -87,7 +87,6 @@ public:

    ggml_tensor * tokens = nullptr; // I32 [n_batch]
    ggml_tensor * embd   = nullptr; // F32 [n_embd, n_batch]
-    ggml_tensor * cross_attn_state; // F32 [4, n_embd, 1061]
 };

 class llm_graph_input_pos : public llm_graph_input_i {
@@ -285,16 +284,6 @@ public:
    const llama_cross * cross = nullptr;
 };

-class llm_graph_input_cross_attn_state : public llm_graph_input_i {
-public:
-    llm_graph_input_cross_attn_state()          = default;
-    virtual ~llm_graph_input_cross_attn_state() = default;
-
-    void set_input(const llama_ubatch * ubatch) override;
-
-    ggml_tensor * cross_attn_state; // F32 [4, n_embd, 1061]
-};
-
 //
 // llm_graph_result
 //
@@ -506,7 +495,6 @@ struct llm_graph_context {
    ggml_tensor * build_inp_cls() const;
    ggml_tensor * build_inp_s_copy() const;
    ggml_tensor * build_inp_s_mask() const;
-    ggml_tensor * build_inp_cross_attn_state() const;

    ggml_tensor * build_inp_cross_embd() const;
    ggml_tensor * build_inp_pos_bucket_enc() const;