ollama/llama/amx.cpp

/**
 * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
 *
 * MIT License
 *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "amx.h"
#include "common.h"
#include "mmq.h"
#include "ggml-backend-impl.h"
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-cpu.h"
#include "ggml-cpu-traits.h"

#if defined(__gnu_linux__)
#include <sys/syscall.h>
#include <unistd.h>
#endif

#include <cstdlib>
#include <cstring>
#include <memory>

#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)

// AMX tensor traits
namespace ggml::cpu::amx {
class tensor_traits : public ggml::cpu::tensor_traits {
    // Stores the work-buffer size that the AMX backend wants for `op` into
    // `size`. The thread count is ignored. Always returns true.
    bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override {
        const size_t wanted = ggml_backend_amx_desired_wsize(op);
        size = wanted;
        return true;
    }

    // Runs `op` through the AMX matrix-multiplication kernel when it is a
    // GGML_OP_MUL_MAT and returns true; returns false for every other
    // operation without touching it.
    bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override {
        if (op->op != GGML_OP_MUL_MAT) {
            return false;
        }

        ggml_backend_amx_mul_mat(params, op);
        return true;
    }
};

// Returns the single, function-local tensor_traits instance. Both arguments
// are ignored, so every caller receives the same object.
static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) {
    static tensor_traits shared_traits;
    ggml::cpu::tensor_traits * result = &shared_traits;
    return result;
}
}  // namespace ggml::cpu::amx

// AMX buffer interface
// Releases the memory backing an AMX buffer.
//
// buffer->context holds the pointer obtained from ggml_aligned_malloc() in
// ggml_backend_amx_buffer_type_alloc_buffer(), so it must be returned through
// the matching ggml_aligned_free(). The aligned allocator is not guaranteed to
// be malloc-compatible on every platform (e.g. it may be backed by
// _aligned_malloc on Windows), in which case a plain free() is invalid.
static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    ggml_aligned_free(buffer->context, buffer->size);
}

// Returns the base address of the buffer, which is the pointer stored in
// buffer->context.
static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {
    void * base = buffer->context;
    return base;
}

// Attaches the AMX tensor traits to `tensor` by storing them in tensor->extra.
static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
    GGML_UNUSED(buffer);

    ggml::cpu::tensor_traits * traits = ggml::cpu::amx::get_tensor_traits(buffer, tensor);
    tensor->extra = static_cast<void *>(traits);
}

// Fills `size` bytes of the tensor's data, starting `offset` bytes in, with
// the byte `value`. The buffer argument is not used.
static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                                  uint8_t value, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    char * dst = static_cast<char *>(tensor->data) + offset;
    memset(dst, value, size);
}

// Writes `size` bytes from `data` into the tensor at byte `offset`.
//
// Tensor types for which qtype_has_amx_kernels() is true are handed to
// ggml_backend_amx_convert_weight() (logged as a repack); all other types are
// copied verbatim. The buffer argument is not used.
static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,
                                               const void * data, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    if (!qtype_has_amx_kernels(tensor->type)) {
        char * dst = static_cast<char *>(tensor->data) + offset;
        memcpy(dst, data, size);
        return;
    }

    GGML_LOG_DEBUG("%s: amx repack tensor %s of type %s\n", __func__, tensor->name, ggml_type_name(tensor->type));
    ggml_backend_amx_convert_weight(tensor, data, offset, size);
}

/*
// TODO: figure out what needs to be done with buffer->extra before enabling these.
static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));
    memcpy(data, (const char *)tensor->data + offset, size);

    GGML_UNUSED(buffer);
}

static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
    if (ggml_backend_buffer_is_host(src->buffer)) {
        if (qtype_has_amx_kernels(src->type)) {
            ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_nbytes(dst));
        } else {
            memcpy(dst->data, src->data, ggml_nbytes(src));
        }
        return true;
    }
    return false;

    GGML_UNUSED(buffer);
}
*/

// Sets every byte of the buffer (buffer->size bytes starting at
// buffer->context) to `value`.
static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
    void *       base   = buffer->context;
    const size_t nbytes = buffer->size;
    memset(base, value, nbytes);
}

// Callback table for AMX buffers, passed to ggml_backend_buffer_init() when a
// buffer is allocated. The initializer is positional, so the entries must stay
// in the order of the fields named in the comments.
// get_tensor, cpy_tensor and reset are left unset (nullptr); candidate
// implementations of the first two exist only as commented-out code in this
// file.
static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {
    /* .free_buffer     = */ ggml_backend_amx_buffer_free_buffer,
    /* .get_base        = */ ggml_backend_amx_buffer_get_base,
    /* .init_tensor     = */ ggml_backend_amx_buffer_init_tensor,
    /* .memset_tensor   = */ ggml_backend_amx_buffer_memset_tensor,
    /* .set_tensor      = */ ggml_backend_amx_buffer_set_tensor,
    /* .get_tensor      = */ nullptr,
    /* .cpy_tensor      = */ nullptr,
    /* .clear           = */ ggml_backend_amx_buffer_clear,
    /* .reset           = */ nullptr,
};

// Returns the fixed name of this buffer type, "AMX". The argument is not used.
static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);

    return "AMX";
}

// Allocates `size` bytes with ggml_aligned_malloc() and wraps them in a backend
// buffer that uses the AMX buffer interface.
//
// Returns NULL, after reporting the failure on stderr, when the allocation
// fails.
static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * storage = ggml_aligned_malloc(size);

    if (storage != NULL) {
        return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, storage, size);
    }

    fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);
    return NULL;
}

// Returns the alignment used for tensors in this buffer type, TENSOR_ALIGNMENT.
// The argument is not used.
static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {
    GGML_UNUSED(buft);

    const size_t alignment = TENSOR_ALIGNMENT;
    return alignment;
}

namespace ggml::cpu::amx {
class extra_buffer_type : ggml::cpu::extra_buffer_type {
    // Reports whether `op` can be handled by the AMX kernels. All of the
    // following must hold:
    //   - the op is GGML_OP_MUL_MAT;
    //   - src0 and src1 are contiguous with ne[2] == ne[3] == 1 (2d gemm only);
    //   - src0 lives in a buffer of the AMX buffer type;
    //   - op->ne[0] is a multiple of TILE_N * 2;
    //   - src0 is a type with AMX kernels, or F16;
    //   - src1 is not in a non-host buffer and has type F32.
    bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
        if (op->op != GGML_OP_MUL_MAT) {
            return false;
        }

        const struct ggml_tensor * src0 = op->src[0];
        const struct ggml_tensor * src1 = op->src[1];

        // handle only 2d gemm for now
        auto is_plain_matrix = [](const struct ggml_tensor * t) {
            return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;
        };

        if (!is_plain_matrix(src0) || !is_plain_matrix(src1)) {
            return false;
        }

        // src0 must have been allocated from the AMX buffer type
        if (!src0->buffer || src0->buffer->buft != ggml_backend_amx_buffer_type()) {
            return false;
        }

        // out_features is 32x
        if (op->ne[0] % (TILE_N * 2) != 0) {
            return false;
        }

        if (!qtype_has_amx_kernels(src0->type) && src0->type != GGML_TYPE_F16) {
            return false;
        }

        // src1 must be host buffer
        if (src1->buffer && !ggml_backend_buft_is_host(src1->buffer->buft)) {
            return false;
        }

        // src1 must be float32
        return src1->type == GGML_TYPE_F32;
    }

    // Returns the traits stored in src0->extra when `op` is a GGML_OP_MUL_MAT
    // whose src0 lives in a buffer of the AMX buffer type; nullptr otherwise.
    ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {
        if (op->op != GGML_OP_MUL_MAT) {
            return nullptr;
        }

        const struct ggml_tensor * src0 = op->src[0];
        if (!src0->buffer || src0->buffer->buft != ggml_backend_amx_buffer_type()) {
            return nullptr;
        }

        return static_cast<ggml::cpu::tensor_traits *>(src0->extra);
    }
};
}  // namespace ggml::cpu::amx

// Returns the number of bytes to reserve for `tensor` in an AMX buffer, as
// computed by ggml_backend_amx_get_alloc_size(). The buffer type argument is
// not used.
static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
    GGML_UNUSED(buft);

    const size_t nbytes = ggml_backend_amx_get_alloc_size(tensor);
    return nbytes;
}

// Constants for the arch_prctl() system call issued by ggml_amx_init() on
// Linux. Only ARCH_REQ_XCOMP_PERM and XFEATURE_XTILEDATA are used in this file.
// NOTE(review): the values presumably mirror the Linux kernel's x86 xstate
// definitions (get/request permission codes and the AMX tile config/data
// feature numbers) - confirm against the kernel headers.
#define ARCH_GET_XCOMP_PERM     0x1022
#define ARCH_REQ_XCOMP_PERM     0x1023
#define XFEATURE_XTILECFG       17
#define XFEATURE_XTILEDATA      18

// Prepares the process for using AMX and reports whether it is usable.
//
// - Linux: asks the kernel, through arch_prctl(ARCH_REQ_XCOMP_PERM), for
//   permission to use the XTILEDATA state component. A non-zero result is
//   reported on stderr and yields false.
// - Windows: no request is made; returns true.
// - Any other platform: returns false, since this file has no way to enable
//   AMX there. Without this branch the function would reach its end without
//   returning a value, which is undefined behaviour.
static bool ggml_amx_init() {
#if defined(__gnu_linux__)
    if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {
        fprintf(stderr, "AMX is not ready to be used!\n");
        return false;
    }
    return true;
#elif defined(_WIN32)
    return true;
#else
    return false;
#endif
}

// Returns the AMX buffer type, or nullptr when AMX could not be initialised.
//
// The buffer type is a function-local static, so every successful call returns
// the same object; its context is an extra_buffer_type instance and its device
// is device 0 of the CPU backend registry.
//
// ggml_amx_init() is evaluated once and its result cached. This function is
// also called from extra_buffer_type::supports_op() and get_tensor_traits(),
// so without the cache each of those calls would repeat the initialisation
// (a system call on Linux) and, on failure, print the error message again.
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {
    static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {
        /* .iface = */ {
                        /* .get_name         = */ ggml_backend_amx_buffer_type_get_name,
                        /* .alloc_buffer     = */ ggml_backend_amx_buffer_type_alloc_buffer,
                        /* .get_alignment    = */ ggml_backend_amx_buffer_type_get_alignment,
                        /* .get_max_size     = */ nullptr,  // defaults to SIZE_MAX
                        /* .get_alloc_size   = */ ggml_backend_amx_buffer_type_get_alloc_size,
                        /* .is_host          = */ nullptr,
                        },
        /* .device  = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
        /* .context = */ new ggml::cpu::amx::extra_buffer_type(),
    };

    // Initialised on first use only; later calls reuse the cached result.
    static const bool amx_ready = ggml_amx_init();
    if (!amx_ready) {
        return nullptr;
    }

    return &ggml_backend_buffer_type_amx;
}

#endif  // defined(__AMX_INT8__) && defined(__AVX512VNNI__)
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`/**`
llama: update vendored code to commit 46e3556 (#8308) 2025-01-08 11:22:01 -08:00			`* llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`*`
			`* MIT License`
			`*`
			`* Copyright (c) 2023-2024 The ggml authors`
			`*`
			`* Permission is hereby granted, free of charge, to any person obtaining a copy`
			`* of this software and associated documentation files (the "Software"), to deal`
			`* in the Software without restriction, including without limitation the rights`
			`* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`* copies of the Software, and to permit persons to whom the Software is`
			`* furnished to do so, subject to the following conditions:`
			`*`
			`* The above copyright notice and this permission notice shall be included in all`
			`* copies or substantial portions of the Software.`
			`*`
			`* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE`
			`* SOFTWARE.`
			`*/`

			`#include "amx.h"`
			`#include "common.h"`
			`#include "mmq.h"`
			`#include "ggml-backend-impl.h"`
			`#include "ggml-backend.h"`
			`#include "ggml-impl.h"`
			`#include "ggml-cpu.h"`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`#include "ggml-cpu-traits.h"`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00
			`#if defined(__gnu_linux__)`
			`#include <sys/syscall.h>`
			`#include <unistd.h>`
			`#endif`

			`#include <cstdlib>`
			`#include <cstring>`
			`#include <memory>`

			`#if defined(__AMX_INT8__) && defined(__AVX512VNNI__)`

llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`// AMX type_trais`
			`namespace ggml::cpu::amx {`
			`class tensor_traits : public ggml::cpu::tensor_traits {`
			`bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override {`
			`size = ggml_backend_amx_desired_wsize(op);`
			`return true;`
			`}`

			`bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) override {`
			`if (op->op == GGML_OP_MUL_MAT) {`
			`ggml_backend_amx_mul_mat(params, op);`
			`return true;`
			`}`
			`return false;`
			`}`
			`};`

			`static ggml::cpu::tensor_traits * get_tensor_traits(ggml_backend_buffer_t, struct ggml_tensor *) {`
			`static tensor_traits traits;`
			`return &traits;`
			`}`
			`} // namespace ggml::cpu::amx`

llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`// AMX buffer interface`
			`static void ggml_backend_amx_buffer_free_buffer(ggml_backend_buffer_t buffer) {`
			`free(buffer->context);`
			`}`

			`static void * ggml_backend_amx_buffer_get_base(ggml_backend_buffer_t buffer) {`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`return (void *) (buffer->context);`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`}`

llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`static void ggml_backend_amx_buffer_init_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {`
			`tensor->extra = (void *) ggml::cpu::amx::get_tensor_traits(buffer, tensor);`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00
			`GGML_UNUSED(buffer);`
			`}`

llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`static void ggml_backend_amx_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,`
			`uint8_t value, size_t offset, size_t size) {`
			`memset((char *) tensor->data + offset, value, size);`

			`GGML_UNUSED(buffer);`
			`}`

			`static void ggml_backend_amx_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor,`
			`const void * data, size_t offset, size_t size) {`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`if (qtype_has_amx_kernels(tensor->type)) {`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`GGML_LOG_DEBUG("%s: amx repack tensor %s of type %s\n", __func__, tensor->name, ggml_type_name(tensor->type));`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`ggml_backend_amx_convert_weight(tensor, data, offset, size);`
			`} else {`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`memcpy((char *) tensor->data + offset, data, size);`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`}`

			`GGML_UNUSED(buffer);`
			`}`

llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`/*`
			`// need to figure what we need to do with buffer->extra.`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`static void ggml_backend_amx_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {`
			`GGML_ASSERT(!qtype_has_amx_kernels(tensor->type));`
			`memcpy(data, (const char *)tensor->data + offset, size);`

			`GGML_UNUSED(buffer);`
			`}`

			`static bool ggml_backend_amx_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {`
			`if (ggml_backend_buffer_is_host(src->buffer)) {`
			`if (qtype_has_amx_kernels(src->type)) {`
			`ggml_backend_amx_convert_weight(dst, src->data, 0, ggml_nbytes(dst));`
			`} else {`
			`memcpy(dst->data, src->data, ggml_nbytes(src));`
			`}`
			`return true;`
			`}`
			`return false;`

			`GGML_UNUSED(buffer);`
			`}`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`*/`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00
			`static void ggml_backend_amx_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {`
			`memset(buffer->context, value, buffer->size);`
			`}`

			`static ggml_backend_buffer_i ggml_backend_amx_buffer_interface = {`
			`/* .free_buffer = */ ggml_backend_amx_buffer_free_buffer,`
			`/* .get_base = */ ggml_backend_amx_buffer_get_base,`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`/* .init_tensor = */ ggml_backend_amx_buffer_init_tensor,`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`/* .memset_tensor = */ ggml_backend_amx_buffer_memset_tensor,`
			`/* .set_tensor = */ ggml_backend_amx_buffer_set_tensor,`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`/* .get_tensor = */ nullptr,`
			`/* .cpy_tensor = */ nullptr,`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`/* .clear = */ ggml_backend_amx_buffer_clear,`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`/* .reset = */ nullptr,`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`};`

			`static const char * ggml_backend_amx_buffer_type_get_name(ggml_backend_buffer_type_t buft) {`
			`return "AMX";`

			`GGML_UNUSED(buft);`
			`}`

			`static ggml_backend_buffer_t ggml_backend_amx_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`void * data = ggml_aligned_malloc(size);`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`if (data == NULL) {`
			`fprintf(stderr, "%s: failed to allocate buffer of size %zu\n", __func__, size);`
			`return NULL;`
			`}`

			`return ggml_backend_buffer_init(buft, ggml_backend_amx_buffer_interface, data, size);`
			`}`

			`static size_t ggml_backend_amx_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) {`
			`return TENSOR_ALIGNMENT;`

			`GGML_UNUSED(buft);`
			`}`

llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`namespace ggml::cpu::amx {`
			`class extra_buffer_type : ggml::cpu::extra_buffer_type {`
			`bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {`
			`// handle only 2d gemm for now`
			`auto is_contiguous_2d = [](const struct ggml_tensor * t) {`
			`return ggml_is_contiguous(t) && t->ne[3] == 1 && t->ne[2] == 1;`
			`};`

			`if (op->op == GGML_OP_MUL_MAT && is_contiguous_2d(op->src[0]) && // src0 must be contiguous`
			`is_contiguous_2d(op->src[1]) && // src1 must be contiguous`
			`op->src[0]->buffer && op->src[0]->buffer->buft == ggml_backend_amx_buffer_type() &&`
			`op->ne[0] % (TILE_N * 2) == 0 && // out_features is 32x`
			`(qtype_has_amx_kernels(op->src[0]->type) || (op->src[0]->type == GGML_TYPE_F16))) {`
			`// src1 must be host buffer`
			`if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {`
			`return false;`
			`}`
			`// src1 must be float32`
			`if (op->src[1]->type == GGML_TYPE_F32) {`
			`return true;`
			`}`
			`}`
			`return false;`
			`}`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`ggml::cpu::tensor_traits * get_tensor_traits(const struct ggml_tensor * op) override {`
			`if (op->op == GGML_OP_MUL_MAT && op->src[0]->buffer &&`
			`op->src[0]->buffer->buft == ggml_backend_amx_buffer_type()) {`
			`return (ggml::cpu::tensor_traits *) op->src[0]->extra;`
			`}`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`return nullptr;`
			`}`
			`};`
			`} // namespace ggml::cpu::amx`

			`static size_t ggml_backend_amx_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {`
			`return ggml_backend_amx_get_alloc_size(tensor);`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00
			`GGML_UNUSED(buft);`
			`}`

			`#define ARCH_GET_XCOMP_PERM 0x1022`
			`#define ARCH_REQ_XCOMP_PERM 0x1023`
			`#define XFEATURE_XTILECFG 17`
			`#define XFEATURE_XTILEDATA 18`

			`static bool ggml_amx_init() {`
			`#if defined(__gnu_linux__)`
			`if (syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA)) {`
			`fprintf(stderr, "AMX is not ready to be used!\n");`
			`return false;`
			`}`
			`return true;`
			`#elif defined(_WIN32)`
			`return true;`
			`#endif`
			`}`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`ggml_backend_buffer_type_t ggml_backend_amx_buffer_type() {`
			`static struct ggml_backend_buffer_type ggml_backend_buffer_type_amx = {`
			`/* .iface = */ {`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`/* .get_name = */ ggml_backend_amx_buffer_type_get_name,`
			`/* .alloc_buffer = */ ggml_backend_amx_buffer_type_alloc_buffer,`
			`/* .get_alignment = */ ggml_backend_amx_buffer_type_get_alignment,`
			`/* .get_max_size = */ nullptr, // defaults to SIZE_MAX`
			`/* .get_alloc_size = */ ggml_backend_amx_buffer_type_get_alloc_size,`
			`/* .is_host = */ nullptr,`
			`},`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`/* .context = */ new ggml::cpu::amx::extra_buffer_type(),`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`};`

			`if (!ggml_amx_init()) {`
llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`return nullptr;`
llama: update vendored code to commit 40c6d79f (#7875) 2024-12-10 19:21:34 -08:00			`}`

			`return &ggml_backend_buffer_type_amx;`
			`}`

llama: update vendor code to commit ba1cb19c (#8101) 2024-12-14 14:55:51 -08:00			`#endif // defined(__AMX_INT8__) && defined(__AVX512VNNI__)`