mirror of
https://github.com/ollama/ollama.git
synced 2025-04-07 03:18:24 +02:00
ollama debug tensor
This commit is contained in:
parent
6b0486c216
commit
9e4642e9b3
33
llama/patches/0020-ollama-debug-tensor.patch
Normal file
33
llama/patches/0020-ollama-debug-tensor.patch
Normal file
@ -0,0 +1,33 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <mxyng@pm.me>
|
||||
Date: Sun, 9 Mar 2025 14:44:16 -0700
|
||||
Subject: [PATCH] ollama debug tensor
|
||||
|
||||
---
|
||||
ggml/src/ggml-cpu/ggml-cpu.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
|
||||
index 2f606d82..ec60e8fc 100644
|
||||
--- a/ggml/src/ggml-cpu/ggml-cpu.c
|
||||
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
|
||||
@@ -11,6 +11,8 @@
|
||||
#include "ggml-threading.h"
|
||||
#include "ggml.h"
|
||||
|
||||
+#include "ollama-debug.h"
|
||||
+
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
|
||||
@@ -14103,6 +14105,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
|
||||
ggml_compute_forward(¶ms, node);
|
||||
|
||||
+#ifdef OLLAMA_DEBUG
|
||||
+ ollama_debug(node, true);
|
||||
+#endif
|
||||
+
|
||||
if (state->ith == 0 && cplan->abort_callback &&
|
||||
cplan->abort_callback(cplan->abort_callback_data)) {
|
||||
atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
|
@ -355,7 +355,7 @@ func New(r *os.File, params ml.BackendParams) (ml.Backend, error) {
|
||||
|
||||
if C.ggml_backend_is_cpu(b) {
|
||||
// set number of threads for cpu backend
|
||||
C.ggml_backend_cpu_set_n_threads(b, C.int(params.NumThreads))
|
||||
C.ggml_backend_cpu_set_n_threads(b, C.int(Threads(params.NumThreads)))
|
||||
}
|
||||
}
|
||||
|
||||
|
11
ml/backend/ggml/ggml/include/ollama-debug.h
vendored
Normal file
11
ml/backend/ggml/ggml/include/ollama-debug.h
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
#include "ggml.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void ollama_debug(const struct ggml_tensor *tensor, bool verbose);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
6
ml/backend/ggml/ggml/src/ggml-cpu/cpu_debug.go
Normal file
6
ml/backend/ggml/ggml/src/ggml-cpu/cpu_debug.go
Normal file
@ -0,0 +1,6 @@
|
||||
//go:build debug
|
||||
|
||||
package cpu
|
||||
|
||||
// #cgo CPPFLAGS: -DOLLAMA_DEBUG
|
||||
import "C"
|
6
ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c
vendored
6
ml/backend/ggml/ggml/src/ggml-cpu/ggml-cpu.c
vendored
@ -11,6 +11,8 @@
|
||||
#include "ggml-threading.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#include "ollama-debug.h"
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
|
||||
@ -14103,6 +14105,10 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
|
||||
ggml_compute_forward(¶ms, node);
|
||||
|
||||
#ifdef OLLAMA_DEBUG
|
||||
ollama_debug(node, true);
|
||||
#endif
|
||||
|
||||
if (state->ith == 0 && cplan->abort_callback &&
|
||||
cplan->abort_callback(cplan->abort_callback_data)) {
|
||||
atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
|
||||
|
115
ml/backend/ggml/ggml/src/ollama-debug.c
vendored
Normal file
115
ml/backend/ggml/ggml/src/ollama-debug.c
vendored
Normal file
@ -0,0 +1,115 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "ollama-debug.h"
|
||||
|
||||
static int mul(int64_t *dims, int ndims) {
|
||||
int result = 1;
|
||||
for (int i = 0; i < ndims; i++) {
|
||||
result *= dims[i];
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void repeat(char c, int n) {
|
||||
for (int i = 0; i < n; i++) {
|
||||
fprintf(stderr, "%c", c);
|
||||
}
|
||||
}
|
||||
|
||||
static void print_tensor(const void *tensor, void (*cb)(const void *, int),
|
||||
int shape,
|
||||
int64_t *dims, int ndims, int stride,
|
||||
int nitems, int pad) {
|
||||
fprintf(stderr, "[");
|
||||
for (int i = 0; i < dims[0]; i++) {
|
||||
if (i >= nitems && i < dims[0] - nitems) {
|
||||
fprintf(stderr, "... (%lld more), ", dims[0] - 2 * nitems);
|
||||
int skip = dims[0] - 2 * nitems;
|
||||
if (ndims > 1) {
|
||||
stride += mul(dims + 1, ndims - 1) * skip;
|
||||
repeat('\n', ndims - 1);
|
||||
repeat(' ', shape - ndims + 1 + pad);
|
||||
}
|
||||
i += skip - 1;
|
||||
} else if (ndims > 1) {
|
||||
print_tensor(tensor, cb, shape, dims + 1, ndims - 1, stride,
|
||||
nitems, pad);
|
||||
stride += mul(dims + 1, ndims - 1);
|
||||
if (i < dims[0] - 1) {
|
||||
fprintf(stderr, ", ");
|
||||
repeat('\n', ndims - 1);
|
||||
repeat(' ', shape - ndims + 1 + pad);
|
||||
}
|
||||
} else {
|
||||
cb(tensor, stride + i);
|
||||
if (i < dims[0] - 1) {
|
||||
fprintf(stderr, ", ");
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "]");
|
||||
}
|
||||
|
||||
static void print_tensor_f16(const void *tensor, int i) {
|
||||
float value = ggml_fp16_to_fp32(((const ggml_fp16_t *)tensor)[i]);
|
||||
fprintf(stderr, "%s%f", value < 0 ? "" : " ", value);
|
||||
}
|
||||
|
||||
static void print_tensor_f32(const void *tensor, int i) {
|
||||
float value = ((const float *)tensor)[i];
|
||||
fprintf(stderr, "%s%f", value < 0 ? "" : " ", value);
|
||||
}
|
||||
|
||||
static void print_tensor_i32(const void *tensor, int i) {
|
||||
int32_t value = ((const int32_t *)tensor)[i];
|
||||
fprintf(stderr, "%s%d", value < 0 ? "" : " ", value);
|
||||
}
|
||||
|
||||
static void ollama_debug_tensor(const struct ggml_tensor *tensor, bool verbose, const char *prefix, int indent) {
|
||||
fprintf(stderr, "%s%s %s (%s): [%lld %lld %lld %lld]\n", prefix, tensor->name,
|
||||
ggml_op_name(tensor->op), ggml_type_name(tensor->type), tensor->ne[0],
|
||||
tensor->ne[1], tensor->ne[2], tensor->ne[3]);
|
||||
|
||||
if (!verbose) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < indent; i++) {
|
||||
fprintf(stderr, " ");
|
||||
}
|
||||
|
||||
switch (tensor->type) {
|
||||
case GGML_TYPE_F16:
|
||||
print_tensor(ggml_get_data(tensor), print_tensor_f16, ggml_n_dims(tensor),
|
||||
(int64_t *)tensor->ne, ggml_n_dims(tensor), 0, 3, indent);
|
||||
break;
|
||||
case GGML_TYPE_F32:
|
||||
print_tensor(ggml_get_data(tensor), print_tensor_f32, ggml_n_dims(tensor),
|
||||
(int64_t *)tensor->ne, ggml_n_dims(tensor), 0, 3, indent);
|
||||
break;
|
||||
case GGML_TYPE_I32:
|
||||
print_tensor(ggml_get_data(tensor), print_tensor_i32, ggml_n_dims(tensor),
|
||||
(int64_t *)tensor->ne, ggml_n_dims(tensor), 0, 3, indent);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "<unsupported type>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
void ollama_debug(const struct ggml_tensor *tensor, bool verbose) {
|
||||
ollama_debug_tensor(tensor, verbose, ">>> ", 4);
|
||||
|
||||
for (int i = 0; i < GGML_MAX_SRC && tensor->src[i] != NULL; ++i) {
|
||||
char src[8];
|
||||
const int n = snprintf(src, sizeof(src), " src%d ", i);
|
||||
if (n >= sizeof(src)) {
|
||||
src[sizeof(src) - 1] = '\0';
|
||||
}
|
||||
|
||||
ollama_debug_tensor(tensor->src[i], verbose, src, 4);
|
||||
}
|
||||
}
|
7
ml/backend/ggml/threads.go
Normal file
7
ml/backend/ggml/threads.go
Normal file
@ -0,0 +1,7 @@
|
||||
//go:build !debug
|
||||
|
||||
package ggml
|
||||
|
||||
func Threads(n int) int {
|
||||
return n
|
||||
}
|
7
ml/backend/ggml/threads_debug.go
Normal file
7
ml/backend/ggml/threads_debug.go
Normal file
@ -0,0 +1,7 @@
|
||||
//go:build debug
|
||||
|
||||
package ggml
|
||||
|
||||
func Threads(_ int) int {
|
||||
return 1
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user