mirror of
https://github.com/ollama/ollama.git
synced 2025-09-30 19:43:30 +02:00
Adapted rocm support to cgo based llama.cpp
This commit is contained in:
119
gpu/gpu.go
Normal file
119
gpu/gpu.go
Normal file
@@ -0,0 +1,119 @@
|
||||
//go:build linux || windows
|
||||
|
||||
package gpu
|
||||
|
||||
/*
|
||||
#include "gpu_info.h"
|
||||
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
)
|
||||
|
||||
type handles struct {
|
||||
cuda *C.cuda_handle_t
|
||||
rocm *C.rocm_handle_t
|
||||
}
|
||||
|
||||
var gpuMutex sync.Mutex
|
||||
var gpuHandles *handles = nil
|
||||
|
||||
// Note: gpuMutex must already be held
|
||||
func initGPUHandles() {
|
||||
log.Printf("Detecting GPU type")
|
||||
gpuHandles = &handles{nil, nil}
|
||||
var resp C.cuda_init_resp_t
|
||||
C.cuda_init(&resp)
|
||||
if resp.err != nil {
|
||||
log.Printf("CUDA not detected: %s", C.GoString(resp.err))
|
||||
C.free(unsafe.Pointer(resp.err))
|
||||
|
||||
var resp C.rocm_init_resp_t
|
||||
C.rocm_init(&resp)
|
||||
if resp.err != nil {
|
||||
log.Printf("ROCm not detected: %s", C.GoString(resp.err))
|
||||
C.free(unsafe.Pointer(resp.err))
|
||||
} else {
|
||||
log.Printf("Radeon GPU detected")
|
||||
rocm := resp.rh
|
||||
gpuHandles.rocm = &rocm
|
||||
}
|
||||
} else {
|
||||
log.Printf("Nvidia GPU detected")
|
||||
cuda := resp.ch
|
||||
gpuHandles.cuda = &cuda
|
||||
}
|
||||
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
// TODO - consider exploring lspci (and equivalent on windows) to check for
|
||||
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
|
||||
gpuMutex.Lock()
|
||||
defer gpuMutex.Unlock()
|
||||
if gpuHandles == nil {
|
||||
initGPUHandles()
|
||||
}
|
||||
|
||||
var memInfo C.mem_info_t
|
||||
var resp GpuInfo
|
||||
if gpuHandles.cuda != nil {
|
||||
C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
|
||||
resp.Driver = "CUDA"
|
||||
} else if gpuHandles.rocm != nil {
|
||||
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
|
||||
resp.Driver = "ROCM"
|
||||
} else {
|
||||
C.cpu_check_ram(&memInfo)
|
||||
resp.Driver = "CPU"
|
||||
}
|
||||
if memInfo.err != nil {
|
||||
log.Printf("error looking up GPU memory: %s", C.GoString(memInfo.err))
|
||||
C.free(unsafe.Pointer(memInfo.err))
|
||||
}
|
||||
resp.FreeMemory = uint64(memInfo.free)
|
||||
resp.TotalMemory = uint64(memInfo.total)
|
||||
return resp
|
||||
}
|
||||
|
||||
func CheckVRAM() (int64, error) {
|
||||
gpuInfo := GetGPUInfo()
|
||||
if gpuInfo.FreeMemory > 0 && gpuInfo.Driver != "CPU" {
|
||||
return int64(gpuInfo.FreeMemory), nil
|
||||
}
|
||||
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
|
||||
}
|
||||
|
||||
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
|
||||
if opts.NumGPU != -1 {
|
||||
return opts.NumGPU
|
||||
}
|
||||
info := GetGPUInfo()
|
||||
if info.Driver == "CPU" {
|
||||
return 0
|
||||
}
|
||||
|
||||
/*
|
||||
Calculate bytes per layer, this will roughly be the size of the model file divided by the number of layers.
|
||||
We can store the model weights and the kv cache in vram,
|
||||
to enable kv chache vram storage add two additional layers to the number of layers retrieved from the model file.
|
||||
*/
|
||||
bytesPerLayer := uint64(fileSizeBytes / numLayer)
|
||||
|
||||
// 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors
|
||||
layers := int(info.FreeMemory/bytesPerLayer) * 3 / 4
|
||||
|
||||
// TODO - not sure on this part... if we can't fit all the layers, just fallback to CPU
|
||||
// if int64(layers) < numLayer {
|
||||
// log.Printf("%d MB VRAM available, insufficient to load current model (reuires %d MB) - falling back to CPU %d", freeBytes/(1024*1024), fileSizeBytes/(1024*1024))
|
||||
// return 0
|
||||
// }
|
||||
log.Printf("%d MB VRAM available, loading up to %d GPU layers out of %d", info.FreeMemory/(1024*1024), layers, numLayer)
|
||||
|
||||
return layers
|
||||
}
|
34
gpu/gpu_darwin.go
Normal file
34
gpu/gpu_darwin.go
Normal file
@@ -0,0 +1,34 @@
|
||||
//go:build darwin
|
||||
|
||||
package gpu
|
||||
|
||||
import "C"
|
||||
import (
|
||||
"github.com/jmorganca/ollama/api"
|
||||
)
|
||||
|
||||
// CheckVRAM is the darwin placeholder: it always reports 0 bytes and a nil
// error.
//
// TODO - assume metal, and return free memory?
func CheckVRAM() (int64, error) {
	return 0, nil
}
|
||||
|
||||
func GetGPUInfo() GpuInfo {
|
||||
// TODO - Metal vs. x86 macs...
|
||||
|
||||
return GpuInfo{
|
||||
Driver: "METAL",
|
||||
TotalMemory: 0,
|
||||
FreeMemory: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
|
||||
// default to enable metal on macOS
|
||||
return 1
|
||||
}
|
||||
|
||||
// nativeInit is a no-op in the darwin build and always returns nil.
func nativeInit() error {
	return nil
}
|
49
gpu/gpu_info.h
Normal file
49
gpu/gpu_info.h
Normal file
@@ -0,0 +1,49 @@
|
||||
#ifndef __APPLE__
|
||||
#ifndef __GPU_INFO_H__
|
||||
#define __GPU_INFO_H__
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <dlfcn.h>
|
||||
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
|
||||
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
|
||||
#define LOAD_ERR() dlerror()
|
||||
#define UNLOAD_LIBRARY(handle) dlclose(handle)
|
||||
#else
|
||||
#include <windows.h>
|
||||
#define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
|
||||
#define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
|
||||
#define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
|
||||
|
||||
// TODO - refactor this with proper error message handling on windows
//
// LOAD_ERR formats the calling thread's GetLastError() code as a hex string.
// The result lives in a static buffer: it is overwritten by the next call and
// must not be freed by the caller (strdup it to keep it).
inline static char *LOAD_ERR() {
  // "0x" + up to 8 hex digits of a 32-bit error code + NUL needs 11 bytes;
  // the previous 8-byte buffer truncated any code above 0xFFFFF.
  static char errbuf[16];
  snprintf(errbuf, sizeof(errbuf), "0x%lx", GetLastError());
  return errbuf;
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Result of a memory query, filled in by the *_check_vram / cpu_check_ram
// helpers.
struct mem_info {
  uint64_t total;  // total memory
  uint64_t free;   // free memory
  char *err;       // If non-NULL, caller is responsible for freeing
};
typedef struct mem_info mem_info_t;

// Fills resp with system RAM figures; used when no GPU library is available.
void cpu_check_ram(mem_info_t *resp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#include "gpu_info_cuda.h"
|
||||
#include "gpu_info_rocm.h"
|
||||
|
||||
#endif // __GPU_INFO_H__
|
||||
#endif // __APPLE__
|
42
gpu/gpu_info_cpu.c
Normal file
42
gpu/gpu_info_cpu.c
Normal file
@@ -0,0 +1,42 @@
|
||||
#include "gpu_info.h"
|
||||
// Fallbacks for CPU mode
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <sysinfoapi.h>
|
||||
// cpu_check_ram (Windows) fills resp with the physical memory totals reported
// by GlobalMemoryStatusEx. On failure resp->err is set to a heap-allocated
// string holding the GetLastError() code; the caller must free it.
void cpu_check_ram(mem_info_t *resp) {
  MEMORYSTATUSEX info;

  resp->err = NULL;
  resp->total = 0;
  resp->free = 0;

  // GlobalMemoryStatusEx requires dwLength to be set before the call;
  // without it the call fails.
  info.dwLength = sizeof(info);
  if (GlobalMemoryStatusEx(&info) != 0) {
    resp->total = info.ullTotalPhys;
    resp->free = info.ullAvailPhys;
  } else {
    resp->err = strdup(LOAD_ERR());
  }
  return;
}
|
||||
|
||||
#elif __linux__
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/sysinfo.h>
|
||||
// cpu_check_ram (Linux) fills resp with total and free RAM in bytes as
// reported by sysinfo(2). On failure resp->err is set to a heap-allocated
// copy of the errno message; the caller must free it.
void cpu_check_ram(mem_info_t *resp) {
  struct sysinfo info;

  resp->err = NULL;
  resp->total = 0;
  resp->free = 0;

  if (sysinfo(&info) != 0) {
    resp->err = strdup(strerror(errno));
    return;
  }

  // totalram/freeram are counted in units of mem_unit bytes. Widen before
  // multiplying so the product cannot wrap where unsigned long is 32 bits.
  resp->total = (uint64_t)info.totalram * info.mem_unit;
  resp->free = (uint64_t)info.freeram * info.mem_unit;
  return;
}
|
||||
|
||||
#elif __APPLE__
|
||||
// TODO consider an Apple implementation that does something useful
|
||||
// mem_info_t cpu_check_ram() {
|
||||
// mem_info_t resp = {0, 0, NULL};
|
||||
// return resp;
|
||||
// }
|
||||
#else
|
||||
#error "Unsupported platform"
|
||||
#endif
|
110
gpu/gpu_info_cuda.c
Normal file
110
gpu/gpu_info_cuda.c
Normal file
@@ -0,0 +1,110 @@
|
||||
#ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?
|
||||
|
||||
#include "gpu_info_cuda.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
const char *cuda_lib_paths[] = {
|
||||
"libnvidia-ml.so",
|
||||
"/usr/local/cuda/lib64/libnvidia-ml.so",
|
||||
NULL,
|
||||
};
|
||||
#else
|
||||
const char *cuda_lib_paths[] = {
|
||||
"nvml.dll",
|
||||
"",
|
||||
NULL,
|
||||
};
|
||||
#endif
|
||||
|
||||
void cuda_init(cuda_init_resp_t *resp) {
|
||||
resp->err = NULL;
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
} l[4] = {
|
||||
{"nvmlInit_v2", (void *)&resp->ch.initFn},
|
||||
{"nvmlShutdown", (void *)&resp->ch.shutdownFn},
|
||||
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
|
||||
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
|
||||
};
|
||||
|
||||
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
||||
resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);
|
||||
}
|
||||
if (!resp->ch.handle) {
|
||||
snprintf(buf, buflen,
|
||||
"Unable to load %s library to query for Nvidia GPUs: %s",
|
||||
cuda_lib_paths[0], LOAD_ERR());
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) { // TODO - fix this to use a null terminated list
|
||||
*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
|
||||
if (!l[i].p) {
|
||||
UNLOAD_LIBRARY(resp->ch.handle);
|
||||
resp->ch.handle = NULL;
|
||||
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
|
||||
LOAD_ERR());
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// cuda_check_vram queries total and free memory of device index 0 through the
// NVML entry points held in h and stores them in resp.
//
// NVML is initialised and shut down around every query. On any failure
// resp->err is set to a heap-allocated message that the caller must free.
// If only the final shutdown fails, resp->total and resp->free are still
// populated alongside the error.
void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
  nvmlDevice_t device;
  nvmlMemory_t memInfo = {0};
  nvmlReturn_t ret;
  char buf[256];

  resp->err = NULL;
  resp->total = 0;
  resp->free = 0;

  // Refuse to call through a handle whose library or entry points are
  // missing rather than jumping through a NULL function pointer.
  if (h.handle == NULL || h.initFn == NULL || h.shutdownFn == NULL ||
      h.getHandle == NULL || h.getMemInfo == NULL) {
    resp->err = strdup("nvml handle isn't initialized");
    return;
  }

  ret = (*h.initFn)();
  if (ret != NVML_SUCCESS) {
    snprintf(buf, sizeof(buf), "nvml vram init failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  // TODO - handle multiple GPUs
  ret = (*h.getHandle)(0, &device);
  if (ret != NVML_SUCCESS) {
    (*h.shutdownFn)();
    snprintf(buf, sizeof(buf), "unable to get device handle: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  ret = (*h.getMemInfo)(device, &memInfo);
  if (ret != NVML_SUCCESS) {
    (*h.shutdownFn)();
    snprintf(buf, sizeof(buf), "device memory info lookup failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }
  resp->total = memInfo.total;
  resp->free = memInfo.free;

  ret = (*h.shutdownFn)();
  if (ret != NVML_SUCCESS) {
    snprintf(buf, sizeof(buf), "nvml vram shutdown failure: %d", ret);
    resp->err = strdup(buf);
  }

  return;
}
|
||||
#endif // __APPLE__
|
35
gpu/gpu_info_cuda.h
Normal file
35
gpu/gpu_info_cuda.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef __APPLE__
|
||||
#ifndef __GPU_INFO_CUDA_H__
|
||||
#define __GPU_INFO_CUDA_H__
|
||||
#include "gpu_info.h"
|
||||
|
||||
// Just enough typedef's to dlopen/dlsym for memory information
|
||||
typedef enum nvmlReturn_enum {
|
||||
NVML_SUCCESS = 0,
|
||||
// Other values omitted for now...
|
||||
} nvmlReturn_t;
|
||||
typedef void *nvmlDevice_t; // Opaque is sufficient
|
||||
typedef struct nvmlMemory_st {
|
||||
unsigned long long total;
|
||||
unsigned long long free;
|
||||
unsigned long long used;
|
||||
} nvmlMemory_t;
|
||||
|
||||
typedef struct cuda_handle {
|
||||
void *handle;
|
||||
nvmlReturn_t (*initFn)(void);
|
||||
nvmlReturn_t (*shutdownFn)(void);
|
||||
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
|
||||
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
|
||||
} cuda_handle_t;
|
||||
|
||||
typedef struct cuda_init_resp {
|
||||
char *err; // If err is non-null handle is invalid
|
||||
cuda_handle_t ch;
|
||||
} cuda_init_resp_t;
|
||||
|
||||
void cuda_init(cuda_init_resp_t *resp);
|
||||
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
|
||||
|
||||
#endif // __GPU_INFO_CUDA_H__
|
||||
#endif // __APPLE__
|
111
gpu/gpu_info_rocm.c
Normal file
111
gpu/gpu_info_rocm.c
Normal file
@@ -0,0 +1,111 @@
|
||||
#ifndef __APPLE__
|
||||
|
||||
#include "gpu_info_rocm.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
const char *rocm_lib_paths[] = {
|
||||
"librocm_smi64.so",
|
||||
"/opt/rocm/lib/librocm_smi64.so",
|
||||
NULL,
|
||||
};
|
||||
#else
|
||||
// TODO untested
|
||||
const char *rocm_lib_paths[] = {
|
||||
"rocm_smi64.dll",
|
||||
"/opt/rocm/lib/rocm_smi64.dll",
|
||||
NULL,
|
||||
};
|
||||
#endif
|
||||
|
||||
void rocm_init(rocm_init_resp_t *resp) {
|
||||
resp->err = NULL;
|
||||
const int buflen = 256;
|
||||
char buf[buflen + 1];
|
||||
int i;
|
||||
struct lookup {
|
||||
char *s;
|
||||
void **p;
|
||||
} l[4] = {
|
||||
{"rsmi_init", (void *)&resp->rh.initFn},
|
||||
{"rsmi_shut_down", (void *)&resp->rh.shutdownFn},
|
||||
{"rsmi_dev_memory_total_get", (void *)&resp->rh.totalMemFn},
|
||||
{"rsmi_dev_memory_usage_get", (void *)&resp->rh.usageMemFn},
|
||||
// { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
|
||||
};
|
||||
|
||||
for (i = 0; rocm_lib_paths[i] != NULL && resp->rh.handle == NULL; i++) {
|
||||
resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY);
|
||||
}
|
||||
if (!resp->rh.handle) {
|
||||
snprintf(buf, buflen,
|
||||
"Unable to load %s library to query for Radeon GPUs: %s\n",
|
||||
rocm_lib_paths[0], LOAD_ERR());
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
*l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
|
||||
if (!l[i].p) {
|
||||
UNLOAD_LIBRARY(resp->rh.handle);
|
||||
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
|
||||
LOAD_ERR());
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// rocm_check_vram queries total and used VRAM of device index 0 through the
// ROCm SMI entry points held in h, and stores total and (total - used) in
// resp.
//
// ROCm SMI is initialised and shut down around every query. On failure
// resp->err is set to a heap-allocated message that the caller must free.
void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
  uint64_t totalMem = 0;
  uint64_t usedMem = 0;
  rsmi_status_t ret;
  char buf[256];

  resp->err = NULL;
  resp->total = 0;
  resp->free = 0;

  // Refuse to call through a handle whose library or entry points are
  // missing rather than jumping through a NULL function pointer.
  if (h.handle == NULL || h.initFn == NULL || h.shutdownFn == NULL ||
      h.totalMemFn == NULL || h.usageMemFn == NULL) {
    resp->err = strdup("rocm handle isn't initialized");
    return;
  }

  ret = (*h.initFn)(0);
  if (ret != RSMI_STATUS_SUCCESS) {
    snprintf(buf, sizeof(buf), "rocm vram init failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  // TODO - iterate through devices (rsmi_num_monitor_devices) instead of
  // always querying device index 0.

  // Get total memory - used memory for available memory
  ret = (*h.totalMemFn)(0, RSMI_MEM_TYPE_VRAM, &totalMem);
  if (ret != RSMI_STATUS_SUCCESS) {
    (*h.shutdownFn)();
    snprintf(buf, sizeof(buf), "rocm total mem lookup failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }
  ret = (*h.usageMemFn)(0, RSMI_MEM_TYPE_VRAM, &usedMem);
  if (ret != RSMI_STATUS_SUCCESS) {
    (*h.shutdownFn)();
    snprintf(buf, sizeof(buf), "rocm usage mem lookup failure: %d", ret);
    resp->err = strdup(buf);
    return;
  }

  (*h.shutdownFn)();
  resp->total = totalMem;
  // Clamp so an inconsistent reading cannot wrap the unsigned subtraction.
  resp->free = usedMem > totalMem ? 0 : totalMem - usedMem;
  return;
}
|
||||
|
||||
#endif // __APPLE__
|
36
gpu/gpu_info_rocm.h
Normal file
36
gpu/gpu_info_rocm.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef __APPLE__
|
||||
#ifndef __GPU_INFO_ROCM_H__
|
||||
#define __GPU_INFO_ROCM_H__
|
||||
#include "gpu_info.h"
|
||||
|
||||
// Just enough typedef's to dlopen/dlsym for memory information
|
||||
typedef enum rsmi_status_return {
|
||||
RSMI_STATUS_SUCCESS = 0,
|
||||
// Other values omitted for now...
|
||||
} rsmi_status_t;
|
||||
|
||||
typedef enum rsmi_memory_type {
|
||||
RSMI_MEM_TYPE_VRAM = 0,
|
||||
RSMI_MEM_TYPE_VIS_VRAM,
|
||||
RSMI_MEM_TYPE_GTT,
|
||||
} rsmi_memory_type_t;
|
||||
|
||||
typedef struct rocm_handle {
|
||||
void *handle;
|
||||
rsmi_status_t (*initFn)(uint64_t);
|
||||
rsmi_status_t (*shutdownFn)(void);
|
||||
rsmi_status_t (*totalMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *);
|
||||
rsmi_status_t (*usageMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *);
|
||||
// rsmi_status_t (*getHandle)(uint32_t, uint16_t *);
|
||||
} rocm_handle_t;
|
||||
|
||||
typedef struct rocm_init_resp {
|
||||
char *err; // If err is non-null handle is invalid
|
||||
rocm_handle_t rh;
|
||||
} rocm_init_resp_t;
|
||||
|
||||
void rocm_init(rocm_init_resp_t *resp);
|
||||
void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
|
||||
|
||||
#endif // __GPU_INFO_ROCM_H__
|
||||
#endif // __APPLE__
|
26
gpu/gpu_test.go
Normal file
26
gpu/gpu_test.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package gpu
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestBasicGetGPUInfo(t *testing.T) {
|
||||
info := GetGPUInfo()
|
||||
assert.Contains(t, "CUDA ROCM CPU METAL", info.Driver)
|
||||
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
// TODO - remove this once MacOS returns some size for CPU
|
||||
return
|
||||
case "linux", "windows":
|
||||
assert.Greater(t, info.TotalMemory, uint64(0))
|
||||
assert.Greater(t, info.FreeMemory, uint64(0))
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected
|
10
gpu/types.go
Normal file
10
gpu/types.go
Normal file
@@ -0,0 +1,10 @@
|
||||
package gpu
|
||||
|
||||
// GpuInfo describes the detected compute driver and its memory figures.
// It is the beginning of an `ollama info` command.
type GpuInfo struct {
	// Driver is the name of the detected driver.
	Driver string `json:"driver,omitempty"`
	// TotalMemory is the total memory reported for that driver.
	TotalMemory uint64 `json:"total_memory,omitempty"`
	// FreeMemory is the free memory reported for that driver.
	FreeMemory uint64 `json:"free_memory,omitempty"`

	// TODO add other useful attributes about the card here for discovery information
}
|
Reference in New Issue
Block a user