mirror of
https://github.com/ollama/ollama.git
synced 2025-07-12 18:12:44 +02:00
Revamp the dynamic library shim
This switches the default llama.cpp build to be CPU-based and builds the GPU variants as dynamically loaded libraries that can be selected at runtime. It also bumps the ROCm library to version 6, because 5.7 builds do not work with the latest ROCm release that just shipped.
This commit is contained in:
@ -5,7 +5,7 @@ package llm
|
||||
/*
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "rocm_shim.h"
|
||||
#include "dynamic_shim.h"
|
||||
|
||||
*/
|
||||
import "C"
|
||||
@ -18,20 +18,20 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/jmorganca/ollama/api"
|
||||
)
|
||||
|
||||
//go:embed llama.cpp/gguf/build/*/lib/*
|
||||
//go:embed llama.cpp/gguf/build/lib/*
|
||||
var libEmbed embed.FS
|
||||
|
||||
var RocmShimMissing = fmt.Errorf("ROCm shim library not included in this build of ollama. Radeon GPUs are not supported")
|
||||
|
||||
type shimExtServer struct {
|
||||
s C.struct_rocm_llama_server
|
||||
s C.struct_dynamic_llama_server
|
||||
options api.Options
|
||||
}
|
||||
|
||||
@ -40,50 +40,58 @@ var shimMutex sync.Mutex
|
||||
var llm *shimExtServer
|
||||
|
||||
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
|
||||
C.rocm_shim_llama_server_init(llm.s, sparams, err)
|
||||
C.dynamic_shim_llama_server_init(llm.s, sparams, err)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_start() {
|
||||
C.rocm_shim_llama_server_start(llm.s)
|
||||
C.dynamic_shim_llama_server_start(llm.s)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_stop() {
|
||||
C.rocm_shim_llama_server_stop(llm.s)
|
||||
C.dynamic_shim_llama_server_stop(llm.s)
|
||||
}
|
||||
|
||||
func (llm *shimExtServer) llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t) {
|
||||
C.rocm_shim_llama_server_completion(llm.s, json_req, resp)
|
||||
C.dynamic_shim_llama_server_completion(llm.s, json_req, resp)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t) {
|
||||
C.rocm_shim_llama_server_completion_next_result(llm.s, task_id, resp)
|
||||
C.dynamic_shim_llama_server_completion_next_result(llm.s, task_id, resp)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t) {
|
||||
C.rocm_shim_llama_server_completion_cancel(llm.s, task_id, err)
|
||||
C.dynamic_shim_llama_server_completion_cancel(llm.s, task_id, err)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_release_task_result(result *C.ext_server_task_result_t) {
|
||||
C.rocm_shim_llama_server_release_task_result(llm.s, result)
|
||||
C.dynamic_shim_llama_server_release_task_result(llm.s, result)
|
||||
}
|
||||
|
||||
func (llm *shimExtServer) llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
|
||||
C.rocm_shim_llama_server_tokenize(llm.s, json_req, json_resp, err)
|
||||
C.dynamic_shim_llama_server_tokenize(llm.s, json_req, json_resp, err)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
|
||||
C.rocm_shim_llama_server_detokenize(llm.s, json_req, json_resp, err)
|
||||
C.dynamic_shim_llama_server_detokenize(llm.s, json_req, json_resp, err)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
|
||||
C.rocm_shim_llama_server_embedding(llm.s, json_req, json_resp, err)
|
||||
C.dynamic_shim_llama_server_embedding(llm.s, json_req, json_resp, err)
|
||||
}
|
||||
func (llm *shimExtServer) llama_server_release_json_resp(json_resp **C.char) {
|
||||
C.rocm_shim_llama_server_release_json_resp(llm.s, json_resp)
|
||||
C.dynamic_shim_llama_server_release_json_resp(llm.s, json_resp)
|
||||
}
|
||||
|
||||
func newRocmShimExtServer(model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
if !ShimPresent {
|
||||
return nil, RocmShimMissing
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
shimMutex.Lock()
|
||||
defer shimMutex.Unlock()
|
||||
libPath := C.CString(library)
|
||||
defer C.free(unsafe.Pointer(libPath))
|
||||
resp := newExtServerResp(128)
|
||||
defer freeExtServerResp(resp)
|
||||
var srv C.struct_dynamic_llama_server
|
||||
C.dynamic_shim_init(libPath, &srv, &resp)
|
||||
if resp.id < 0 {
|
||||
return nil, fmt.Errorf("Unable to load dynamic library: %s", C.GoString(resp.msg))
|
||||
}
|
||||
log.Printf("Loading ROCM llm server")
|
||||
if llm == nil {
|
||||
return nil, fmt.Errorf("nativeInit wasnt called or libary load failed")
|
||||
llm = &shimExtServer{
|
||||
s: srv,
|
||||
options: opts,
|
||||
}
|
||||
llm.options = opts
|
||||
log.Printf("Loading Dynamic Shim llm server: %s", library)
|
||||
return newExtServer(llm, model, adapters, projectors, numLayers, opts)
|
||||
}
|
||||
|
||||
@ -108,64 +116,37 @@ func (llm *shimExtServer) Close() {
|
||||
}
|
||||
|
||||
func nativeInit(workdir string) error {
|
||||
err := extractLib(workdir, "llama.cpp/gguf/build/*/lib/*rocm_server*")
|
||||
libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/lib/*server*")
|
||||
if err != nil {
|
||||
if err == payloadMissing {
|
||||
log.Printf("%s", RocmShimMissing)
|
||||
log.Printf("%s", payloadMissing)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
} else {
|
||||
ShimPresent = true
|
||||
}
|
||||
for _, lib := range libs {
|
||||
libName := strings.Split(strings.TrimPrefix(filepath.Base(lib), "lib"), ".")[0]
|
||||
AvailableShims[libName] = lib
|
||||
}
|
||||
|
||||
// Verify we have permissions - either running as root, or we have group access to the driver
|
||||
fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrPermission) {
|
||||
log.Fatalf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
|
||||
return err
|
||||
} else if errors.Is(err, fs.ErrNotExist) {
|
||||
// expected behavior without a radeon card
|
||||
return nil
|
||||
// Only check ROCm access if we have the dynamic lib loaded
|
||||
if _, rocmPresent := AvailableShims["rocm_server"]; rocmPresent {
|
||||
// Verify we have permissions - either running as root, or we have group access to the driver
|
||||
fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrPermission) {
|
||||
log.Fatalf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
|
||||
return err
|
||||
} else if errors.Is(err, fs.ErrNotExist) {
|
||||
// expected behavior without a radeon card
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
shimMutex.Lock()
|
||||
defer shimMutex.Unlock()
|
||||
if llm != nil {
|
||||
return nil
|
||||
}
|
||||
var libName string
|
||||
switch runtime.GOOS {
|
||||
case "darwin":
|
||||
// shouldn't happen
|
||||
return nil
|
||||
case "linux":
|
||||
libName = "librocm_server.so"
|
||||
case "windows":
|
||||
libName = "rocm_server.dll"
|
||||
default:
|
||||
// shouldn't happen
|
||||
return nil
|
||||
}
|
||||
libPath := C.CString(filepath.Join(workdir, libName))
|
||||
defer C.free(unsafe.Pointer(libPath))
|
||||
resp := newExtServerResp(128)
|
||||
defer freeExtServerResp(resp)
|
||||
var srv C.struct_rocm_llama_server
|
||||
C.rocm_shim_init(libPath, &srv, &resp)
|
||||
if resp.id < 0 {
|
||||
// TODO - consider softening this failure mode to allow fall-back to the CUDA based built-in llm
|
||||
// and run against CPU
|
||||
return fmt.Errorf("Unable to load AMD GPU library: %s", C.GoString(resp.msg))
|
||||
}
|
||||
llm = &shimExtServer{
|
||||
s: srv,
|
||||
options: api.DefaultOptions(),
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user