diff --git a/go.mod b/go.mod
index 0df1372b5..0efefe2f9 100644
--- a/go.mod
+++ b/go.mod
@@ -45,7 +45,7 @@ require (
 	golang.org/x/crypto v0.14.0
 	golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
 	golang.org/x/net v0.17.0 // indirect
-	golang.org/x/sys v0.13.0 // indirect
+	golang.org/x/sys v0.13.0
 	golang.org/x/term v0.13.0
 	golang.org/x/text v0.13.0 // indirect
 	google.golang.org/protobuf v1.30.0 // indirect
diff --git a/gpu/cpu_common.go b/gpu/cpu_common.go
new file mode 100644
index 000000000..11649f6a9
--- /dev/null
+++ b/gpu/cpu_common.go
@@ -0,0 +1,21 @@
+package gpu
+
+import (
+	"log"
+
+	"golang.org/x/sys/cpu"
+)
+
+func GetCPUVariant() string {
+	if cpu.X86.HasAVX2 {
+		log.Printf("CPU has AVX2")
+		return "avx2"
+	}
+	if cpu.X86.HasAVX {
+		log.Printf("CPU has AVX")
+		return "avx"
+	}
+	log.Printf("CPU does not have vector extensions")
+	// else LCD
+	return ""
+}
diff --git a/gpu/gpu_darwin.go b/gpu/gpu_darwin.go
index 796452850..eac55c428 100644
--- a/gpu/gpu_darwin.go
+++ b/gpu/gpu_darwin.go
@@ -32,8 +32,15 @@ func CheckVRAM() (int64, error) {
 func GetGPUInfo() GpuInfo {
 	mem, _ := getCPUMem()
+	if runtime.GOARCH == "amd64" {
+		return GpuInfo{
+			Library: "default",
+			Variant: GetCPUVariant(),
+			memInfo: mem,
+		}
+	}
 	return GpuInfo{
-		Library: "default",
+		Library: "metal",
 		memInfo: mem,
 	}
 }
@@ -45,12 +52,3 @@ func getCPUMem() (memInfo, error) {
 		DeviceCount: 0,
 	}, nil
 }
-
-func nativeInit() error {
-	return nil
-}
-
-func GetCPUVariant() string {
-	// We don't yet have CPU based builds for Darwin...
-	return ""
-}
diff --git a/gpu/gpu_test.go b/gpu/gpu_test.go
index c260211e2..010eaea57 100644
--- a/gpu/gpu_test.go
+++ b/gpu/gpu_test.go
@@ -9,7 +9,7 @@ import (
 func TestBasicGetGPUInfo(t *testing.T) {
 	info := GetGPUInfo()
-	assert.Contains(t, "cuda rocm cpu default", info.Library)
+	assert.Contains(t, "cuda rocm cpu metal", info.Library)
 
 	switch runtime.GOOS {
 	case "darwin":
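Annotation (not part of the patch): GetCPUVariant above keys the per-variant CPU payload selection ("cpu", "cpu_avx", "cpu_avx2") that the build scripts later in this patch produce; loading a build with vector instructions the host lacks would panic the process. A minimal usage sketch, assuming this tree's module path github.com/jmorganca/ollama/gpu:

    package main

    import (
        "fmt"

        "github.com/jmorganca/ollama/gpu"
    )

    func main() {
        // Prints "avx2", "avx", or "" when only the lowest-common-denominator
        // (LCD) build is safe to load.
        fmt.Printf("CPU variant: %q\n", gpu.GetCPUVariant())
    }
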
diff --git a/llm/dynamic_shim.c b/llm/dyn_ext_server.c
similarity index 83%
rename from llm/dynamic_shim.c
rename to llm/dyn_ext_server.c
index ca7c372ae..111e4ab5e 100644
--- a/llm/dynamic_shim.c
+++ b/llm/dyn_ext_server.c
@@ -1,4 +1,4 @@
-#include "dynamic_shim.h"
+#include "dyn_ext_server.h"
 
 #include <stdio.h>
 #include <string.h>
@@ -33,7 +33,7 @@ inline char *LOAD_ERR() {
 #define UNLOAD_LIBRARY(handle) dlclose(handle)
 #endif
 
-void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s,
+void dyn_init(const char *libPath, struct dynamic_llama_server *s,
               ext_server_resp_t *err) {
   int i = 0;
   struct lookup {
@@ -83,63 +83,63 @@ void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s,
   }
 }
 
-inline void dynamic_shim_llama_server_init(struct dynamic_llama_server s,
+inline void dyn_llama_server_init(struct dynamic_llama_server s,
                                    ext_server_params_t *sparams,
                                    ext_server_resp_t *err) {
   s.llama_server_init(sparams, err);
 }
 
-inline void dynamic_shim_llama_server_start(struct dynamic_llama_server s) {
+inline void dyn_llama_server_start(struct dynamic_llama_server s) {
   s.llama_server_start();
 }
 
-inline void dynamic_shim_llama_server_stop(struct dynamic_llama_server s) {
+inline void dyn_llama_server_stop(struct dynamic_llama_server s) {
   s.llama_server_stop();
 }
 
-inline void dynamic_shim_llama_server_completion(struct dynamic_llama_server s,
+inline void dyn_llama_server_completion(struct dynamic_llama_server s,
                                          const char *json_req,
                                          ext_server_resp_t *resp) {
   s.llama_server_completion(json_req, resp);
 }
 
-inline void dynamic_shim_llama_server_completion_next_result(
+inline void dyn_llama_server_completion_next_result(
     struct dynamic_llama_server s, const int task_id,
     ext_server_task_result_t *result) {
   s.llama_server_completion_next_result(task_id, result);
 }
 
-inline void dynamic_shim_llama_server_completion_cancel(
+inline void dyn_llama_server_completion_cancel(
     struct dynamic_llama_server s, const int task_id, ext_server_resp_t *err) {
   s.llama_server_completion_cancel(task_id, err);
 }
 
-inline void dynamic_shim_llama_server_release_task_result(
+inline void dyn_llama_server_release_task_result(
     struct dynamic_llama_server s, ext_server_task_result_t *result) {
   s.llama_server_release_task_result(result);
 }
 
-inline void dynamic_shim_llama_server_tokenize(struct dynamic_llama_server s,
+inline void dyn_llama_server_tokenize(struct dynamic_llama_server s,
                                        const char *json_req, char **json_resp,
                                        ext_server_resp_t *err) {
   s.llama_server_tokenize(json_req, json_resp, err);
 }
 
-inline void dynamic_shim_llama_server_detokenize(struct dynamic_llama_server s,
+inline void dyn_llama_server_detokenize(struct dynamic_llama_server s,
                                          const char *json_req,
                                          char **json_resp,
                                          ext_server_resp_t *err) {
   s.llama_server_detokenize(json_req, json_resp, err);
 }
 
-inline void dynamic_shim_llama_server_embedding(struct dynamic_llama_server s,
+inline void dyn_llama_server_embedding(struct dynamic_llama_server s,
                                         const char *json_req, char **json_resp,
                                         ext_server_resp_t *err) {
   s.llama_server_embedding(json_req, json_resp, err);
 }
 
-inline void dynamic_shim_llama_server_release_json_resp(
+inline void dyn_llama_server_release_json_resp(
     struct dynamic_llama_server s, char **json_resp) {
   s.llama_server_release_json_resp(json_resp);
 }
diff --git a/llm/ext_server_common.go b/llm/dyn_ext_server.go
similarity index 72%
rename from llm/ext_server_common.go
rename to llm/dyn_ext_server.go
index b10ac60ba..105df634f 100644
--- a/llm/ext_server_common.go
+++ b/llm/dyn_ext_server.go
@@ -10,25 +10,25 @@ package llm
 #cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
 #cgo darwin LDFLAGS: -lc++ -framework Accelerate
 #cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libcommon.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libext_server.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libllama.a
-#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/build/darwin/metal/lib/libggml_static.a
 #cgo linux CFLAGS: -D_GNU_SOURCE
 #cgo linux LDFLAGS: -lrt -ldl -lstdc++ -lm
 #cgo linux windows LDFLAGS: -lpthread
 
 #include <stdlib.h>
-#include "ext_server.h"
+#include "dyn_ext_server.h"
 
 */
 import "C"
+
 import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
 	"log"
+	"os"
+	"path/filepath"
+	"runtime"
 	"strings"
 	"sync"
 	"time"
@@ -37,21 +37,9 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
-// TODO switch Linux to always be dynamic
-// If that works out, then look at the impact of doing the same for Mac
-type extServer interface {
-	LLM
-	llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t)
-	llama_server_start()
-	llama_server_stop()
-	llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t)
-	llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t)
-	llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t)
-	llama_server_release_task_result(result *C.ext_server_task_result_t)
-	llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t)
-	llama_server_detokenize(json_req 
*C.char, json_resp **C.char, err *C.ext_server_resp_t) - llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) - llama_server_release_json_resp(json_resp **C.char) +type dynExtServer struct { + s C.struct_dynamic_llama_server + options api.Options } // Note: current implementation does not support concurrent instantiations @@ -76,11 +64,30 @@ func extServerResponseToErr(resp C.ext_server_resp_t) error { return fmt.Errorf(C.GoString(resp.msg)) } -func newExtServer(server extServer, model string, adapters, projectors []string, opts api.Options) (extServer, error) { +// Note: current implementation does not support concurrent instantiations +var llm *dynExtServer + +func newDynExtServer(library, model string, adapters, projectors []string, opts api.Options) (LLM, error) { if !mutex.TryLock() { log.Printf("concurrent llm servers not yet supported, waiting for prior server to complete") mutex.Lock() } + updatePath(filepath.Dir(library)) + libPath := C.CString(library) + defer C.free(unsafe.Pointer(libPath)) + resp := newExtServerResp(128) + defer freeExtServerResp(resp) + var srv C.struct_dynamic_llama_server + C.dyn_init(libPath, &srv, &resp) + if resp.id < 0 { + mutex.Unlock() + return nil, fmt.Errorf("Unable to load dynamic library: %s", C.GoString(resp.msg)) + } + llm = &dynExtServer{ + s: srv, + options: opts, + } + log.Printf("Loading Dynamic llm server: %s", library) var sparams C.ext_server_params_t sparams.model = C.CString(model) @@ -129,20 +136,20 @@ func newExtServer(server extServer, model string, adapters, projectors []string, sparams.n_threads = C.uint(opts.NumThread) - log.Printf("Initializing internal llama server") - resp := newExtServerResp(128) - defer freeExtServerResp(resp) - server.llama_server_init(&sparams, &resp) - if resp.id < 0 { - return nil, extServerResponseToErr(resp) + log.Printf("Initializing llama server") + initResp := newExtServerResp(128) + defer freeExtServerResp(initResp) + C.dyn_llama_server_init(llm.s, &sparams, &initResp) + if initResp.id < 0 { + return nil, extServerResponseToErr(initResp) } - log.Printf("Starting internal llama main loop") - server.llama_server_start() - return server, nil + log.Printf("Starting llama main loop") + C.dyn_llama_server_start(llm.s) + return llm, nil } -func predict(ctx context.Context, llm extServer, predict PredictOpts, fn func(PredictResult)) error { +func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn func(PredictResult)) error { resp := newExtServerResp(128) defer freeExtServerResp(resp) var imageData []ImageData @@ -200,7 +207,7 @@ func predict(ctx context.Context, llm extServer, predict PredictOpts, fn func(Pr req := C.CString(buffer.String()) defer C.free(unsafe.Pointer(req)) - llm.llama_server_completion(req, &resp) + C.dyn_llama_server_completion(llm.s, req, &resp) if resp.id < 0 { return extServerResponseToErr(resp) } @@ -211,7 +218,7 @@ func predict(ctx context.Context, llm extServer, predict PredictOpts, fn func(Pr select { case <-ctx.Done(): // This handles the request cancellation - llm.llama_server_completion_cancel(resp.id, &resp) + C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp) if resp.id < 0 { return extServerResponseToErr(resp) } else { @@ -219,13 +226,13 @@ func predict(ctx context.Context, llm extServer, predict PredictOpts, fn func(Pr } default: var result C.ext_server_task_result_t - llm.llama_server_completion_next_result(resp.id, &result) + C.dyn_llama_server_completion_next_result(llm.s, resp.id, &result) json_resp := 
C.GoString(result.json_resp) - llm.llama_server_release_task_result(&result) + C.dyn_llama_server_release_task_result(llm.s, &result) var p prediction if err := json.Unmarshal([]byte(json_resp), &p); err != nil { - llm.llama_server_completion_cancel(resp.id, &resp) + C.dyn_llama_server_completion_cancel(llm.s, resp.id, &resp) if resp.id < 0 { return fmt.Errorf("error unmarshaling llm prediction response: %w and cancel %s", err, C.GoString(resp.msg)) } else { @@ -266,7 +273,7 @@ func predict(ctx context.Context, llm extServer, predict PredictOpts, fn func(Pr return fmt.Errorf("max retries exceeded") } -func encode(llm extServer, ctx context.Context, prompt string) ([]int, error) { +func (llm *dynExtServer) Encode(ctx context.Context, prompt string) ([]int, error) { data, err := json.Marshal(TokenizeRequest{Content: prompt}) if err != nil { return nil, fmt.Errorf("marshaling encode data: %w", err) @@ -276,11 +283,11 @@ func encode(llm extServer, ctx context.Context, prompt string) ([]int, error) { var json_resp *C.char resp := newExtServerResp(128) defer freeExtServerResp(resp) - llm.llama_server_tokenize(req, &json_resp, &resp) + C.dyn_llama_server_tokenize(llm.s, req, &json_resp, &resp) if resp.id < 0 { return nil, extServerResponseToErr(resp) } - defer llm.llama_server_release_json_resp(&json_resp) + defer C.dyn_llama_server_release_json_resp(llm.s, &json_resp) var encoded TokenizeResponse if err2 := json.Unmarshal([]byte(C.GoString(json_resp)), &encoded); err2 != nil { @@ -290,7 +297,7 @@ func encode(llm extServer, ctx context.Context, prompt string) ([]int, error) { return encoded.Tokens, err } -func decode(llm extServer, ctx context.Context, tokens []int) (string, error) { +func (llm *dynExtServer) Decode(ctx context.Context, tokens []int) (string, error) { if len(tokens) == 0 { return "", nil } @@ -304,11 +311,11 @@ func decode(llm extServer, ctx context.Context, tokens []int) (string, error) { var json_resp *C.char resp := newExtServerResp(128) defer freeExtServerResp(resp) - llm.llama_server_detokenize(req, &json_resp, &resp) + C.dyn_llama_server_detokenize(llm.s, req, &json_resp, &resp) if resp.id < 0 { return "", extServerResponseToErr(resp) } - defer llm.llama_server_release_json_resp(&json_resp) + defer C.dyn_llama_server_release_json_resp(llm.s, &json_resp) var decoded DetokenizeResponse if err2 := json.Unmarshal([]byte(C.GoString(json_resp)), &decoded); err2 != nil { @@ -318,7 +325,7 @@ func decode(llm extServer, ctx context.Context, tokens []int) (string, error) { return decoded.Content, err } -func embedding(llm extServer, ctx context.Context, input string) ([]float64, error) { +func (llm *dynExtServer) Embedding(ctx context.Context, input string) ([]float64, error) { data, err := json.Marshal(TokenizeRequest{Content: input}) if err != nil { return nil, fmt.Errorf("error marshaling embed data: %w", err) @@ -329,11 +336,11 @@ func embedding(llm extServer, ctx context.Context, input string) ([]float64, err var json_resp *C.char resp := newExtServerResp(128) defer freeExtServerResp(resp) - llm.llama_server_embedding(req, &json_resp, &resp) + C.dyn_llama_server_embedding(llm.s, req, &json_resp, &resp) if resp.id < 0 { return nil, extServerResponseToErr(resp) } - defer llm.llama_server_release_json_resp(&json_resp) + defer C.dyn_llama_server_release_json_resp(llm.s, &json_resp) var embedding EmbeddingResponse if err := json.Unmarshal([]byte(C.GoString(json_resp)), &embedding); err != nil { @@ -343,7 +350,38 @@ func embedding(llm extServer, ctx context.Context, input string) 
([]float64, err return embedding.Embedding, nil } -func close(llm extServer) { - llm.llama_server_stop() +func (llm *dynExtServer) Close() { + C.dyn_llama_server_stop(llm.s) mutex.Unlock() } + +func updatePath(dir string) { + if runtime.GOOS == "windows" { + tmpDir := filepath.Dir(dir) + pathComponents := strings.Split(os.Getenv("PATH"), ";") + i := 0 + for _, comp := range pathComponents { + if strings.EqualFold(comp, dir) { + return + } + // Remove any other prior paths to our temp dir + if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) { + pathComponents[i] = comp + i++ + } + } + newPath := strings.Join(append([]string{dir}, pathComponents...), ";") + log.Printf("Updating PATH to %s", newPath) + os.Setenv("PATH", newPath) + } else { + pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":") + for _, comp := range pathComponents { + if comp == dir { + return + } + } + newPath := strings.Join(append([]string{dir}, pathComponents...), ":") + log.Printf("Updating LD_LIBRARY_PATH to %s", newPath) + os.Setenv("LD_LIBRARY_PATH", newPath) + } +} diff --git a/llm/dynamic_shim.h b/llm/dyn_ext_server.h similarity index 75% rename from llm/dynamic_shim.h rename to llm/dyn_ext_server.h index 116ca722d..cddf4a1f0 100644 --- a/llm/dynamic_shim.h +++ b/llm/dyn_ext_server.h @@ -27,46 +27,46 @@ struct dynamic_llama_server { void (*llama_server_release_json_resp)(char **json_resp); }; -void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s, +void dyn_init(const char *libPath, struct dynamic_llama_server *s, ext_server_resp_t *err); // No good way to call C function pointers from Go so inline the indirection -void dynamic_shim_llama_server_init(struct dynamic_llama_server s, +void dyn_llama_server_init(struct dynamic_llama_server s, ext_server_params_t *sparams, ext_server_resp_t *err); -void dynamic_shim_llama_server_start(struct dynamic_llama_server s); +void dyn_llama_server_start(struct dynamic_llama_server s); -void dynamic_shim_llama_server_stop(struct dynamic_llama_server s); +void dyn_llama_server_stop(struct dynamic_llama_server s); -void dynamic_shim_llama_server_completion(struct dynamic_llama_server s, +void dyn_llama_server_completion(struct dynamic_llama_server s, const char *json_req, ext_server_resp_t *resp); -void dynamic_shim_llama_server_completion_next_result( +void dyn_llama_server_completion_next_result( struct dynamic_llama_server s, const int task_id, ext_server_task_result_t *result); -void dynamic_shim_llama_server_completion_cancel(struct dynamic_llama_server s, +void dyn_llama_server_completion_cancel(struct dynamic_llama_server s, const int task_id, ext_server_resp_t *err); -void dynamic_shim_llama_server_release_task_result( +void dyn_llama_server_release_task_result( struct dynamic_llama_server s, ext_server_task_result_t *result); -void dynamic_shim_llama_server_tokenize(struct dynamic_llama_server s, +void dyn_llama_server_tokenize(struct dynamic_llama_server s, const char *json_req, char **json_resp, ext_server_resp_t *err); -void dynamic_shim_llama_server_detokenize(struct dynamic_llama_server s, +void dyn_llama_server_detokenize(struct dynamic_llama_server s, const char *json_req, char **json_resp, ext_server_resp_t *err); -void dynamic_shim_llama_server_embedding(struct dynamic_llama_server s, +void dyn_llama_server_embedding(struct dynamic_llama_server s, const char *json_req, char **json_resp, ext_server_resp_t *err); -void dynamic_shim_llama_server_release_json_resp(struct dynamic_llama_server s, +void 
dyn_llama_server_release_json_resp(struct dynamic_llama_server s,
                                   char **json_resp);
 
 #ifdef __cplusplus
diff --git a/llm/ext_server.go b/llm/ext_server.go
deleted file mode 100644
index c8a5f0b92..000000000
--- a/llm/ext_server.go
+++ /dev/null
@@ -1,17 +0,0 @@
-//go:build !darwin
-
-package llm
-
-import (
-	"fmt"
-
-	"github.com/jmorganca/ollama/api"
-)
-
-func newDefaultExtServer(model string, adapters, projectors []string, opts api.Options) (extServer, error) {
-	// On windows and linux we always load the llama.cpp libraries dynamically to avoid startup DLL dependencies
-	// This ensures we can update the PATH at runtime to get everything loaded
-
-	// This should never happen as we'll always try to load one or more cpu dynamic libraries before hitting default
-	return nil, fmt.Errorf("no available default llm library")
-}
diff --git a/llm/ext_server_default.go b/llm/ext_server_default.go
deleted file mode 100644
index 31f05fb67..000000000
--- a/llm/ext_server_default.go
+++ /dev/null
@@ -1,82 +0,0 @@
-//go:build darwin
-
-package llm
-
-/*
-#include <stdlib.h>
-#include "ext_server.h"
-
-*/
-import "C"
-import (
-	"context"
-
-	"github.com/jmorganca/ollama/api"
-)
-
-// TODO - explore shifting Darwin to a dynamic loading pattern for consistency with Linux and Windows
-
-type llamaExtServer struct {
-	api.Options
-}
-
-func (llm *llamaExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
-	C.llama_server_init(sparams, err)
-}
-func (llm *llamaExtServer) llama_server_start() {
-	C.llama_server_start()
-}
-func (llm *llamaExtServer) llama_server_stop() {
-	C.llama_server_stop()
-}
-
-func (llm *llamaExtServer) llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t) {
-	C.llama_server_completion(json_req, resp)
-}
-func (llm *llamaExtServer) llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t) {
-	C.llama_server_completion_next_result(task_id, resp)
-}
-func (llm *llamaExtServer) llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t) {
-	C.llama_server_completion_cancel(task_id, err)
-}
-func (llm *llamaExtServer) llama_server_release_task_result(result *C.ext_server_task_result_t) {
-	C.llama_server_release_task_result(result)
-}
-
-func (llm *llamaExtServer) llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
-	C.llama_server_tokenize(json_req, json_resp, err)
-}
-func (llm *llamaExtServer) llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
-	C.llama_server_detokenize(json_req, json_resp, err)
-}
-func (llm *llamaExtServer) llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
-	C.llama_server_embedding(json_req, json_resp, err)
-}
-func (llm *llamaExtServer) llama_server_release_json_resp(json_resp **C.char) {
-	C.llama_server_release_json_resp(json_resp)
-}
-
-func newDefaultExtServer(model string, adapters, projectors []string, opts api.Options) (extServer, error) {
-	server := &llamaExtServer{opts}
-	return newExtServer(server, model, adapters, projectors, opts)
-}
-
-func (llm *llamaExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
-	return predict(ctx, llm, pred, fn)
-}
-
-func (llm *llamaExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
-	return encode(llm, ctx, prompt)
-}
-
-func (llm *llamaExtServer) Decode(ctx context.Context, tokens []int) (string, error) {
-	return decode(llm, ctx, tokens)
-}
-
-func (llm *llamaExtServer) 
Embedding(ctx context.Context, input string) ([]float64, error) { - return embedding(llm, ctx, input) -} - -func (llm *llamaExtServer) Close() { - close(llm) -} diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh index cabd8f758..b7f1f6842 100755 --- a/llm/generate/gen_darwin.sh +++ b/llm/generate/gen_darwin.sh @@ -29,4 +29,16 @@ git_module_setup apply_patches build install +gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ + -Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \ + ${BUILD_DIR}/lib/libcommon.a \ + ${BUILD_DIR}/lib/libllama.a \ + ${BUILD_DIR}/lib/libggml_static.a \ + -lpthread -ldl -lm -lc++ \ + -framework Accelerate \ + -framework Foundation \ + -framework Metal \ + -framework MetalKit \ + -framework MetalPerformanceShaders + cleanup diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index 3fec7e6b3..0c940ba5a 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -104,12 +104,6 @@ if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then build install link_server_lib - gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \ - -Wl,--whole-archive \ - ${BUILD_DIR}/lib/libext_server.a \ - -Wl,--no-whole-archive \ - ${BUILD_DIR}/lib/libcommon.a \ - ${BUILD_DIR}/lib/libllama.a fi else echo "Skipping CPU generation step as requested" diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 9435fffa1..1bc08c69f 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -4,7 +4,7 @@ $ErrorActionPreference = "Stop" function init_vars { $script:llamacppDir = "../llama.cpp" - $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64") + $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-A","x64") $script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static") if ($env:CGO_CFLAGS -contains "-g") { $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on") @@ -63,16 +63,36 @@ init_vars git_module_setup apply_patches -# first build CPU based -$script:buildDir="${script:llamacppDir}/build/windows/cpu" +# -DLLAMA_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer +# -DLLAMA_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX) +# -DLLAMA_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen +# -DLLAMA_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver +$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on", "-DLLAMA_NATIVE=off") + +$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs +$script:buildDir="${script:llamacppDir}/build/windows/cpu" +write-host "Building LCD CPU" +build +install + +$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=off", "-DLLAMA_F16C=off") + $script:cmakeDefs +$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx" +write-host "Building AVX CPU" +build +install + +$script:cmakeDefs = $script:commonCpuDefs + @("-DLLAMA_AVX=on", "-DLLAMA_AVX2=on", "-DLLAMA_AVX512=off", "-DLLAMA_FMA=on", "-DLLAMA_F16C=on") + $script:cmakeDefs +$script:buildDir="${script:llamacppDir}/build/windows/cpu_avx2" +write-host "Building AVX2 CPU" build install # Then build cuda as a dynamically loaded library +# TODO figure out how to 
detect cuda version init_vars $script:buildDir="${script:llamacppDir}/build/windows/cuda" -$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON") +$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DLLAMA_AVX=on") build install diff --git a/llm/llm.go b/llm/llm.go index 05230b09f..a414c3daa 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -138,33 +138,30 @@ func Init(workdir string) error { return nativeInit(workdir) } -func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (extServer, error) { - shims := getShims(gpuInfo) +func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (LLM, error) { + dynLibs := getDynLibs(gpuInfo) // Check to see if the user has requested a specific library instead of auto-detecting demandLib := os.Getenv("OLLAMA_LLM_LIBRARY") if demandLib != "" { - libPath := availableShims[demandLib] + libPath := availableDynLibs[demandLib] if libPath == "" { log.Printf("Invalid OLLAMA_LLM_LIBRARY %s - not found", demandLib) } else { log.Printf("Loading OLLAMA_LLM_LIBRARY=%s", demandLib) - shims = []string{libPath} + dynLibs = []string{libPath} } } - for _, shim := range shims { - // TODO - only applies on Darwin (switch to fully dynamic there too...) - if shim == "default" { - break - } - srv, err := newDynamicShimExtServer(shim, model, adapters, projectors, opts) + err2 := fmt.Errorf("unable to locate suitable llm library") + for _, dynLib := range dynLibs { + srv, err := newDynExtServer(dynLib, model, adapters, projectors, opts) if err == nil { return srv, nil } - log.Printf("Failed to load dynamic library %s %s", shim, err) + log.Printf("Failed to load dynamic library %s %s", dynLib, err) + err2 = err } - return newDefaultExtServer(model, adapters, projectors, opts) - + return nil, err2 } diff --git a/llm/shim.go b/llm/payload_common.go similarity index 84% rename from llm/shim.go rename to llm/payload_common.go index e68a8ec38..f69767681 100644 --- a/llm/shim.go +++ b/llm/payload_common.go @@ -18,42 +18,42 @@ import ( // Libraries names may contain an optional variant separated by '_' // For example, "rocm_v6" and "rocm_v5" or "cpu" and "cpu_avx2" // Any library without a variant is the lowest common denominator -var availableShims = map[string]string{} +var availableDynLibs = map[string]string{} const pathComponentCount = 6 -// getShims returns an ordered list of shims to try, starting with the best -func getShims(gpuInfo gpu.GpuInfo) []string { +// getDynLibs returns an ordered list of LLM libraries to try, starting with the best +func getDynLibs(gpuInfo gpu.GpuInfo) []string { // Short circuit if we know we're using the default built-in (darwin only) if gpuInfo.Library == "default" { return []string{"default"} } exactMatch := "" - shims := []string{} - altShims := []string{} + dynLibs := []string{} + altDynLibs := []string{} requested := gpuInfo.Library if gpuInfo.Variant != "" { requested += "_" + gpuInfo.Variant } // Try to find an exact match - for cmp := range availableShims { + for cmp := range availableDynLibs { if requested == cmp { exactMatch = cmp - shims = []string{availableShims[cmp]} + dynLibs = []string{availableDynLibs[cmp]} break } } // Then for GPUs load alternates and sort the list for consistent load ordering if gpuInfo.Library != "cpu" { - for cmp := range availableShims { + for cmp := range availableDynLibs { if gpuInfo.Library == strings.Split(cmp, "_")[0] && cmp != exactMatch { - altShims = append(altShims, cmp) + altDynLibs = append(altDynLibs, cmp) } } - 
slices.Sort(altShims)
-	for _, altShim := range altShims {
-		shims = append(shims, availableShims[altShim])
+	slices.Sort(altDynLibs)
+	for _, altDynLib := range altDynLibs {
+		dynLibs = append(dynLibs, availableDynLibs[altDynLib])
 	}
 }
 
@@ -65,27 +65,27 @@
 	// Attempting to run the wrong CPU instructions will panic the
 	// process
 	if variant != "" {
-		for cmp := range availableShims {
+		for cmp := range availableDynLibs {
 			if cmp == "cpu_"+variant {
-				shims = append(shims, availableShims[cmp])
+				dynLibs = append(dynLibs, availableDynLibs[cmp])
 				break
 			}
 		}
 	} else {
-		shims = append(shims, availableShims["cpu"])
+		dynLibs = append(dynLibs, availableDynLibs["cpu"])
 	}
 }
 
 // Finally, if we didn't find any matches, LCD CPU FTW
-	if len(shims) == 0 {
-		shims = []string{availableShims["cpu"]}
+	if len(dynLibs) == 0 {
+		dynLibs = []string{availableDynLibs["cpu"]}
 	}
-	return shims
+	return dynLibs
 }
 
-func rocmShimPresent() bool {
-	for shimName := range availableShims {
-		if strings.HasPrefix(shimName, "rocm") {
+func rocmDynLibPresent() bool {
+	for dynLibName := range availableDynLibs {
+		if strings.HasPrefix(dynLibName, "rocm") {
 			return true
 		}
 	}
@@ -104,7 +104,6 @@ func nativeInit(workdir string) error {
 			return err
 		}
 		os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
-		return nil
 	}
 
 	libs, err := extractDynamicLibs(workdir, "llama.cpp/build/*/*/lib/*")
@@ -118,7 +117,7 @@
 	for _, lib := range libs {
 		// The last dir component is the variant name
 		variant := filepath.Base(filepath.Dir(lib))
-		availableShims[variant] = lib
+		availableDynLibs[variant] = lib
 	}
 
 	if err := verifyDriverAccess(); err != nil {
@@ -126,9 +125,9 @@
 	}
 
 	// Report which dynamic libraries we have loaded to assist troubleshooting
-	variants := make([]string, len(availableShims))
+	variants := make([]string, len(availableDynLibs))
 	i := 0
-	for variant := range availableShims {
+	for variant := range availableDynLibs {
 		variants[i] = variant
 		i++
 	}
@@ -226,7 +225,7 @@ func verifyDriverAccess() error {
 		return nil
 	}
 	// Only check ROCm access if we have the dynamic lib loaded
-	if rocmShimPresent() {
+	if rocmDynLibPresent() {
 		// Verify we have permissions - either running as root, or we have group access to the driver
 		fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
 		if err != nil {
diff --git a/llm/payload_darwin.go b/llm/payload_darwin.go
new file mode 100644
index 000000000..1a5f042a1
--- /dev/null
+++ b/llm/payload_darwin.go
@@ -0,0 +1,8 @@
+package llm
+
+import (
+	"embed"
+)
+
+//go:embed llama.cpp/ggml-metal.metal llama.cpp/build/darwin/*/lib/*.so
+var libEmbed embed.FS
diff --git a/llm/payload_linux.go b/llm/payload_linux.go
new file mode 100644
index 000000000..afef040af
--- /dev/null
+++ b/llm/payload_linux.go
@@ -0,0 +1,8 @@
+package llm
+
+import (
+	"embed"
+)
+
+//go:embed llama.cpp/build/linux/*/lib/*.so
+var libEmbed embed.FS
diff --git a/llm/payload_test.go b/llm/payload_test.go
new file mode 100644
index 000000000..7a644713a
--- /dev/null
+++ b/llm/payload_test.go
@@ -0,0 +1,54 @@
+package llm
+
+import (
+	"testing"
+
+	"github.com/jmorganca/ollama/gpu"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGetDynLibs(t *testing.T) {
+	availableDynLibs = map[string]string{
+		"cpu": "X_cpu",
+	}
+	assert.Equal(t, false, rocmDynLibPresent())
+	res := getDynLibs(gpu.GpuInfo{Library: "cpu"})
+	assert.Len(t, res, 1)
+	assert.Equal(t, availableDynLibs["cpu"], res[0])
+
+	availableDynLibs = map[string]string{
+		
"rocm_v5": "X_rocm_v5", + "rocm_v6": "X_rocm_v6", + "cpu": "X_cpu", + } + assert.Equal(t, true, rocmDynLibPresent()) + res = getDynLibs(gpu.GpuInfo{Library: "rocm"}) + assert.Len(t, res, 3) + assert.Equal(t, availableDynLibs["rocm_v5"], res[0]) + assert.Equal(t, availableDynLibs["rocm_v6"], res[1]) + assert.Equal(t, availableDynLibs["cpu"], res[2]) + + res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) + assert.Len(t, res, 3) + assert.Equal(t, availableDynLibs["rocm_v6"], res[0]) + assert.Equal(t, availableDynLibs["rocm_v5"], res[1]) + assert.Equal(t, availableDynLibs["cpu"], res[2]) + + res = getDynLibs(gpu.GpuInfo{Library: "cuda"}) + assert.Len(t, res, 1) + assert.Equal(t, availableDynLibs["cpu"], res[0]) + + res = getDynLibs(gpu.GpuInfo{Library: "default"}) + assert.Len(t, res, 1) + assert.Equal(t, "default", res[0]) + + availableDynLibs = map[string]string{ + "rocm": "X_rocm_v5", + "cpu": "X_cpu", + } + assert.Equal(t, true, rocmDynLibPresent()) + res = getDynLibs(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) + assert.Len(t, res, 2) + assert.Equal(t, availableDynLibs["rocm"], res[0]) + assert.Equal(t, availableDynLibs["cpu"], res[1]) +} diff --git a/llm/payload_windows.go b/llm/payload_windows.go new file mode 100644 index 000000000..21c6cc4d9 --- /dev/null +++ b/llm/payload_windows.go @@ -0,0 +1,8 @@ +package llm + +import ( + "embed" +) + +//go:embed llama.cpp/build/windows/*/lib/*.dll +var libEmbed embed.FS diff --git a/llm/shim_darwin.go b/llm/shim_darwin.go deleted file mode 100644 index 9ef8ef96e..000000000 --- a/llm/shim_darwin.go +++ /dev/null @@ -1,16 +0,0 @@ -package llm - -import ( - "embed" - "fmt" - - "github.com/jmorganca/ollama/api" -) - -//go:embed llama.cpp/ggml-metal.metal -var libEmbed embed.FS - -func newDynamicShimExtServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) { - // should never happen... 
-	return nil, fmt.Errorf("Dynamic library loading not supported on Mac")
-}
diff --git a/llm/shim_ext_server.go b/llm/shim_ext_server.go
deleted file mode 100644
index 102f059c8..000000000
--- a/llm/shim_ext_server.go
+++ /dev/null
@@ -1,107 +0,0 @@
-//go:build !darwin
-
-package llm
-
-/*
-
-#include <stdlib.h>
-#include "dynamic_shim.h"
-
-*/
-import "C"
-import (
-	"context"
-	"fmt"
-	"log"
-	"path/filepath"
-	"sync"
-	"unsafe"
-
-	"github.com/jmorganca/ollama/api"
-)
-
-type shimExtServer struct {
-	s       C.struct_dynamic_llama_server
-	options api.Options
-}
-
-// Note: current implementation does not support concurrent instantiations
-var shimMutex sync.Mutex
-var llm *shimExtServer
-
-func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
-	C.dynamic_shim_llama_server_init(llm.s, sparams, err)
-}
-func (llm *shimExtServer) llama_server_start() {
-	C.dynamic_shim_llama_server_start(llm.s)
-}
-func (llm *shimExtServer) llama_server_stop() {
-	C.dynamic_shim_llama_server_stop(llm.s)
-}
-
-func (llm *shimExtServer) llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t) {
-	C.dynamic_shim_llama_server_completion(llm.s, json_req, resp)
-}
-func (llm *shimExtServer) llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t) {
-	C.dynamic_shim_llama_server_completion_next_result(llm.s, task_id, resp)
-}
-func (llm *shimExtServer) llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t) {
-	C.dynamic_shim_llama_server_completion_cancel(llm.s, task_id, err)
-}
-func (llm *shimExtServer) llama_server_release_task_result(result *C.ext_server_task_result_t) {
-	C.dynamic_shim_llama_server_release_task_result(llm.s, result)
-}
-
-func (llm *shimExtServer) llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
-	C.dynamic_shim_llama_server_tokenize(llm.s, json_req, json_resp, err)
-}
-func (llm *shimExtServer) llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
-	C.dynamic_shim_llama_server_detokenize(llm.s, json_req, json_resp, err)
-}
-func (llm *shimExtServer) llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
-	C.dynamic_shim_llama_server_embedding(llm.s, json_req, json_resp, err)
-}
-func (llm *shimExtServer) llama_server_release_json_resp(json_resp **C.char) {
-	C.dynamic_shim_llama_server_release_json_resp(llm.s, json_resp)
-}
-
-func newDynamicShimExtServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
-	shimMutex.Lock()
-	defer shimMutex.Unlock()
-	updatePath(filepath.Dir(library))
-	libPath := C.CString(library)
-	defer C.free(unsafe.Pointer(libPath))
-	resp := newExtServerResp(128)
-	defer freeExtServerResp(resp)
-	var srv C.struct_dynamic_llama_server
-	C.dynamic_shim_init(libPath, &srv, &resp)
-	if resp.id < 0 {
-		return nil, fmt.Errorf("Unable to load dynamic library: %s", C.GoString(resp.msg))
-	}
-	llm = &shimExtServer{
-		s:       srv,
-		options: opts,
-	}
-	log.Printf("Loading Dynamic Shim llm server: %s", library)
-	return newExtServer(llm, model, adapters, projectors, opts)
-}
-
-func (llm *shimExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
-	return predict(ctx, llm, pred, fn)
-}
-
-func (llm *shimExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
-	return encode(llm, ctx, prompt)
-}
-
-func (llm *shimExtServer) Decode(ctx context.Context, tokens []int) (string, error) {
-	return 
decode(llm, ctx, tokens) -} - -func (llm *shimExtServer) Embedding(ctx context.Context, input string) ([]float64, error) { - return embedding(llm, ctx, input) -} - -func (llm *shimExtServer) Close() { - close(llm) -} diff --git a/llm/shim_ext_server_linux.go b/llm/shim_ext_server_linux.go deleted file mode 100644 index e4bfd15ea..000000000 --- a/llm/shim_ext_server_linux.go +++ /dev/null @@ -1,23 +0,0 @@ -package llm - -import ( - "embed" - "log" - "os" - "strings" -) - -//go:embed llama.cpp/build/*/*/lib/*.so -var libEmbed embed.FS - -func updatePath(dir string) { - pathComponents := strings.Split(os.Getenv("LD_LIBRARY_PATH"), ":") - for _, comp := range pathComponents { - if comp == dir { - return - } - } - newPath := strings.Join(append([]string{dir}, pathComponents...), ":") - log.Printf("Updating LD_LIBRARY_PATH to %s", newPath) - os.Setenv("LD_LIBRARY_PATH", newPath) -} diff --git a/llm/shim_ext_server_windows.go b/llm/shim_ext_server_windows.go deleted file mode 100644 index c218c6f32..000000000 --- a/llm/shim_ext_server_windows.go +++ /dev/null @@ -1,31 +0,0 @@ -package llm - -import ( - "embed" - "log" - "os" - "path/filepath" - "strings" -) - -//go:embed llama.cpp/build/windows/*/lib/*.dll -var libEmbed embed.FS - -func updatePath(dir string) { - tmpDir := filepath.Dir(dir) - pathComponents := strings.Split(os.Getenv("PATH"), ";") - i := 0 - for _, comp := range pathComponents { - if strings.EqualFold(comp, dir) { - return - } - // Remove any other prior paths to our temp dir - if !strings.HasPrefix(strings.ToLower(comp), strings.ToLower(tmpDir)) { - pathComponents[i] = comp - i++ - } - } - newPath := strings.Join(append([]string{dir}, pathComponents...), ";") - log.Printf("Updating PATH to %s", newPath) - os.Setenv("PATH", newPath) -} diff --git a/llm/shim_test.go b/llm/shim_test.go deleted file mode 100644 index 8d49ce149..000000000 --- a/llm/shim_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package llm - -import ( - "testing" - - "github.com/jmorganca/ollama/gpu" - "github.com/stretchr/testify/assert" -) - -func TestGetShims(t *testing.T) { - availableShims = map[string]string{ - "cpu": "X_cpu", - } - assert.Equal(t, false, rocmShimPresent()) - res := getShims(gpu.GpuInfo{Library: "cpu"}) - assert.Len(t, res, 1) - assert.Equal(t, availableShims["cpu"], res[0]) - - availableShims = map[string]string{ - "rocm_v5": "X_rocm_v5", - "rocm_v6": "X_rocm_v6", - "cpu": "X_cpu", - } - assert.Equal(t, true, rocmShimPresent()) - res = getShims(gpu.GpuInfo{Library: "rocm"}) - assert.Len(t, res, 3) - assert.Equal(t, availableShims["rocm_v5"], res[0]) - assert.Equal(t, availableShims["rocm_v6"], res[1]) - assert.Equal(t, availableShims["cpu"], res[2]) - - res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) - assert.Len(t, res, 3) - assert.Equal(t, availableShims["rocm_v6"], res[0]) - assert.Equal(t, availableShims["rocm_v5"], res[1]) - assert.Equal(t, availableShims["cpu"], res[2]) - - res = getShims(gpu.GpuInfo{Library: "cuda"}) - assert.Len(t, res, 1) - assert.Equal(t, availableShims["cpu"], res[0]) - - res = getShims(gpu.GpuInfo{Library: "default"}) - assert.Len(t, res, 1) - assert.Equal(t, "default", res[0]) - - availableShims = map[string]string{ - "rocm": "X_rocm_v5", - "cpu": "X_cpu", - } - assert.Equal(t, true, rocmShimPresent()) - res = getShims(gpu.GpuInfo{Library: "rocm", Variant: "v6"}) - assert.Len(t, res, 2) - assert.Equal(t, availableShims["rocm"], res[0]) - assert.Equal(t, availableShims["cpu"], res[1]) -}
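
Annotation (not part of the patch): the payload flow ties these pieces together. nativeInit extracts the embedded libraries into a workdir and registers each in availableDynLibs keyed by its parent directory name; getDynLibs orders candidates best-first (exact library_variant match, then sorted alternates of the same GPU library, then the matching CPU build); newLlmServer walks that list, calling newDynExtServer until one loads. A small sketch of the variant-key derivation, using a hypothetical extraction path:

    package main

    import (
        "fmt"
        "path/filepath"
    )

    func main() {
        // Hypothetical workdir layout after extraction; the real path is a
        // temp dir populated from the embed.FS payloads above.
        lib := "/tmp/ollama/cpu_avx2/libext_server.so"
        // Mirrors nativeInit: the last dir component is the variant name.
        variant := filepath.Base(filepath.Dir(lib))
        fmt.Println(variant) // cpu_avx2
    }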