diff --git a/discover/runner.go b/discover/runner.go index 9ae5b3ffd4..65a542e2fd 100644 --- a/discover/runner.go +++ b/discover/runner.go @@ -27,7 +27,6 @@ var ( deviceMu sync.Mutex devices []ml.DeviceInfo libDirs map[string]struct{} - rocmDir string exe string bootstrapped bool ) @@ -61,14 +60,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. libDirs[filepath.Dir(file)] = struct{}{} } - // Our current packaging model places ggml-hip in the main directory - // but keeps rocm in an isolated directory. We have to add it to - // the [LD_LIBRARY_]PATH so ggml-hip will load properly - rocmDir = filepath.Join(ml.LibOllamaPath, "rocm") - if _, err := os.Stat(rocmDir); err != nil { - rocmDir = "" - } - if len(libDirs) == 0 { libDirs[""] = struct{}{} } @@ -82,9 +73,20 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. // are enumerated, but not actually supported. // We run this in serial to avoid potentially initializing a GPU multiple // times concurrently leading to memory contention - // TODO refactor so we group the lib dirs and do serial per version, but parallel for different libs for dir := range libDirs { + // Typically bootstrapping takes < 1s, but on some systems, with devices + // in low power/idle mode, initialization can take multiple seconds. We + // set a longer timeout just for bootstrap discovery to reduce the chance + // of giving up too quickly bootstrapTimeout := 30 * time.Second + if runtime.GOOS == "windows" { + // On Windows with Defender enabled, AV scanning of the DLLs + // takes place sequentially and this can significantly increase + // the time it takes to do the initial discovery pass. 
+ // Subsequent loads will be faster as the scan results are + // cached + bootstrapTimeout = 90 * time.Second + } var dirs []string if dir != "" { if requested != "" && filepath.Base(dir) != requested { @@ -93,21 +95,11 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. } else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack { continue } - } - if dir == "" { - dirs = []string{ml.LibOllamaPath} - } else { dirs = []string{ml.LibOllamaPath, dir} + } else { + dirs = []string{ml.LibOllamaPath} } - // ROCm can take a long time on some systems, so give it more time before giving up - if dir != "" && strings.Contains(filepath.Base(dir), "rocm") { - bootstrapTimeout = 60 * time.Second - } - // Typically bootstrapping takes < 1s, but on some systems, with devices - // in low power/idle mode, initialization can take multiple seconds. We - // set a long timeout just for bootstrap discovery to reduce the chance - // of giving up too quickly ctx1stPass, cancel := context.WithTimeout(ctx, bootstrapTimeout) defer cancel() @@ -117,6 +109,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. // In the second pass, we more deeply initialize the GPUs to weed out devices that // aren't supported by a given library. We run this phase in parallel to speed up discovery. + // Only devices that need verification are included in this pass slog.Debug("evluating which if any devices to filter out", "initial_count", len(devices)) ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second) defer cancel() @@ -125,35 +118,16 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. 
supportedMu := sync.Mutex{} supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index for i := range devices { - libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1] - if devices[i].Library == "Metal" { + if !devices[i].NeedsInitValidation() { continue } - slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID) + libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1] + slog.Debug("verifying device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID) wg.Add(1) go func(i int) { defer wg.Done() - var envVar string - id := devices[i].ID - if devices[i].Library == "ROCm" { - if runtime.GOOS != "linux" { - envVar = "HIP_VISIBLE_DEVICES" - } else { - envVar = "ROCR_VISIBLE_DEVICES" - } - } else if devices[i].Library == "CUDA" { - envVar = "CUDA_VISIBLE_DEVICES" - } else if devices[i].Library == "Vulkan" { - id = devices[i].FilteredID - envVar = "GGML_VK_VISIBLE_DEVICES" - } else { - slog.Error("Unknown Library:" + devices[i].Library) - } - - extraEnvs := map[string]string{ - "GGML_CUDA_INIT": "1", // force deep initialization to trigger crash on unsupported GPUs - envVar: id, // Filter to just this one GPU - } + extraEnvs := ml.GetVisibleDevicesEnv(devices[i : i+1]) + devices[i].AddInitValidation(extraEnvs) if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 { slog.Debug("filtering device which didn't fully initialize", "id", devices[i].ID, @@ -178,26 +152,28 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. 
wg.Wait() logutil.Trace("supported GPU library combinations before filtering", "supported", supported) - filterOutVulkanThatAreSupportedByOtherGPU(needsDelete) - // Mark for deletion any overlaps - favoring the library version that can cover all GPUs if possible filterOverlapByLibrary(supported, needsDelete) - // TODO if we ever support multiple ROCm library versions this algorithm will need to be adjusted to keep the rocmID numeric value correct - rocmID := 0 + // Any Libraries that utilize numeric IDs need adjusting based on any possible filtering taking place + postFilteredID := map[string]int{} for i := 0; i < len(needsDelete); i++ { if needsDelete[i] { logutil.Trace("removing unsupported or overlapping GPU combination", "libDir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1], "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID) devices = append(devices[:i], devices[i+1:]...) needsDelete = append(needsDelete[:i], needsDelete[i+1:]...) i-- - } else if devices[i].Library == "ROCm" { + } else { + if _, ok := postFilteredID[devices[i].Library]; !ok { + postFilteredID[devices[i].Library] = 0 + } if _, err := strconv.Atoi(devices[i].ID); err == nil { // Replace the numeric ID with the post-filtered IDs - devices[i].FilteredID = devices[i].ID - devices[i].ID = strconv.Itoa(rocmID) + slog.Debug("adjusting filtering IDs", "FilterID", devices[i].ID, "new_ID", strconv.Itoa(postFilteredID[devices[i].Library])) + devices[i].FilterID = devices[i].ID + devices[i].ID = strconv.Itoa(postFilteredID[devices[i].Library]) } - rocmID++ + postFilteredID[devices[i].Library]++ } } @@ -214,7 +190,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. 
case ml.DuplicateDevice: // Different library, choose based on priority var droppedDevice ml.DeviceInfo - if devices[i].Library == "CUDA" || devices[i].Library == "ROCm" { + if devices[i].PreferredLibrary(devices[j]) { droppedDevice = devices[j] } else { droppedDevice = devices[i] @@ -363,38 +339,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml. return devices } -func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) { - // Filter out Vulkan devices that share a PCI ID with a non-Vulkan device that is not marked for deletion - for i := range devices { - if devices[i].Library != "Vulkan" || needsDelete[i] { - continue - } - if devices[i].PCIID == "" { - continue - } - for j := range devices { - if i == j { - continue - } - if devices[j].PCIID == "" { - continue - } - if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] { - needsDelete[i] = true - slog.Debug("filtering device with duplicate PCI ID", - "id", devices[i].ID, - "library", devices[i].Library, - "libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1], - "pci_id", devices[i].PCIID, - "kept_id", devices[j].ID, - "kept_library", devices[j].Library, - ) - break - } - } - } -} - func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) { // For multi-GPU systems, use the newest version that supports all the GPUs for _, byLibDirs := range supported { diff --git a/discover/types.go b/discover/types.go index b1f622f4c6..efc69ecfd9 100644 --- a/discover/types.go +++ b/discover/types.go @@ -41,7 +41,7 @@ func LogDetails(devices []ml.DeviceInfo) { } slog.Info("inference compute", "id", dev.ID, - "filtered_id", dev.FilteredID, + "filter_id", dev.FilterID, "library", dev.Library, "compute", dev.Compute(), "name", dev.Name, diff --git a/llama/patches/0026-GPU-discovery-enhancements.patch b/llama/patches/0026-GPU-discovery-enhancements.patch index 807a468901..e5e68f3188 100644 --- 
a/llama/patches/0026-GPU-discovery-enhancements.patch +++ b/llama/patches/0026-GPU-discovery-enhancements.patch @@ -14,24 +14,24 @@ Vulkan PCI and Memory fix vulkan PCI ID and ID handling --- - ggml/include/ggml-backend.h | 8 + + ggml/include/ggml-backend.h | 6 + ggml/src/CMakeLists.txt | 2 + ggml/src/ggml-cuda/ggml-cuda.cu | 65 ++++ ggml/src/ggml-cuda/vendors/hip.h | 3 + ggml/src/ggml-impl.h | 8 + ggml/src/ggml-metal/ggml-metal.cpp | 2 + - ggml/src/ggml-vulkan/ggml-vulkan.cpp | 212 +++++++++++-- + ggml/src/ggml-vulkan/ggml-vulkan.cpp | 209 +++++++++++-- ggml/src/mem_hip.cpp | 452 +++++++++++++++++++++++++++ ggml/src/mem_nvml.cpp | 209 +++++++++++++ - 9 files changed, 931 insertions(+), 30 deletions(-) + 9 files changed, 926 insertions(+), 30 deletions(-) create mode 100644 ggml/src/mem_hip.cpp create mode 100644 ggml/src/mem_nvml.cpp diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h -index ba181d09d..809835243 100644 +index 69223c488..6510e0cba 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h -@@ -169,6 +169,14 @@ extern "C" { +@@ -169,6 +169,12 @@ extern "C" { const char * device_id; // device capabilities struct ggml_backend_dev_caps caps; @@ -41,8 +41,6 @@ index ba181d09d..809835243 100644 + int compute_minor; + int integrated; + const char *library; -+ // number with which the devices are accessed (Vulkan) -+ const char *numeric_id; }; GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); @@ -60,7 +58,7 @@ index 0609c6503..aefe43bdd 100644 target_include_directories(ggml-base PRIVATE .) 
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu -index 87c6c34a4..b075a18be 100644 +index 5787e8cd5..d232bf828 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() { @@ -92,7 +90,7 @@ index 87c6c34a4..b075a18be 100644 GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no", ggml_cuda_parse_uuid(prop, id).c_str()); -@@ -3484,6 +3499,11 @@ struct ggml_backend_cuda_device_context { +@@ -3476,6 +3491,11 @@ struct ggml_backend_cuda_device_context { std::string description; std::string pci_bus_id; std::string id; @@ -104,7 +102,7 @@ index 87c6c34a4..b075a18be 100644 }; static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) { -@@ -3504,6 +3524,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { +@@ -3496,6 +3516,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) { static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; ggml_cuda_set_device(ctx->device); @@ -133,7 +131,7 @@ index 87c6c34a4..b075a18be 100644 CUDA_CHECK(cudaMemGetInfo(free, total)); } -@@ -3512,6 +3554,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend +@@ -3504,6 +3546,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend return GGML_BACKEND_DEVICE_TYPE_GPU; } @@ -141,7 +139,7 @@ index 87c6c34a4..b075a18be 100644 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context; -@@ -3525,6 +3568,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back 
+@@ -3517,6 +3560,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back // If you need the memory data, call ggml_backend_dev_memory() explicitly. props->memory_total = props->memory_free = 0; @@ -161,7 +159,7 @@ index 87c6c34a4..b075a18be 100644 bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr; #ifdef GGML_CUDA_NO_PEER_COPY bool events = false; -@@ -4087,6 +4143,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { +@@ -4079,6 +4135,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { std::lock_guard lock(mutex); if (!initialized) { ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context; @@ -169,7 +167,7 @@ index 87c6c34a4..b075a18be 100644 for (int i = 0; i < ggml_cuda_info().device_count; i++) { ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context; -@@ -4102,6 +4159,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { +@@ -4094,6 +4151,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() { snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID); dev_ctx->pci_bus_id = pci_bus_id; @@ -225,10 +223,10 @@ index d0fb3bcca..b63edd0c1 100644 } #endif diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp -index f2ff9f322..f356e4a0a 100644 +index 05ff6a5a6..032dee76d 100644 --- a/ggml/src/ggml-metal/ggml-metal.cpp +++ b/ggml/src/ggml-metal/ggml-metal.cpp -@@ -535,6 +535,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen +@@ -537,6 +537,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen GGML_UNUSED(dev); } @@ -236,7 +234,7 @@ index f2ff9f322..f356e4a0a 100644 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_metal_device_get_name(dev); props->description = ggml_backend_metal_device_get_description(dev); -@@ -543,6 +544,7 @@ static void 
ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_bac +@@ -545,6 +546,7 @@ static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_bac ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total); @@ -245,7 +243,7 @@ index f2ff9f322..f356e4a0a 100644 /* .async = */ true, /* .host_buffer = */ false, diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp -index ed83236f4..0bbcecd01 100644 +index bd3ece516..7cfb14a54 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -231,6 +231,7 @@ class vk_memory_logger; @@ -286,7 +284,7 @@ index ed83236f4..0bbcecd01 100644 // backend interface #define UNUSED GGML_UNUSED -@@ -12391,31 +12415,103 @@ void ggml_backend_vk_get_device_description(int device, char * description, size +@@ -12392,31 +12416,102 @@ void ggml_backend_vk_get_device_description(int device, char * description, size ggml_vk_get_device_description(dev_idx, description, description_size); } @@ -309,7 +307,6 @@ index ed83236f4..0bbcecd01 100644 + std::string pci_id; + std::string id; + std::string uuid; -+ std::string numeric_id; + int major; + int minor; + int driver_major; @@ -407,7 +404,7 @@ index ed83236f4..0bbcecd01 100644 break; } } -@@ -12448,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { +@@ -12449,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { } } @@ -422,7 +419,7 @@ index ed83236f4..0bbcecd01 100644 } vk::PhysicalDeviceProperties2 props = {}; -@@ -12466,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { +@@ -12467,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) { char pci_bus_id[16] = {}; snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function); @@ -456,7 +453,7 @@ index ed83236f4..0bbcecd01 100644 static const char * 
ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) { ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; -@@ -12490,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de +@@ -12491,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de return ctx->description.c_str(); } @@ -472,7 +469,7 @@ index ed83236f4..0bbcecd01 100644 } static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) { -@@ -12516,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml +@@ -12517,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml props->name = ggml_backend_vk_device_get_name(dev); props->description = ggml_backend_vk_device_get_description(dev); @@ -483,7 +480,7 @@ index ed83236f4..0bbcecd01 100644 ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total); props->caps = { /* .async = */ false, -@@ -12525,6 +12637,14 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml +@@ -12526,6 +12637,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml /* .buffer_from_host_ptr = */ false, /* .events = */ false, }; @@ -494,11 +491,10 @@ index ed83236f4..0bbcecd01 100644 + props->driver_minor = ctx->driver_minor; + props->integrated = ctx->is_integrated_gpu; + props->library = GGML_VK_NAME; -+ props->numeric_id = ctx->numeric_id.c_str(); } static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) { -@@ -12953,6 +13073,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, +@@ -12954,6 +13072,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, static std::mutex mutex; std::lock_guard lock(mutex); if (!initialized) { @@ -507,7 +503,7 @@ index ed83236f4..0bbcecd01 100644 for 
(int i = 0; i < ggml_backend_vk_get_device_count(); i++) { ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context; char desc[256]; -@@ -12961,12 +13083,42 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, +@@ -12962,12 +13082,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, ctx->name = GGML_VK_NAME + std::to_string(i); ctx->description = desc; ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu; @@ -547,7 +543,6 @@ index ed83236f4..0bbcecd01 100644 + // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string + ctx->driver_major = 0; + ctx->driver_minor = 0; -+ ctx->numeric_id = std::to_string(i); } initialized = true; } diff --git a/llama/patches/0031-Add-memory-detection-using-DXGI-PDH.patch b/llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch similarity index 98% rename from llama/patches/0031-Add-memory-detection-using-DXGI-PDH.patch rename to llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch index 8e5461cbb2..ebadc82b43 100644 --- a/llama/patches/0031-Add-memory-detection-using-DXGI-PDH.patch +++ b/llama/patches/0030-Add-memory-detection-using-DXGI-PDH.patch @@ -38,7 +38,7 @@ index b63edd0c1..81cad8cf3 100644 #ifdef __cplusplus } diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp -index cc68e7968..27d6574da 100644 +index 7cfb14a54..a1c46d0b3 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher(); @@ -54,10 +54,10 @@ index cc68e7968..27d6574da 100644 std::string id; std::string uuid; + std::string luid; - std::string numeric_id; int major; int minor; -@@ -12449,8 +12451,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size + int driver_major; +@@ -12448,8 +12450,22 @@ void 
ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties(); vk::PhysicalDeviceProperties2 props2; vkdev.getProperties2(&props2); @@ -81,7 +81,7 @@ index cc68e7968..27d6574da 100644 { // Use vendor specific management libraries for best VRAM reporting if available switch (props2.properties.vendorID) { -@@ -12478,8 +12494,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size +@@ -12477,8 +12493,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size break; } } @@ -91,7 +91,7 @@ index cc68e7968..27d6574da 100644 *total = 0; *free = 0; vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props; -@@ -13091,7 +13107,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, +@@ -13089,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, /* .reg = */ reg, /* .context = */ ctx, }); @@ -99,7 +99,7 @@ index cc68e7968..27d6574da 100644 // Gather additional information about the device int dev_idx = vk_instance.device_indices[i]; vk::PhysicalDeviceProperties props1; -@@ -13114,6 +13129,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, +@@ -13112,6 +13127,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, } } ctx->uuid = oss.str(); diff --git a/llama/patches/0032-interleave-multi-rope.patch b/llama/patches/0031-interleave-multi-rope.patch similarity index 98% rename from llama/patches/0032-interleave-multi-rope.patch rename to llama/patches/0031-interleave-multi-rope.patch index eb41639e6a..6a8be51e74 100644 --- a/llama/patches/0032-interleave-multi-rope.patch +++ b/llama/patches/0031-interleave-multi-rope.patch @@ -1,6 +1,6 @@ From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 From: Michael Yang -Date: Web, 16 Oct 2025 20:37:19 -0700 +Date: Thu, 16 Oct 2025 20:37:19 
-0700 Subject: [PATCH] interleave multi rope since ollama doesn't use mrope for anything else, change it to mean the @@ -85,7 +85,7 @@ index 375a0c7fd..9866c96b4 100644 // end of mrope diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp -index 111286b49..6fc2b42f8 100644 +index 111286b49..633dc20ff 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp @@ -31,19 +31,13 @@ void main() { diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index c02926b359..5fa0a9ec0e 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -730,10 +730,6 @@ func (b *Backend) BackendDevices() []ml.DeviceInfo { info.PCIID = C.GoString(props.device_id) } info.LibraryPath = ggml.LibPaths() - if props.numeric_id != nil { - info.FilteredID = C.GoString(props.numeric_id) - } - C.ggml_backend_dev_memory(dev, &props.memory_free, &props.memory_total) info.TotalMemory = (uint64)(props.memory_total) info.FreeMemory = (uint64)(props.memory_free) diff --git a/ml/backend/ggml/ggml/include/ggml-backend.h b/ml/backend/ggml/ggml/include/ggml-backend.h index 1cab4bb3f2..6510e0cba1 100644 --- a/ml/backend/ggml/ggml/include/ggml-backend.h +++ b/ml/backend/ggml/ggml/include/ggml-backend.h @@ -175,8 +175,6 @@ extern "C" { int compute_minor; int integrated; const char *library; - // number with which the devices are accessed (Vulkan) - const char *numeric_id; }; GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device); diff --git a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 27d6574da6..a1c46d0b3d 100644 --- a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -12435,7 +12435,6 @@ struct ggml_backend_vk_device_context { std::string id; std::string uuid; std::string luid; - std::string numeric_id; int 
major; int minor; int driver_major; @@ -12661,7 +12660,6 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml props->driver_minor = ctx->driver_minor; props->integrated = ctx->is_integrated_gpu; props->library = GGML_VK_NAME; - props->numeric_id = ctx->numeric_id.c_str(); } static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) { @@ -13142,7 +13140,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg, // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string ctx->driver_major = 0; ctx->driver_minor = 0; - ctx->numeric_id = std::to_string(i); } initialized = true; } diff --git a/ml/device.go b/ml/device.go index 70e0c6a3d7..dc91359f40 100644 --- a/ml/device.go +++ b/ml/device.go @@ -257,7 +257,7 @@ type DeviceInfo struct { // FilterID is populated with the unfiltered device ID if a numeric ID is used // so the device can be included. - FilteredID string `json:"filtered_id,omitempty"` + FilterID string `json:"filter_id,omitempty"` // Integrated is set true for integrated GPUs, false for Discrete GPUs Integrated bool `json:"integration,omitempty"` @@ -455,6 +455,35 @@ func GetVisibleDevicesEnv(l []DeviceInfo) map[string]string { return env } +// NeedsInitValidation returns true if the device in question has the potential +// to crash at inference time and requires deeper validation before we include +// it in the supported devices list. +func (d DeviceInfo) NeedsInitValidation() bool { + // At this time the only library we know needs a 2nd pass is ROCm since + // rocblas will crash on unsupported devices. 
We want to find those crashes + // during bootstrap discovery so we can eliminate those GPUs before the user + // tries to run inference on them + return d.Library == "ROCm" +} + +// Set the init validation environment variable +func (d DeviceInfo) AddInitValidation(env map[string]string) { + env["GGML_CUDA_INIT"] = "1" // force deep initialization to trigger crash on unsupported GPUs +} + +// PreferredLibrary returns true if this library is preferred over the other input +// library +// Used to filter out Vulkan in favor of CUDA or ROCm +func (d DeviceInfo) PreferredLibrary(other DeviceInfo) bool { + // TODO in the future if we find Vulkan is better than ROCm on some devices + // that implementation can live here. + + if d.Library == "CUDA" || d.Library == "ROCm" { + return true + } + return false +} + func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) { var envVar string switch d.Library { @@ -472,8 +501,8 @@ func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) { if existing { v = v + "," } - if d.FilteredID != "" { - v = v + d.FilteredID + if d.FilterID != "" { + v = v + d.FilterID } else { v = v + d.ID }