ollama/discover/types.go

package discover

import (
	"fmt"
	"log/slog"

	"github.com/ollama/ollama/format"
)

// memInfo groups the memory counters that GpuInfo embeds, so its fields are
// promoted onto every GpuInfo value. All three JSON tags use omitempty, so a
// zero counter is left out of the marshaled output.
// NOTE(review): values are presumably byte counts (LogDetails formats
// TotalMemory and FreeMemory with format.HumanBytes2) — confirm against the
// discovery code that fills them in.
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}

// GpuInfo describes a single inference device: the library it is driven by,
// its memory counters (promoted from the embedded memInfo), its identity and
// its driver version. CPUInfo, CudaGPUInfo, RocmGPUInfo, OneapiGPUInfo and
// UnsupportedGPUInfo in this file all embed it.
//
// Beginning of an `ollama info` command
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
	memInfo
	Library string `json:"library,omitempty"`

	// Optional variant to select (e.g. versions, cpu feature flags).
	// When non-empty, RunnerName and ByLibrary combine it with Library as
	// "<Library>_<Variant>".
	Variant string `json:"variant"`

	// MinimumMemory represents the minimum memory required to use the GPU.
	// It is tagged json:"-" and therefore never serialized.
	MinimumMemory uint64 `json:"-"`

	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath []string `json:"lib_path,omitempty"`

	// Extra environment variables specific to the GPU, as a list of
	// [key, value] pairs.
	EnvWorkarounds [][2]string `json:"envs,omitempty"`

	// Set to true if we can NOT reliably discover FreeMemory. A value of true
	// indicates that FreeMemory is best effort and may over- or under-report
	// actual memory usage. False indicates FreeMemory can generally be trusted
	// on this GPU.
	// The field has no JSON tag, so encoding/json uses the Go field name as key.
	UnreliableFreeMemory bool

	// GPU information
	ID      string `json:"gpu_id"`  // string to use for selection of this specific GPU
	Name    string `json:"name"`    // user friendly name if available
	Compute string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	// DriverMajor is also what FlashAttentionSupported checks for "cuda" GPUs.
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`

	// TODO other performance capability info to help in scheduling decisions
}

// RunnerName returns the GPU's Library, followed by "_" and its Variant when
// a Variant is set (for example "cuda_v12"). With an empty Variant the Library
// is returned unchanged.
func (gpu GpuInfo) RunnerName() string {
	name := gpu.Library
	if gpu.Variant == "" {
		return name
	}
	return name + "_" + gpu.Variant
}

// CPUInfo is the entry used for the system/CPU (see SystemInfo.System). It
// embeds GpuInfo, so the CPU carries the same library, memory and identity
// fields as a GPU, and adds the list of CPU packages.
type CPUInfo struct {
	GpuInfo
	CPUs []CPU
}

// CPU type represents a CPU Package occupying a socket.
//
// The `cpuinfo` struct tags name the keys that ID, VendorID and ModelName are
// populated from — presumably /proc/cpuinfo entries; the code that reads the
// tags is not in this file (TODO confirm).
// SystemInfo.GetOptimalThreadCount treats CoreCount - EfficiencyCoreCount as
// the number of performance cores in the package.
type CPU struct {
	ID                  string `cpuinfo:"processor"`
	VendorID            string `cpuinfo:"vendor_id"`
	ModelName           string `cpuinfo:"model name"`
	CoreCount           int
	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
	ThreadCount         int
}

// CudaGPUInfo is a GpuInfo extended with CUDA-specific bookkeeping.
// The unexported fields are not referenced anywhere in this file (hence the
// nolint markers); presumably platform-specific discovery code uses them —
// TODO confirm.
type CudaGPUInfo struct {
	GpuInfo
	OSOverhead   uint64 // Memory overhead between the driver library and management library
	index        int    //nolint:unused,nolintlint
	computeMajor int    //nolint:unused,nolintlint
	computeMinor int    //nolint:unused,nolintlint
}
// CudaGPUInfoList is a slice of CudaGPUInfo.
type CudaGPUInfoList []CudaGPUInfo

// RocmGPUInfo is a GpuInfo extended with ROCm-specific bookkeeping.
// The unexported fields are not referenced anywhere in this file (hence the
// nolint markers); presumably platform-specific discovery code uses them —
// TODO confirm.
type RocmGPUInfo struct {
	GpuInfo
	usedFilepath string //nolint:unused,nolintlint
	index        int    //nolint:unused,nolintlint
}
// RocmGPUInfoList is a slice of RocmGPUInfo.
type RocmGPUInfoList []RocmGPUInfo

// OneapiGPUInfo is a GpuInfo extended with oneAPI-specific bookkeeping.
// The unexported fields are not referenced anywhere in this file (hence the
// nolint markers); presumably platform-specific discovery code uses them —
// TODO confirm.
type OneapiGPUInfo struct {
	GpuInfo
	driverIndex int //nolint:unused,nolintlint
	gpuIndex    int //nolint:unused,nolintlint
}
// OneapiGPUInfoList is a slice of OneapiGPUInfo.
type OneapiGPUInfoList []OneapiGPUInfo

// GpuInfoList is a slice of GpuInfo. Its methods in this file group the GPUs
// by library and variant (ByLibrary), log them (LogDetails) and check flash
// attention support across the whole list (FlashAttentionSupported).
type GpuInfoList []GpuInfo

// UnsupportedGPUInfo pairs a GpuInfo with a Reason string; SystemInfo collects
// these in its UnsupportedGPUs field. Reason is serialized as "reason".
type UnsupportedGPUInfo struct {
	GpuInfo
	Reason string `json:"reason"`
}

// ByLibrary splits the list into groups of GPUs that share the same runner
// name, i.e. the same Library and Variant as combined by GpuInfo.RunnerName.
//
// Groups are returned in order of first appearance in l, and the GPUs inside
// each group keep their original relative order. An empty or nil list yields
// an empty, non-nil slice.
func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	// groupIndex maps a runner name to the position of its group in resp, so
	// each GPU is placed with one lookup instead of rescanning every group.
	groupIndex := make(map[string]int)
	for _, info := range l {
		// Reuse RunnerName so the grouping key cannot drift from the name the
		// rest of the package derives from Library and Variant.
		name := info.RunnerName()
		if i, ok := groupIndex[name]; ok {
			resp[i] = append(resp[i], info)
			continue
		}
		groupIndex[name] = len(resp)
		resp = append(resp, GpuInfoList{info})
	}
	return resp
}

// LogDetails writes one Info-level "inference compute" log record per GPU in
// the list, carrying its id, library, variant, compute, driver version
// ("major.minor"), name, and total and available memory formatted with
// format.HumanBytes2.
func (l GpuInfoList) LogDetails() {
	for i := range l {
		gpu := &l[i]
		driver := fmt.Sprintf("%d.%d", gpu.DriverMajor, gpu.DriverMinor)
		slog.Info("inference compute",
			"id", gpu.ID,
			"library", gpu.Library,
			"variant", gpu.Variant,
			"compute", gpu.Compute,
			"driver", driver,
			"name", gpu.Name,
			"total", format.HumanBytes2(gpu.TotalMemory),
			"available", format.HumanBytes2(gpu.FreeMemory),
		)
	}
}

// ByFreeMemory provides the Len, Swap and Less methods of sort.Interface over
// a []GpuInfo, ordering entries by ascending FreeMemory.
type ByFreeMemory []GpuInfo

// Len reports the number of GPUs in the slice.
func (a ByFreeMemory) Len() int {
	return len(a)
}

// Swap exchanges the GPUs at positions i and j.
func (a ByFreeMemory) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the GPU at i has strictly less free memory than the
// GPU at j.
func (a ByFreeMemory) Less(i, j int) bool {
	return a[j].FreeMemory > a[i].FreeMemory
}

// SystemInfo is the JSON-serializable summary of a system: the CPU/system
// entry, the list of GPUs, the GPUs recorded as unsupported (each with a
// Reason), and a list of discovery error messages.
type SystemInfo struct {
	System          CPUInfo              `json:"system"`
	GPUs            []GpuInfo            `json:"gpus"`
	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
	DiscoveryErrors []string             `json:"discovery_errors"`
}

// GetOptimalThreadCount returns the thread count to use for inference: the
// number of performance cores (CoreCount - EfficiencyCoreCount) summed over
// every CPU package in si.System.CPUs. It returns 0 when that list is empty.
func (si SystemInfo) GetOptimalThreadCount() int {
	threads := 0
	for i := range si.System.CPUs {
		pkg := si.System.CPUs[i]
		threads += pkg.CoreCount - pkg.EfficiencyCoreCount
	}
	return threads
}

// FlashAttentionSupported reports whether every GPU in the list supports flash
// attention. A GPU qualifies when its Library is "metal" or "rocm", or when it
// is "cuda" with DriverMajor >= 7. The first GPU that does not qualify makes
// the result false; an empty list yields true.
// NOTE(review): the CUDA condition tests DriverMajor rather than the compute
// capability — confirm that this is the intended threshold.
func (l GpuInfoList) FlashAttentionSupported() bool {
	for _, gpu := range l {
		switch gpu.Library {
		case "metal", "rocm":
			// Supported regardless of driver version.
		case "cuda":
			if gpu.DriverMajor < 7 {
				return false
			}
		default:
			return false
		}
	}
	return true
}
Rename gpu package discover (#7143) Cleaning up go package naming 2024-10-16 17:45:00 -07:00			`package discover`
Adapted rocm support to cgo based llama.cpp 2023-11-29 11:00:37 -08:00
Record more GPU information This cleans up the logging for GPU discovery a bit, and can serve as a foundation to report GPU information in a future UX. 2024-05-07 14:54:26 -07:00			`import (`
			`"fmt"`
			`"log/slog"`

			`"github.com/ollama/ollama/format"`
			`)`

Fix windows system memory lookup This refines the gpu package error handling and fixes a bug with the system memory lookup on windows. 2023-12-22 15:43:31 -08:00			`type memInfo struct {`
Adapted rocm support to cgo based llama.cpp 2023-11-29 11:00:37 -08:00			TotalMemory uint64 `json:"total_memory,omitempty"`
			FreeMemory uint64 `json:"free_memory,omitempty"`
Discovery CPU details for default thread selection (#6264) On windows, detect large multi-socket systems and reduce to the number of cores in one socket for best performance 2024-10-15 11:36:08 -07:00			FreeSwap uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
Fix windows system memory lookup This refines the gpu package error handling and fixes a bug with the system memory lookup on windows. 2023-12-22 15:43:31 -08:00			`}`

			// Beginning of an `ollama info` command
Discovery CPU details for default thread selection (#6264) On windows, detect large multi-socket systems and reduce to the number of cores in one socket for best performance 2024-10-15 11:36:08 -07:00			`type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?`
Fix windows system memory lookup This refines the gpu package error handling and fixes a bug with the system memory lookup on windows. 2023-12-22 15:43:31 -08:00			`memInfo`
			Library string `json:"library,omitempty"`
Adapted rocm support to cgo based llama.cpp 2023-11-29 11:00:37 -08:00
Support multiple variants for a given llm lib type In some cases we may want multiple variants for a given GPU type or CPU. This adds logic to have an optional Variant which we can use to select an optimal library, but also allows us to try multiple variants in case some fail to load. This can be useful for scenarios such as ROCm v5 vs v6 incompatibility or potentially CPU features. 2024-01-05 12:13:08 -08:00			`// Optional variant to select (e.g. versions, cpu feature flags)`
Add Jetson cuda variants for arm This adds new variants for arm64 specific to Jetson platforms 2024-05-30 21:54:07 -07:00			Variant string `json:"variant"`
Support multiple variants for a given llm lib type In some cases we may want multiple variants for a given GPU type or CPU. This adds logic to have an optional Variant which we can use to select an optimal library, but also allows us to try multiple variants in case some fail to load. This can be useful for scenarios such as ROCm v5 vs v6 incompatibility or potentially CPU features. 2024-01-05 12:13:08 -08:00
update memory calculations count each layer independently when deciding gpu offloading 2024-03-18 10:45:22 +01:00			`// MinimumMemory represents the minimum memory required to use the GPU`
partial offloading 2024-04-05 14:50:38 -07:00			MinimumMemory uint64 `json:"-"`
update memory calculations count each layer independently when deciding gpu offloading 2024-03-18 10:45:22 +01:00
Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly`
Jetpack support for Go server (#7217) This adds support for the Jetson JetPack variants into the Go runner 2024-11-12 10:31:52 -08:00			DependencyPath []string `json:"lib_path,omitempty"`
Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00
Workaround gfx900 SDMA bugs Implement support for GPU env var workarounds, and leverage this for the Vega RX 56 which needs HSA_ENABLE_SDMA=0 set to work properly 2024-05-31 16:15:21 -07:00			`// Extra environment variables specific to the GPU as list of [key,value]`
			EnvWorkarounds [][2]string `json:"envs,omitempty"`

Disable concurrency for AMD + Windows Until ROCm v6.2 ships, we wont be able to get accurate free memory reporting on windows, which makes automatic concurrency too risky. Users can still opt-in but will need to pay attention to model sizes otherwise they may thrash/page VRAM or cause OOM crashes. All other platforms and GPUs have accurate VRAM reporting wired up now, so we can turn on concurrency by default. 2024-06-19 13:35:38 -07:00			`// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates`
			`// the FreeMemory is best effort, and may over or under report actual memory usage`
			`// False indicates FreeMemory can generally be trusted on this GPU`
			`UnreliableFreeMemory bool`

Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`// GPU information`
Record more GPU information This cleans up the logging for GPU discovery a bit, and can serve as a foundation to report GPU information in a future UX. 2024-05-07 14:54:26 -07:00			ID string `json:"gpu_id"` // string to use for selection of this specific GPU
			Name string `json:"name"` // user friendly name if available
			Compute string `json:"compute"` // Compute Capability or gfx

			`// Driver Information - TODO no need to put this on each GPU`
			DriverMajor int `json:"driver_major,omitempty"`
			DriverMinor int `json:"driver_minor,omitempty"`
Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00
			`// TODO other performance capability info to help in scheduling decisions`
Adapted rocm support to cgo based llama.cpp 2023-11-29 11:00:37 -08:00			`}`
Detect AMD GPU info via sysfs and block old cards This wires up some new logic to start using sysfs to discover AMD GPU information and detects old cards we can't yet support so we can fallback to CPU mode. 2024-02-11 14:50:06 -08:00
build: Make target improvements (#7499) * llama: wire up builtin runner This adds a new entrypoint into the ollama CLI to run the cgo built runner. On Mac arm64, this will have GPU support, but on all other platforms it will be the lowest common denominator CPU build. After we fully transition to the new Go runners more tech-debt can be removed and we can stop building the "default" runner via make and rely on the builtin always. * build: Make target improvements Add a few new targets and help for building locally. This also adjusts the runner lookup to favor local builds, then runners relative to the executable, and finally payloads. * Support customized CPU flags for runners This implements a simplified custom CPU flags pattern for the runners. When built without overrides, the runner name contains the vector flag we check for (AVX) to ensure we don't try to run on unsupported systems and crash. If the user builds a customized set, we omit the naming scheme and don't check for compatibility. This avoids checking requirements at runtime, so that logic has been removed as well. This can be used to build GPU runners with no vector flags, or CPU/GPU runners with additional flags (e.g. AVX512) enabled. * Use relative paths If the user checks out the repo in a path that contains spaces, make gets really confused so use relative paths for everything in-repo to avoid breakage. * Remove payloads from main binary * install: clean up prior libraries This removes support for v0.3.6 and older versions (before the tar bundle) and ensures we clean up prior libraries before extracting the bundle(s). Without this change, runners and dependent libraries could leak when we update and lead to subtle runtime errors. 2024-12-10 09:47:19 -08:00			`func (gpu GpuInfo) RunnerName() string {`
			`if gpu.Variant != "" {`
			`return gpu.Library + "_" + gpu.Variant`
			`}`
			`return gpu.Library`
			`}`

Refine GPU discovery to bootstrap once Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating. 2024-05-15 15:13:16 -07:00			`type CPUInfo struct {`
			`GpuInfo`
Discovery CPU details for default thread selection (#6264) On windows, detect large multi-socket systems and reduce to the number of cores in one socket for best performance 2024-10-15 11:36:08 -07:00			`CPUs []CPU`
			`}`

			`// CPU type represents a CPU Package occupying a socket`
			`type CPU struct {`
			ID string `cpuinfo:"processor"`
			VendorID string `cpuinfo:"vendor_id"`
			ModelName string `cpuinfo:"model name"`
			`CoreCount int`
			`EfficiencyCoreCount int // Performance = CoreCount - Efficiency`
			`ThreadCount int`
Refine GPU discovery to bootstrap once Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating. 2024-05-15 15:13:16 -07:00			`}`

			`type CudaGPUInfo struct {`
			`GpuInfo`
Add cuda v12 variant and selection logic Based on compute capability and driver version, pick v12 or v11 cuda variants. 2024-06-13 20:46:14 -07:00			`OSOverhead uint64 // Memory overhead between the driver library and management library`
			`index int //nolint:unused,nolintlint`
			`computeMajor int //nolint:unused,nolintlint`
			`computeMinor int //nolint:unused,nolintlint`
Refine GPU discovery to bootstrap once Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating. 2024-05-15 15:13:16 -07:00			`}`
			`type CudaGPUInfoList []CudaGPUInfo`

			`type RocmGPUInfo struct {`
			`GpuInfo`
review comments and coverage 2024-06-05 12:07:20 -07:00			`usedFilepath string //nolint:unused,nolintlint`
			`index int //nolint:unused,nolintlint`
Refine GPU discovery to bootstrap once Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating. 2024-05-15 15:13:16 -07:00			`}`
			`type RocmGPUInfoList []RocmGPUInfo`

			`type OneapiGPUInfo struct {`
			`GpuInfo`
review comments and coverage 2024-06-05 12:07:20 -07:00			`driverIndex int //nolint:unused,nolintlint`
			`gpuIndex int //nolint:unused,nolintlint`
Refine GPU discovery to bootstrap once Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating. 2024-05-15 15:13:16 -07:00			`}`
			`type OneapiGPUInfoList []OneapiGPUInfo`

Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`type GpuInfoList []GpuInfo`

Track GPU discovery failure information (#5820) * Expose GPU discovery failure information * Remove exposed API for now 2024-10-14 16:26:45 -07:00			`type UnsupportedGPUInfo struct {`
			`GpuInfo`
			Reason string `json:"reason"`
			`}`

Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`// Split up the set of gpu info's by Library and variant`
			`func (l GpuInfoList) ByLibrary() []GpuInfoList {`
			`resp := []GpuInfoList{}`
			`libs := []string{}`
			`for _, info := range l {`
			`found := false`
			`requested := info.Library`
next build (#8539) * add build to .dockerignore * test: only build one arch * add build to .gitignore * fix ccache path * filter amdgpu targets * only filter if autodetecting * Don't clobber gpu list for default runner This ensures the GPU specific environment variables are set properly * explicitly set CXX compiler for HIP * Update build_windows.ps1 This isn't complete, but is close. Dependencies are missing, and it only builds the "default" preset. * build: add ollama subdir * add .git to .dockerignore * docs: update development.md * update build_darwin.sh * remove unused scripts * llm: add cwd and build/lib/ollama to library paths * default DYLD_LIBRARY_PATH to LD_LIBRARY_PATH in runner on macOS * add additional cmake output vars for msvc * interim edits to make server detection logic work with dll directories like lib/ollama/cuda_v12 * remove unncessary filepath.Dir, cleanup * add hardware-specific directory to path * use absolute server path * build: linux arm * cmake install targets * remove unused files * ml: visit each library path once * build: skip cpu variants on arm * build: install cpu targets * build: fix workflow * shorter names * fix rocblas install * docs: clean up development.md * consistent build dir removal in development.md * silence -Wimplicit-function-declaration build warnings in ggml-cpu * update readme * update development readme * llm: update library lookup logic now that there is one runner (#8587) * tweak development.md * update docs * add windows cuda/rocm tests --------- Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Daniel Hiltgen <daniel@ollama.com> 2025-01-29 15:03:38 -08:00			`if info.Variant != "" {`
Add Jetson cuda variants for arm This adds new variants for arm64 specific to Jetson platforms 2024-05-30 21:54:07 -07:00			`requested += "_" + info.Variant`
Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`}`
			`for i, lib := range libs {`
			`if lib == requested {`
			`resp[i] = append(resp[i], info)`
			`found = true`
			`break`
			`}`
			`}`
			`if !found {`
gpu: Group GPU Library sets by variant (#6483) The recent cuda variant changes uncovered a bug in ByLibrary which failed to group by common variant for GPU types. 2024-08-23 15:11:56 -07:00			`libs = append(libs, requested)`
Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`resp = append(resp, []GpuInfo{info})`
			`}`
			`}`
			`return resp`
Detect AMD GPU info via sysfs and block old cards This wires up some new logic to start using sysfs to discover AMD GPU information and detects old cards we can't yet support so we can fallback to CPU mode. 2024-02-11 14:50:06 -08:00			`}`
Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00
Record more GPU information This cleans up the logging for GPU discovery a bit, and can serve as a foundation to report GPU information in a future UX. 2024-05-07 14:54:26 -07:00			`// Report the GPU information into the log an Info level`
			`func (l GpuInfoList) LogDetails() {`
			`for _, g := range l {`
			`slog.Info("inference compute",`
			`"id", g.ID,`
			`"library", g.Library,`
Report GPU variant in log 2024-06-19 09:36:30 -07:00			`"variant", g.Variant,`
Record more GPU information This cleans up the logging for GPU discovery a bit, and can serve as a foundation to report GPU information in a future UX. 2024-05-07 14:54:26 -07:00			`"compute", g.Compute,`
			`"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),`
			`"name", g.Name,`
			`"total", format.HumanBytes2(g.TotalMemory),`
			`"available", format.HumanBytes2(g.FreeMemory),`
			`)`
			`}`
			`}`

Request and model concurrency This change adds support for multiple concurrent requests, as well as loading multiple models by spawning multiple runners. The default settings are currently set at 1 concurrent request per model and only 1 loaded model at a time, but these can be adjusted by setting OLLAMA_NUM_PARALLEL and OLLAMA_MAX_LOADED_MODELS. 2024-03-30 09:50:05 -07:00			`// Sort by Free Space`
			`type ByFreeMemory []GpuInfo`

			`func (a ByFreeMemory) Len() int { return len(a) }`
			`func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }`
			`func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }`
Refine GPU discovery to bootstrap once Now that we call the GPU discovery routines many times to update memory, this splits initial discovery from free memory updating. 2024-05-15 15:13:16 -07:00
Track GPU discovery failure information (#5820) * Expose GPU discovery failure information * Remove exposed API for now 2024-10-14 16:26:45 -07:00			`type SystemInfo struct {`
			System CPUInfo `json:"system"`
			GPUs []GpuInfo `json:"gpus"`
			UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
			DiscoveryErrors []string `json:"discovery_errors"`
			`}`
Discovery CPU details for default thread selection (#6264) On windows, detect large multi-socket systems and reduce to the number of cores in one socket for best performance 2024-10-15 11:36:08 -07:00
			`// Return the optimal number of threads to use for inference`
			`func (si SystemInfo) GetOptimalThreadCount() int {`
			`if len(si.System.CPUs) == 0 {`
			`return 0`
			`}`
Refine default thread selection for NUMA systems (#7322) Until we have full NUMA support, this adjusts the default thread selection algorithm to count up the number of performance cores across all sockets. 2024-10-30 15:05:45 -07:00
			`coreCount := 0`
			`for _, c := range si.System.CPUs {`
			`coreCount += c.CoreCount - c.EfficiencyCoreCount`
			`}`

			`return coreCount`
Discovery CPU details for default thread selection (#6264) On windows, detect large multi-socket systems and reduce to the number of cores in one socket for best performance 2024-10-15 11:36:08 -07:00			`}`
llm: introduce k/v context quantization (vRAM improvements) (#6279) 2024-12-04 10:57:19 +11:00
			`// For each GPU, check if it does NOT support flash attention`
			`func (l GpuInfoList) FlashAttentionSupported() bool {`
			`for _, gpu := range l {`
			`supportsFA := gpu.Library == "metal" \|\|`
			`(gpu.Library == "cuda" && gpu.DriverMajor >= 7) \|\|`
			`gpu.Library == "rocm"`

			`if !supportsFA {`
			`return false`
			`}`
			`}`
			`return true`
			`}`