ollama/discover/types.go
Michael Yang dcfb7a105c
next build (#8539)
* add build to .dockerignore

* test: only build one arch

* add build to .gitignore

* fix ccache path

* filter amdgpu targets

* only filter if autodetecting

* Don't clobber gpu list for default runner

This ensures the GPU specific environment variables are set properly

* explicitly set CXX compiler for HIP

* Update build_windows.ps1

This isn't complete, but is close.  Dependencies are missing, and it only builds the "default" preset.

* build: add ollama subdir

* add .git to .dockerignore

* docs: update development.md

* update build_darwin.sh

* remove unused scripts

* llm: add cwd and build/lib/ollama to library paths

* default DYLD_LIBRARY_PATH to LD_LIBRARY_PATH in runner on macOS

* add additional cmake output vars for msvc

* interim edits to make server detection logic work with dll directories like lib/ollama/cuda_v12

* remove unncessary filepath.Dir, cleanup

* add hardware-specific directory to path

* use absolute server path

* build: linux arm

* cmake install targets

* remove unused files

* ml: visit each library path once

* build: skip cpu variants on arm

* build: install cpu targets

* build: fix workflow

* shorter names

* fix rocblas install

* docs: clean up development.md

* consistent build dir removal in development.md

* silence -Wimplicit-function-declaration build warnings in ggml-cpu

* update readme

* update development readme

* llm: update library lookup logic now that there is one runner (#8587)

* tweak development.md

* update docs

* add windows cuda/rocm tests

---------

Co-authored-by: jmorganca <jmorganca@gmail.com>
Co-authored-by: Daniel Hiltgen <daniel@ollama.com>
2025-01-29 15:03:38 -08:00

184 lines
4.9 KiB
Go

package discover
import (
"fmt"
"log/slog"
"github.com/ollama/ollama/format"
)
type memInfo struct {
TotalMemory uint64 `json:"total_memory,omitempty"`
FreeMemory uint64 `json:"free_memory,omitempty"`
FreeSwap uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}
// Beginning of an `ollama info` command
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
memInfo
Library string `json:"library,omitempty"`
// Optional variant to select (e.g. versions, cpu feature flags)
Variant string `json:"variant"`
// MinimumMemory represents the minimum memory required to use the GPU
MinimumMemory uint64 `json:"-"`
// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
DependencyPath []string `json:"lib_path,omitempty"`
// Extra environment variables specific to the GPU as list of [key,value]
EnvWorkarounds [][2]string `json:"envs,omitempty"`
// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
// the FreeMemory is best effort, and may over or under report actual memory usage
// False indicates FreeMemory can generally be trusted on this GPU
UnreliableFreeMemory bool
// GPU information
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
Name string `json:"name"` // user friendly name if available
Compute string `json:"compute"` // Compute Capability or gfx
// Driver Information - TODO no need to put this on each GPU
DriverMajor int `json:"driver_major,omitempty"`
DriverMinor int `json:"driver_minor,omitempty"`
// TODO other performance capability info to help in scheduling decisions
}
func (gpu GpuInfo) RunnerName() string {
if gpu.Variant != "" {
return gpu.Library + "_" + gpu.Variant
}
return gpu.Library
}
type CPUInfo struct {
GpuInfo
CPUs []CPU
}
// CPU type represents a CPU Package occupying a socket
type CPU struct {
ID string `cpuinfo:"processor"`
VendorID string `cpuinfo:"vendor_id"`
ModelName string `cpuinfo:"model name"`
CoreCount int
EfficiencyCoreCount int // Performance = CoreCount - Efficiency
ThreadCount int
}
type CudaGPUInfo struct {
GpuInfo
OSOverhead uint64 // Memory overhead between the driver library and management library
index int //nolint:unused,nolintlint
computeMajor int //nolint:unused,nolintlint
computeMinor int //nolint:unused,nolintlint
}
type CudaGPUInfoList []CudaGPUInfo
type RocmGPUInfo struct {
GpuInfo
usedFilepath string //nolint:unused,nolintlint
index int //nolint:unused,nolintlint
}
type RocmGPUInfoList []RocmGPUInfo
type OneapiGPUInfo struct {
GpuInfo
driverIndex int //nolint:unused,nolintlint
gpuIndex int //nolint:unused,nolintlint
}
type OneapiGPUInfoList []OneapiGPUInfo
type GpuInfoList []GpuInfo
type UnsupportedGPUInfo struct {
GpuInfo
Reason string `json:"reason"`
}
// Split up the set of gpu info's by Library and variant
func (l GpuInfoList) ByLibrary() []GpuInfoList {
resp := []GpuInfoList{}
libs := []string{}
for _, info := range l {
found := false
requested := info.Library
if info.Variant != "" {
requested += "_" + info.Variant
}
for i, lib := range libs {
if lib == requested {
resp[i] = append(resp[i], info)
found = true
break
}
}
if !found {
libs = append(libs, requested)
resp = append(resp, []GpuInfo{info})
}
}
return resp
}
// Report the GPU information into the log an Info level
func (l GpuInfoList) LogDetails() {
for _, g := range l {
slog.Info("inference compute",
"id", g.ID,
"library", g.Library,
"variant", g.Variant,
"compute", g.Compute,
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
"name", g.Name,
"total", format.HumanBytes2(g.TotalMemory),
"available", format.HumanBytes2(g.FreeMemory),
)
}
}
// Sort by Free Space
type ByFreeMemory []GpuInfo
func (a ByFreeMemory) Len() int { return len(a) }
func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
type SystemInfo struct {
System CPUInfo `json:"system"`
GPUs []GpuInfo `json:"gpus"`
UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
DiscoveryErrors []string `json:"discovery_errors"`
}
// Return the optimal number of threads to use for inference
func (si SystemInfo) GetOptimalThreadCount() int {
if len(si.System.CPUs) == 0 {
return 0
}
coreCount := 0
for _, c := range si.System.CPUs {
coreCount += c.CoreCount - c.EfficiencyCoreCount
}
return coreCount
}
// For each GPU, check if it does NOT support flash attention
func (l GpuInfoList) FlashAttentionSupported() bool {
for _, gpu := range l {
supportsFA := gpu.Library == "metal" ||
(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
gpu.Library == "rocm"
if !supportsFA {
return false
}
}
return true
}