mirror of
https://github.com/ollama/ollama.git
synced 2025-03-29 11:11:47 +01:00
* llama: wire up builtin runner This adds a new entrypoint into the ollama CLI to run the cgo built runner. On Mac arm64, this will have GPU support, but on all other platforms it will be the lowest common denominator CPU build. After we fully transition to the new Go runners more tech-debt can be removed and we can stop building the "default" runner via make and rely on the builtin always. * build: Make target improvements Add a few new targets and help for building locally. This also adjusts the runner lookup to favor local builds, then runners relative to the executable, and finally payloads. * Support customized CPU flags for runners This implements a simplified custom CPU flags pattern for the runners. When built without overrides, the runner name contains the vector flag we check for (AVX) to ensure we don't try to run on unsupported systems and crash. If the user builds a customized set, we omit the naming scheme and don't check for compatibility. This avoids checking requirements at runtime, so that logic has been removed as well. This can be used to build GPU runners with no vector flags, or CPU/GPU runners with additional flags (e.g. AVX512) enabled. * Use relative paths If the user checks out the repo in a path that contains spaces, make gets really confused so use relative paths for everything in-repo to avoid breakage. * Remove payloads from main binary * install: clean up prior libraries This removes support for v0.3.6 and older versions (before the tar bundle) and ensures we clean up prior libraries before extracting the bundle(s). Without this change, runners and dependent libraries could leak when we update and lead to subtle runtime errors.
223 lines
6.8 KiB
Go
223 lines
6.8 KiB
Go
package discover
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"slices"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/ollama/ollama/envconfig"
|
|
"github.com/ollama/ollama/format"
|
|
)
|
|
|
|
// iGPUName is the device name reported for AMD integrated graphics.
//
// TODO We're looking for this exact name to detect iGPUs since
// hipGetDeviceProperties never reports integrated==true
const iGPUName = "AMD Radeon(TM) Graphics"
|
|
|
|
// ROCmLibGlobs lists the file name patterns used to validate if the given
// ROCm lib is usable. This is not sufficient to discern v5 vs v6.
var ROCmLibGlobs = []string{
	"hipblas.dll",
	"rocblas",
}

// RocmStandardLocations lists the well-known ROCm install directories.
// TODO glob?
var RocmStandardLocations = []string{
	"C:\\Program Files\\AMD\\ROCm\\6.1\\bin",
}
|
|
|
|
// Only called once during bootstrap
|
|
func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
|
|
resp := []RocmGPUInfo{}
|
|
hl, err := NewHipLib()
|
|
if err != nil {
|
|
slog.Debug(err.Error())
|
|
return nil, err
|
|
}
|
|
defer hl.Release()
|
|
|
|
driverMajor, driverMinor, err := hl.AMDDriverVersion()
|
|
if err != nil {
|
|
// For now this is benign, but we may eventually need to fail compatibility checks
|
|
slog.Debug("error looking up amd driver version", "error", err)
|
|
}
|
|
|
|
// Note: the HIP library automatically handles subsetting to any *_VISIBLE_DEVICES the user specified
|
|
count := hl.HipGetDeviceCount()
|
|
if count == 0 {
|
|
err := fmt.Errorf("no compatible amdgpu devices detected")
|
|
slog.Info(err.Error())
|
|
return nil, err
|
|
}
|
|
depPaths := LibraryDirs()
|
|
libDir, err := AMDValidateLibDir()
|
|
if err != nil {
|
|
err = fmt.Errorf("unable to verify rocm library: %w", err)
|
|
slog.Warn(err.Error())
|
|
return nil, err
|
|
}
|
|
depPaths = append(depPaths, libDir)
|
|
|
|
var supported []string
|
|
gfxOverride := envconfig.HsaOverrideGfxVersion()
|
|
if gfxOverride == "" {
|
|
supported, err = GetSupportedGFX(libDir)
|
|
if err != nil {
|
|
err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
|
|
slog.Warn(err.Error())
|
|
return nil, err
|
|
}
|
|
} else {
|
|
slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
|
|
}
|
|
|
|
slog.Debug("detected hip devices", "count", count)
|
|
// TODO how to determine the underlying device ID when visible devices is causing this to subset?
|
|
for i := range count {
|
|
err = hl.HipSetDevice(i)
|
|
if err != nil {
|
|
slog.Warn("set device", "id", i, "error", err)
|
|
continue
|
|
}
|
|
|
|
props, err := hl.HipGetDeviceProperties(i)
|
|
if err != nil {
|
|
slog.Warn("get properties", "id", i, "error", err)
|
|
continue
|
|
}
|
|
n := bytes.IndexByte(props.Name[:], 0)
|
|
name := string(props.Name[:n])
|
|
// TODO is UUID actually populated on windows?
|
|
// Can luid be used on windows for setting visible devices (and is it actually set?)
|
|
n = bytes.IndexByte(props.GcnArchName[:], 0)
|
|
gfx := string(props.GcnArchName[:n])
|
|
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
|
|
// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
|
|
// TODO Why isn't props.iGPU accurate!?
|
|
|
|
freeMemory, totalMemory, err := hl.HipMemGetInfo()
|
|
if err != nil {
|
|
slog.Warn("get mem info", "id", i, "error", err)
|
|
continue
|
|
}
|
|
|
|
gpuInfo := RocmGPUInfo{
|
|
GpuInfo: GpuInfo{
|
|
Library: "rocm",
|
|
memInfo: memInfo{
|
|
TotalMemory: totalMemory,
|
|
FreeMemory: freeMemory,
|
|
},
|
|
// Free memory reporting on Windows is not reliable until we bump to ROCm v6.2
|
|
UnreliableFreeMemory: true,
|
|
|
|
ID: strconv.Itoa(i), // TODO this is probably wrong if we specify visible devices
|
|
DependencyPath: depPaths,
|
|
MinimumMemory: rocmMinimumMemory,
|
|
Name: name,
|
|
Compute: gfx,
|
|
DriverMajor: driverMajor,
|
|
DriverMinor: driverMinor,
|
|
},
|
|
index: i,
|
|
}
|
|
|
|
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
|
|
if strings.EqualFold(name, iGPUName) || totalMemory < IGPUMemLimit {
|
|
reason := "unsupported Radeon iGPU detected skipping"
|
|
slog.Info(reason, "id", gpuInfo.ID, "total", format.HumanBytes2(totalMemory))
|
|
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
|
|
GpuInfo: gpuInfo.GpuInfo,
|
|
Reason: reason,
|
|
})
|
|
continue
|
|
}
|
|
|
|
// Strip off Target Features when comparing
|
|
if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
|
|
reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
|
|
slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
|
|
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
|
|
GpuInfo: gpuInfo.GpuInfo,
|
|
Reason: reason,
|
|
})
|
|
// HSA_OVERRIDE_GFX_VERSION not supported on windows
|
|
continue
|
|
} else {
|
|
slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
|
|
}
|
|
|
|
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
|
|
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
|
|
|
|
resp = append(resp, gpuInfo)
|
|
}
|
|
|
|
return resp, nil
|
|
}
|
|
|
|
func AMDValidateLibDir() (string, error) {
|
|
libDir, err := commonAMDValidateLibDir()
|
|
if err == nil {
|
|
return libDir, nil
|
|
}
|
|
|
|
// Installer payload (if we're running from some other location)
|
|
localAppData := os.Getenv("LOCALAPPDATA")
|
|
appDir := filepath.Join(localAppData, "Programs", "Ollama")
|
|
rocmTargetDir := filepath.Join(appDir, envconfig.LibRelativeToExe(), "lib", "ollama")
|
|
if rocmLibUsable(rocmTargetDir) {
|
|
slog.Debug("detected ollama installed ROCm at " + rocmTargetDir)
|
|
return rocmTargetDir, nil
|
|
}
|
|
|
|
// Should not happen on windows since we include it in the installer, but stand-alone binary might hit this
|
|
slog.Warn("amdgpu detected, but no compatible rocm library found. Please install ROCm")
|
|
return "", errors.New("no suitable rocm found, falling back to CPU")
|
|
}
|
|
|
|
func (gpus RocmGPUInfoList) RefreshFreeMemory() error {
|
|
if len(gpus) == 0 {
|
|
return nil
|
|
}
|
|
hl, err := NewHipLib()
|
|
if err != nil {
|
|
slog.Debug(err.Error())
|
|
return nil
|
|
}
|
|
defer hl.Release()
|
|
|
|
for i := range gpus {
|
|
err := hl.HipSetDevice(gpus[i].index)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
freeMemory, _, err := hl.HipMemGetInfo()
|
|
if err != nil {
|
|
slog.Warn("get mem info", "id", i, "error", err)
|
|
continue
|
|
}
|
|
slog.Debug("updating rocm free memory", "gpu", gpus[i].ID, "name", gpus[i].Name, "before", format.HumanBytes2(gpus[i].FreeMemory), "now", format.HumanBytes2(freeMemory))
|
|
gpus[i].FreeMemory = freeMemory
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
|
ids := []string{}
|
|
for _, info := range gpuInfo {
|
|
if info.Library != "rocm" {
|
|
// TODO shouldn't happen if things are wired correctly...
|
|
slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
|
|
continue
|
|
}
|
|
ids = append(ids, info.ID)
|
|
}
|
|
// There are 3 potential env vars to use to select GPUs.
|
|
// ROCR_VISIBLE_DEVICES supports UUID or numeric but does not work on Windows
|
|
// HIP_VISIBLE_DEVICES supports numeric IDs only
|
|
// GPU_DEVICE_ORDINAL supports numeric IDs only
|
|
return "HIP_VISIBLE_DEVICES", strings.Join(ids, ",")
|
|
}
|