Adjust mmap logic for CUDA on Windows for faster model load

On Windows, recent llama.cpp changes make mmap slower in most cases, so it now defaults to off. This also implements a tri-state for use_mmap so we can distinguish between a user-provided value of true or false and a value that was left unspecified.
2025-12-08 04:12:09 +01:00 · 2024-06-17 12:14:42 -07:00
parent 8ed51cac37
commit 171796791f
3 changed files with 96 additions and 15 deletions
--- a/api/types_test.go
+++ b/api/types_test.go
@@ -105,3 +105,39 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
 		})
 	}
 }
+
+func TestUseMmapParsingFromJSON(t *testing.T) { // Checks how the "use_mmap" JSON key ends up in the tri-state opts.UseMMap.
+	tests := []struct {
+		name string   // subtest name passed to t.Run
+		req  string   // raw JSON request body to decode
+		exp  TriState // value expected in opts.UseMMap after FromMap
+	}{
+		{
+			name: "Undefined",
+			req:  `{ }`, // key omitted entirely
+			exp:  TriStateUndefined,
+		},
+		{
+			name: "True",
+			req:  `{ "use_mmap": true }`, // explicit true
+			exp:  TriStateTrue,
+		},
+		{
+			name: "False",
+			req:  `{ "use_mmap": false }`, // explicit false must not collapse into "undefined"
+			exp:  TriStateFalse,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			var oMap map[string]interface{}
+			err := json.Unmarshal([]byte(test.req), &oMap) // decode into a generic map, the form handed to FromMap below
+			require.NoError(t, err)
+			opts := DefaultOptions() // start from the defaults, then apply the decoded request on top
+			err = opts.FromMap(oMap)
+			require.NoError(t, err)
+			assert.Equal(t, test.exp, opts.UseMMap)
+		})
+	}
+}