From 34088dbcfb47546fc0f375276173467bc8bbed29 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Tue, 8 Jul 2025 11:59:06 -0700 Subject: [PATCH] API/CLI context enhancements (#11331) * API: expose context size of loaded models * CLI: add context UX This adds a column in the ps output to show the model's context size. --- api/types.go | 15 ++++++++------- cmd/cmd.go | 5 +++-- server/routes.go | 3 +++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/api/types.go b/api/types.go index f1e47c5928..699dba4287 100644 --- a/api/types.go +++ b/api/types.go @@ -468,13 +468,14 @@ type ListModelResponse struct { // ProcessModelResponse is a single model description in [ProcessResponse]. type ProcessModelResponse struct { - Name string `json:"name"` - Model string `json:"model"` - Size int64 `json:"size"` - Digest string `json:"digest"` - Details ModelDetails `json:"details,omitempty"` - ExpiresAt time.Time `json:"expires_at"` - SizeVRAM int64 `json:"size_vram"` + Name string `json:"name"` + Model string `json:"model"` + Size int64 `json:"size"` + Digest string `json:"digest"` + Details ModelDetails `json:"details,omitempty"` + ExpiresAt time.Time `json:"expires_at"` + SizeVRAM int64 `json:"size_vram"` + ContextLength int `json:"context_length"` } type TokenResponse struct { diff --git a/cmd/cmd.go b/cmd/cmd.go index 2d16537906..b569ddddcb 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -583,12 +583,13 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error { } else { until = format.HumanTime(m.ExpiresAt, "Never") } - data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, until}) + ctxStr := strconv.Itoa(m.ContextLength) + data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, ctxStr, until}) } } table := tablewriter.NewWriter(os.Stdout) - table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"}) + table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "CONTEXT", "UNTIL"}) 
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT) table.SetAlignment(tablewriter.ALIGN_LEFT) table.SetHeaderLine(false) diff --git a/server/routes.go b/server/routes.go index cb46cef11d..603cd42a25 100644 --- a/server/routes.go +++ b/server/routes.go @@ -1404,6 +1404,9 @@ func (s *Server) PsHandler(c *gin.Context) { Details: modelDetails, ExpiresAt: v.expiresAt, } + if v.Options != nil { + mr.ContextLength = v.Options.NumCtx / v.numParallel + } // The scheduler waits to set expiresAt, so if a model is loading it's // possible that it will be set to the unix epoch. For those cases, just // calculate the time w/ the sessionDuration instead.