Revert "add truncate and shift parameters (#12519)" (#12545)

This reverts commit 6a62b894c7.
This commit is contained in:
Jeffrey Morgan
2025-10-08 17:57:57 -07:00
committed by GitHub
parent 6a62b894c7
commit 7d965258ce
8 changed files with 67 additions and 272 deletions

View File

@@ -594,58 +594,6 @@ func TestGenerateChat(t *testing.T) {
t.Errorf("final tool call mismatch (-got +want):\n%s", diff)
}
})
t.Run("status error non-streaming", func(t *testing.T) {
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
return api.StatusError{
StatusCode: http.StatusServiceUnavailable,
Status: "Service Unavailable",
ErrorMessage: "model is overloaded",
}
}
stream := false
w := createRequest(t, s.ChatHandler, api.ChatRequest{
Model: "test",
Messages: []api.Message{
{Role: "user", Content: "Hello!"},
},
Stream: &stream,
})
if w.Code != http.StatusServiceUnavailable {
t.Errorf("expected status 503, got %d", w.Code)
}
if diff := cmp.Diff(w.Body.String(), `{"error":"model is overloaded"}`); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
t.Run("status error streaming", func(t *testing.T) {
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
return api.StatusError{
StatusCode: http.StatusTooManyRequests,
Status: "Too Many Requests",
ErrorMessage: "rate limit exceeded",
}
}
w := createRequest(t, s.ChatHandler, api.ChatRequest{
Model: "test",
Messages: []api.Message{
{Role: "user", Content: "Hello!"},
},
})
if w.Code != http.StatusTooManyRequests {
t.Errorf("expected status 429, got %d", w.Code)
}
if diff := cmp.Diff(w.Body.String(), `{"error":"rate limit exceeded"}`); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
func TestGenerate(t *testing.T) {
@@ -1020,55 +968,6 @@ func TestGenerate(t *testing.T) {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
t.Run("status error non-streaming", func(t *testing.T) {
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
return api.StatusError{
StatusCode: http.StatusServiceUnavailable,
Status: "Service Unavailable",
ErrorMessage: "model is overloaded",
}
}
streamRequest := false
w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
Model: "test",
Prompt: "Hello!",
Stream: &streamRequest,
})
if w.Code != http.StatusServiceUnavailable {
t.Errorf("expected status 503, got %d", w.Code)
}
if diff := cmp.Diff(w.Body.String(), `{"error":"model is overloaded"}`); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
t.Run("status error streaming", func(t *testing.T) {
mock.CompletionFn = func(ctx context.Context, r llm.CompletionRequest, fn func(r llm.CompletionResponse)) error {
return api.StatusError{
StatusCode: http.StatusTooManyRequests,
Status: "Too Many Requests",
ErrorMessage: "rate limit exceeded",
}
}
w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
Model: "test",
Prompt: "Hello!",
Stream: &stream,
})
if w.Code != http.StatusTooManyRequests {
t.Errorf("expected status 429, got %d", w.Code)
}
if diff := cmp.Diff(w.Body.String(), `{"error":"rate limit exceeded"}`); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
func TestChatWithPromptEndingInThinkTag(t *testing.T) {