Token budgets (#1302)

---------

Co-authored-by: Nick Donohue <ndonohue@gmail.com>
This commit is contained in:
Chris Weaver 2024-04-04 20:43:24 -07:00 committed by GitHub
parent 7ba7224929
commit 447791b455
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 279 additions and 3 deletions

View File

@ -40,6 +40,10 @@ DEFAULT_BOOST = 0
SESSION_KEY = "session"
QUERY_EVENT_ID = "query_event_id"
LLM_CHUNKS = "llm_chunks"
# Keys used by the token budget feature.
# TOKEN_BUDGET_SETTINGS is the key under which the whole settings object is
# saved (as a JSON string) in the dynamic config store; the other three are the
# field names inside that JSON object.
# Budget size; the budget check multiplies it by 1000 (i.e. thousands of tokens)
TOKEN_BUDGET = "token_budget"
# Length of the usage window; the budget check interprets it as hours
TOKEN_BUDGET_TIME_PERIOD = "token_budget_time_period"
# Boolean switch; when missing or false the budget check lets everything through
ENABLE_TOKEN_BUDGET = "enable_token_budget"
TOKEN_BUDGET_SETTINGS = "token_budget_settings"
# For chunking/processing chunks
TITLE_SEPARATOR = "\n\r\n"

View File

@ -1,3 +1,4 @@
import json
from collections.abc import Callable
from datetime import datetime
from datetime import timedelta
@ -5,6 +6,7 @@ from datetime import timezone
from typing import cast
from fastapi import APIRouter
from fastapi import Body
from fastapi import Depends
from fastapi import HTTPException
from sqlalchemy.orm import Session
@ -12,8 +14,12 @@ from sqlalchemy.orm import Session
from danswer.auth.users import current_admin_user
from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
from danswer.configs.constants import DocumentSource
from danswer.configs.constants import ENABLE_TOKEN_BUDGET
from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY
from danswer.configs.constants import GEN_AI_DETECTED_MODEL
from danswer.configs.constants import TOKEN_BUDGET
from danswer.configs.constants import TOKEN_BUDGET_SETTINGS
from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD
from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER
from danswer.configs.model_configs import GEN_AI_MODEL_VERSION
from danswer.db.connector_credential_pair import get_connector_credential_pair
@ -262,3 +268,36 @@ def create_deletion_attempt_for_connector_id(
file_store = get_default_file_store(db_session)
for file_name in connector.connector_specific_config["file_locations"]:
file_store.delete_file(file_name)
@router.get("/admin/token-budget-settings")
def get_token_budget_settings(_: User = Depends(current_admin_user)) -> dict:
    """Return the stored token budget settings (admin only).

    The settings are kept as a JSON string in the dynamic config store under
    ``TOKEN_BUDGET_SETTINGS``; this endpoint decodes and returns them.

    Raises:
        HTTPException: 404 when no settings have been stored.
    """
    try:
        # Only the store lookup can signal "not configured"
        raw_settings = cast(
            str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS)
        )
    except ConfigNotFoundError:
        raise HTTPException(status_code=404, detail="Token budget settings not found.")
    return json.loads(raw_settings)
@router.put("/admin/token-budget-settings")
def update_token_budget_settings(
    _: User = Depends(current_admin_user),
    enable_token_budget: bool = Body(..., embed=True),
    token_budget: int = Body(..., ge=0, embed=True),  # must be >= 0
    token_budget_time_period: int = Body(..., ge=1, embed=True),  # must be >= 1
) -> dict[str, str]:
    """Replace the stored token budget settings (admin only).

    The three body fields are validated by the ``Body`` constraints above,
    packed into one JSON object and written to the dynamic config store under
    ``TOKEN_BUDGET_SETTINGS``.

    Returns:
        A confirmation message.
    """
    new_settings = {
        ENABLE_TOKEN_BUDGET: enable_token_budget,
        TOKEN_BUDGET: token_budget,
        TOKEN_BUDGET_TIME_PERIOD: token_budget_time_period,
    }
    serialized = json.dumps(new_settings)

    # The store holds the settings as a single JSON string
    get_dynamic_config_store().store(TOKEN_BUDGET_SETTINGS, serialized)
    return {"message": "Token budget settings updated successfully."}

View File

@ -29,6 +29,7 @@ from danswer.server.query_and_chat.models import QueryValidationResponse
from danswer.server.query_and_chat.models import SimpleQueryRequest
from danswer.server.query_and_chat.models import SourceTag
from danswer.server.query_and_chat.models import TagResponse
from danswer.server.query_and_chat.token_budget import check_token_budget
from danswer.utils.logger import setup_logger
logger = setup_logger()
@ -148,6 +149,7 @@ def stream_query_validation(
def get_answer_with_quote(
query_request: DirectQARequest,
user: User = Depends(current_user),
_: bool = Depends(check_token_budget),
) -> StreamingResponse:
query = query_request.messages[0].message
logger.info(f"Received query for one shot answer with quotes: {query}")

View File

@ -0,0 +1,69 @@
import json
import logging
from datetime import datetime
from datetime import timedelta
from typing import cast

from fastapi import HTTPException
from sqlalchemy import func
from sqlalchemy.orm import Session

from danswer.configs.constants import ENABLE_TOKEN_BUDGET
from danswer.configs.constants import TOKEN_BUDGET
from danswer.configs.constants import TOKEN_BUDGET_SETTINGS
from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD
from danswer.db.engine import get_session_context_manager
from danswer.db.models import ChatMessage
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
# A negative budget means "no limit": is_under_token_budget lets every request
# through when the budget is below zero.
BUDGET_LIMIT_DEFAULT = -1 # Default to no limit
# Window length, in hours, used when the stored settings carry no time period
TIME_PERIOD_HOURS_DEFAULT = 12
def is_under_token_budget(db_session: Session) -> bool:
    """Report whether recent token usage is still below the configured budget.

    Loads the token budget settings from the dynamic config store and sums
    ``ChatMessage.token_count`` over all messages sent within the configured
    time window.

    Args:
        db_session: Session used to query the chat message table.

    Returns:
        True when requests may proceed: no settings have been stored, the
        budget is disabled, the budget is negative (no limit), or the summed
        usage is strictly below the budget. False otherwise.
    """
    try:
        settings_json = cast(
            str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS)
        )
    except ConfigNotFoundError:
        # Nothing has been saved yet, so there is no budget to enforce. Without
        # this guard every caller fails until an admin stores settings.
        return True
    settings = json.loads(settings_json)

    if not settings.get(ENABLE_TOKEN_BUDGET, False):
        return True

    budget_limit = settings.get(TOKEN_BUDGET, BUDGET_LIMIT_DEFAULT)
    if budget_limit < 0:
        return True

    period_hours = settings.get(TOKEN_BUDGET_TIME_PERIOD, TIME_PERIOD_HOURS_DEFAULT)
    # NOTE(review): this is a naive local timestamp; confirm it compares
    # correctly against ChatMessage.time_sent if that column is timezone-aware.
    period_start_time = datetime.now() - timedelta(hours=period_hours)

    # Sum of all tokens used within the period; SUM over no rows yields None
    token_sum = (
        db_session.query(func.sum(ChatMessage.token_count))
        .filter(ChatMessage.time_sent >= period_start_time)
        .scalar()
        or 0
    )

    logging.getLogger(__name__).debug(
        "token_sum: %s budget_limit: %s period_hours: %s period_start_time: %s",
        token_sum,
        budget_limit,
        period_hours,
        period_start_time,
    )

    # Budget limit is expressed in thousands of tokens
    return token_sum < budget_limit * 1000
def check_token_budget() -> None:
    """Reject the request when the token budget has been used up.

    Opens its own database session, runs the budget check against it and
    returns normally when usage is still under budget.

    Raises:
        HTTPException: 429 when the budget check reports the budget is spent.
    """
    with get_session_context_manager() as session:
        if is_under_token_budget(session):
            return
        raise HTTPException(
            status_code=429, detail="Sorry, token budget exceeded. Try again later."
        )

View File

@ -1,12 +1,20 @@
"use client";
import { Form, Formik } from "formik";
import { useEffect, useState } from "react";
import { LoadingAnimation } from "@/components/Loading";
import { AdminPageTitle } from "@/components/admin/Title";
import { KeyIcon, TrashIcon } from "@/components/icons/icons";
import {
BooleanFormField,
SectionHeader,
TextFormField,
} from "@/components/admin/connectors/Field";
import { Popup } from "@/components/admin/connectors/Popup";
import { TrashIcon } from "@/components/icons/icons";
import { ApiKeyForm } from "@/components/openai/ApiKeyForm";
import { GEN_AI_API_KEY_URL } from "@/components/openai/constants";
import { fetcher } from "@/lib/fetcher";
import { Text, Title } from "@tremor/react";
import { Button, Divider, Text, Title } from "@tremor/react";
import { FiCpu } from "react-icons/fi";
import useSWR, { mutate } from "swr";
@ -49,14 +57,167 @@ const ExistingKeys = () => {
);
};
/**
 * Admin form for the token budget settings.
 *
 * Loads the current settings from `/api/manage/admin/token-budget-settings`
 * on mount, lets the admin toggle the budget and edit its size (thousands of
 * tokens) and time period (hours), and saves the form with a PUT to the same
 * endpoint. Results are reported through a popup that dismisses itself.
 */
const LLMOptions = () => {
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);
  const [initialValues, setInitialValues] = useState({
    enable_token_budget: false,
    token_budget: "",
    token_budget_time_period: "",
  });

  // Show a popup and clear it after a few seconds, so that success and
  // load-failure messages do not stay on screen indefinitely.
  const showPopup = (message: string, type: "success" | "error") => {
    setPopup({ message, type });
    setTimeout(() => {
      setPopup(null);
    }, 4000);
  };

  const fetchConfig = async () => {
    const response = await fetch("/api/manage/admin/token-budget-settings");
    if (response.ok) {
      const config = await response.json();
      // The response fields carry the same names as the form fields
      setInitialValues({
        enable_token_budget: config.enable_token_budget || false,
        token_budget: config.token_budget || "",
        token_budget_time_period: config.token_budget_time_period || "",
      });
    } else if (response.status !== 404) {
      // 404 only means that no settings have been saved yet; keep the
      // defaults silently in that case and report every other failure.
      showPopup("Failed to load current LLM options.", "error");
    }
  };

  // Fetch current config when the component mounts
  useEffect(() => {
    fetchConfig();
  }, []);

  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <Formik
        enableReinitialize={true}
        initialValues={initialValues}
        onSubmit={async (values) => {
          const response = await fetch(
            "/api/manage/admin/token-budget-settings",
            {
              method: "PUT",
              headers: {
                "Content-Type": "application/json",
              },
              body: JSON.stringify(values),
            }
          );
          if (response.ok) {
            showPopup("Updated LLM Options", "success");
            await fetchConfig();
          } else {
            const body = await response.json();
            // Request validation failures return a list of error objects in
            // `detail`; only a plain string can be shown in the popup.
            const detail =
              typeof body.detail === "string" && body.detail !== ""
                ? body.detail
                : "Unable to update LLM options.";
            showPopup(detail, "error");
          }
        }}
      >
        {({ isSubmitting, values, setFieldValue }) => {
          return (
            <Form>
              <Divider />
              <>
                <SectionHeader>Token Budget</SectionHeader>
                <Text>
                  Set a maximum token use per time period. If the token budget
                  is exceeded, the persona will not be able to respond to
                  queries until the next time period.
                </Text>
                <br />
                <BooleanFormField
                  name="enable_token_budget"
                  label="Enable Token Budget"
                  subtext="If enabled, the persona will be limited to the token budget specified below."
                  onChange={(e) => {
                    setFieldValue("enable_token_budget", e.target.checked);
                  }}
                />
                {values.enable_token_budget && (
                  <>
                    <TextFormField
                      name="token_budget"
                      label="Token Budget"
                      subtext={
                        <div>
                          How many tokens (in thousands) can be used per time
                          period? If unspecified, no limit will be set.
                        </div>
                      }
                      onChange={(e) => {
                        const value = e.target.value;
                        // Allow only integer values
                        if (value === "" || /^[0-9]+$/.test(value)) {
                          setFieldValue("token_budget", value);
                        }
                      }}
                    />
                    <TextFormField
                      name="token_budget_time_period"
                      label="Token Budget Time Period (hours)"
                      subtext={
                        <div>
                          Specify the length of the time period, in hours, over
                          which the token budget will be applied.
                        </div>
                      }
                      onChange={(e) => {
                        const value = e.target.value;
                        // Allow only integer values
                        if (value === "" || /^[0-9]+$/.test(value)) {
                          setFieldValue("token_budget_time_period", value);
                        }
                      }}
                    />
                  </>
                )}
              </>
              <div className="flex">
                <Button
                  className="w-64 mx-auto"
                  type="submit"
                  disabled={isSubmitting}
                >
                  Submit
                </Button>
              </div>
            </Form>
          );
        }}
      </Formik>
    </>
  );
};
const Page = () => {
return (
<div className="mx-auto container">
<AdminPageTitle
title="LLM Keys"
title="LLM Options"
icon={<FiCpu size={32} className="my-auto" />}
/>
<SectionHeader>LLM Keys</SectionHeader>
<ExistingKeys />
<Title className="mb-2 mt-6">Update Key</Title>
@ -72,6 +233,7 @@ const Page = () => {
}}
/>
</div>
<LLMOptions />
</div>
);
};