Token budgets (#1302)

---------

Co-authored-by: Nick Donohue <ndonohue@gmail.com>
This commit is contained in:
Chris Weaver 2024-04-04 20:43:24 -07:00 committed by GitHub
parent 7ba7224929
commit 447791b455
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 279 additions and 3 deletions

View File

@ -40,6 +40,10 @@ DEFAULT_BOOST = 0
SESSION_KEY = "session" SESSION_KEY = "session"
QUERY_EVENT_ID = "query_event_id" QUERY_EVENT_ID = "query_event_id"
LLM_CHUNKS = "llm_chunks" LLM_CHUNKS = "llm_chunks"
# Keys for the admin-configurable LLM token budget. TOKEN_BUDGET_SETTINGS is the
# dynamic config store key under which the settings are saved as a JSON string;
# the other three are the field names inside that JSON object.
TOKEN_BUDGET = "token_budget"
TOKEN_BUDGET_TIME_PERIOD = "token_budget_time_period"
ENABLE_TOKEN_BUDGET = "enable_token_budget"
TOKEN_BUDGET_SETTINGS = "token_budget_settings"
# For chunking/processing chunks # For chunking/processing chunks
TITLE_SEPARATOR = "\n\r\n" TITLE_SEPARATOR = "\n\r\n"

View File

@ -1,3 +1,4 @@
import json
from collections.abc import Callable from collections.abc import Callable
from datetime import datetime from datetime import datetime
from datetime import timedelta from datetime import timedelta
@ -5,6 +6,7 @@ from datetime import timezone
from typing import cast from typing import cast
from fastapi import APIRouter from fastapi import APIRouter
from fastapi import Body
from fastapi import Depends from fastapi import Depends
from fastapi import HTTPException from fastapi import HTTPException
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@ -12,8 +14,12 @@ from sqlalchemy.orm import Session
from danswer.auth.users import current_admin_user from danswer.auth.users import current_admin_user
from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
from danswer.configs.constants import DocumentSource from danswer.configs.constants import DocumentSource
from danswer.configs.constants import ENABLE_TOKEN_BUDGET
from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY
from danswer.configs.constants import GEN_AI_DETECTED_MODEL from danswer.configs.constants import GEN_AI_DETECTED_MODEL
from danswer.configs.constants import TOKEN_BUDGET
from danswer.configs.constants import TOKEN_BUDGET_SETTINGS
from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD
from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER
from danswer.configs.model_configs import GEN_AI_MODEL_VERSION from danswer.configs.model_configs import GEN_AI_MODEL_VERSION
from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.connector_credential_pair import get_connector_credential_pair
@ -262,3 +268,36 @@ def create_deletion_attempt_for_connector_id(
file_store = get_default_file_store(db_session) file_store = get_default_file_store(db_session)
for file_name in connector.connector_specific_config["file_locations"]: for file_name in connector.connector_specific_config["file_locations"]:
file_store.delete_file(file_name) file_store.delete_file(file_name)
@router.get("/admin/token-budget-settings")
def get_token_budget_settings(_: User = Depends(current_admin_user)) -> dict:
    """Return the stored token budget settings (admin only).

    The settings are kept in the dynamic config store as a JSON string under
    TOKEN_BUDGET_SETTINGS; the decoded object is returned as-is.

    Raises:
        HTTPException: 404 when the config store has no token budget settings.
    """
    try:
        raw_settings = get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS)
        return json.loads(cast(str, raw_settings))
    except ConfigNotFoundError:
        raise HTTPException(status_code=404, detail="Token budget settings not found.")
@router.put("/admin/token-budget-settings")
def update_token_budget_settings(
    _: User = Depends(current_admin_user),
    enable_token_budget: bool = Body(..., embed=True),
    token_budget: int = Body(..., ge=0, embed=True),  # must be >= 0
    token_budget_time_period: int = Body(..., ge=1, embed=True),  # must be >= 1
) -> dict[str, str]:
    """Persist new token budget settings (admin only).

    Args:
        enable_token_budget: Whether the budget should be enforced.
        token_budget: Non-negative budget value.
        token_budget_time_period: Positive length of the budget window.

    Returns:
        A dict with a single "message" entry confirming the update.
    """
    new_settings = {
        ENABLE_TOKEN_BUDGET: enable_token_budget,
        TOKEN_BUDGET: token_budget,
        TOKEN_BUDGET_TIME_PERIOD: token_budget_time_period,
    }
    # The config store holds the settings as one JSON-encoded string.
    serialized = json.dumps(new_settings)
    get_dynamic_config_store().store(TOKEN_BUDGET_SETTINGS, serialized)

    return {"message": "Token budget settings updated successfully."}

View File

@ -29,6 +29,7 @@ from danswer.server.query_and_chat.models import QueryValidationResponse
from danswer.server.query_and_chat.models import SimpleQueryRequest from danswer.server.query_and_chat.models import SimpleQueryRequest
from danswer.server.query_and_chat.models import SourceTag from danswer.server.query_and_chat.models import SourceTag
from danswer.server.query_and_chat.models import TagResponse from danswer.server.query_and_chat.models import TagResponse
from danswer.server.query_and_chat.token_budget import check_token_budget
from danswer.utils.logger import setup_logger from danswer.utils.logger import setup_logger
logger = setup_logger() logger = setup_logger()
@ -148,6 +149,7 @@ def stream_query_validation(
def get_answer_with_quote( def get_answer_with_quote(
query_request: DirectQARequest, query_request: DirectQARequest,
user: User = Depends(current_user), user: User = Depends(current_user),
_: bool = Depends(check_token_budget),
) -> StreamingResponse: ) -> StreamingResponse:
query = query_request.messages[0].message query = query_request.messages[0].message
logger.info(f"Received query for one shot answer with quotes: {query}") logger.info(f"Received query for one shot answer with quotes: {query}")

View File

@ -0,0 +1,69 @@
import json
import logging
from datetime import datetime
from datetime import timedelta
from typing import cast

from fastapi import HTTPException
from sqlalchemy import func
from sqlalchemy.orm import Session

from danswer.configs.constants import ENABLE_TOKEN_BUDGET
from danswer.configs.constants import TOKEN_BUDGET
from danswer.configs.constants import TOKEN_BUDGET_SETTINGS
from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD
from danswer.db.engine import get_session_context_manager
from danswer.db.models import ChatMessage
from danswer.dynamic_configs.factory import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
# A negative budget means "no limit": is_under_token_budget skips enforcement
# for any budget below zero.
BUDGET_LIMIT_DEFAULT = -1 # Default to no limit
# Length of the budget window, in hours, used when the stored settings do not
# specify a time period.
TIME_PERIOD_HOURS_DEFAULT = 12
def is_under_token_budget(db_session: Session) -> bool:
    """Report whether token usage in the current window is below the budget.

    The settings are read from the dynamic config store. Enforcement is
    skipped (the function returns True) when no settings have been stored,
    when the budget is disabled, or when the budget is negative.

    Args:
        db_session: Session used to sum ChatMessage.token_count over the window.

    Returns:
        True if requests may proceed, False once the summed token count has
        reached the budget.
    """
    try:
        settings_json = cast(
            str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS)
        )
    except ConfigNotFoundError:
        # Nothing has been configured yet, so there is no budget to enforce.
        # Without this, every request guarded by the budget check would fail
        # until an admin saved settings for the first time.
        return True

    settings = json.loads(settings_json)

    if not settings.get(ENABLE_TOKEN_BUDGET, False):
        return True

    budget_limit = settings.get(TOKEN_BUDGET, BUDGET_LIMIT_DEFAULT)
    if budget_limit < 0:
        return True

    period_hours = settings.get(TOKEN_BUDGET_TIME_PERIOD, TIME_PERIOD_HOURS_DEFAULT)
    # NOTE(review): this is a naive local timestamp; confirm it matches how
    # ChatMessage.time_sent is stored (timezone-aware vs. naive).
    period_start_time = datetime.now() - timedelta(hours=period_hours)

    # Sum of all tokens used within the period; SUM over no rows yields None.
    token_sum = (
        db_session.query(func.sum(ChatMessage.token_count))
        .filter(ChatMessage.time_sent >= period_start_time)
        .scalar()
        or 0
    )

    # Debug-level log instead of an unconditional print on every request.
    logging.getLogger(__name__).debug(
        "token_sum: %s budget_limit: %s period_hours: %s period_start_time: %s",
        token_sum,
        budget_limit,
        period_hours,
        period_start_time,
    )

    # Budget limit is expressed in thousands of tokens.
    return token_sum < budget_limit * 1000
def check_token_budget() -> None:
    """Request guard that fails once the token budget has been used up.

    Opens its own database session and delegates the decision to
    is_under_token_budget.

    Raises:
        HTTPException: 429 when the budget is exceeded.
    """
    with get_session_context_manager() as db_session:
        budget_exceeded = not is_under_token_budget(db_session)
        if budget_exceeded:
            raise HTTPException(
                status_code=429, detail="Sorry, token budget exceeded. Try again later."
            )

View File

@ -1,12 +1,20 @@
"use client"; "use client";
import { Form, Formik } from "formik";
import { useEffect, useState } from "react";
import { LoadingAnimation } from "@/components/Loading"; import { LoadingAnimation } from "@/components/Loading";
import { AdminPageTitle } from "@/components/admin/Title"; import { AdminPageTitle } from "@/components/admin/Title";
import { KeyIcon, TrashIcon } from "@/components/icons/icons"; import {
BooleanFormField,
SectionHeader,
TextFormField,
} from "@/components/admin/connectors/Field";
import { Popup } from "@/components/admin/connectors/Popup";
import { TrashIcon } from "@/components/icons/icons";
import { ApiKeyForm } from "@/components/openai/ApiKeyForm"; import { ApiKeyForm } from "@/components/openai/ApiKeyForm";
import { GEN_AI_API_KEY_URL } from "@/components/openai/constants"; import { GEN_AI_API_KEY_URL } from "@/components/openai/constants";
import { fetcher } from "@/lib/fetcher"; import { fetcher } from "@/lib/fetcher";
import { Text, Title } from "@tremor/react"; import { Button, Divider, Text, Title } from "@tremor/react";
import { FiCpu } from "react-icons/fi"; import { FiCpu } from "react-icons/fi";
import useSWR, { mutate } from "swr"; import useSWR, { mutate } from "swr";
@ -49,14 +57,167 @@ const ExistingKeys = () => {
); );
}; };
/**
 * Admin form for the LLM token budget.
 *
 * Loads the current settings from /api/manage/admin/token-budget-settings on
 * mount and saves edits back with a PUT to the same endpoint. The two numeric
 * inputs are kept as strings in form state and converted to integers on
 * submit, because the endpoint expects integer fields.
 */
const LLMOptions = () => {
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);
  const [initialValues, setInitialValues] = useState({
    enable_token_budget: false,
    token_budget: "",
    token_budget_time_period: "",
  });

  // Show a popup and dismiss it after a few seconds. Every popup goes through
  // here so that success and load-failure messages are dismissed too, not only
  // failed updates.
  const showPopup = (message: string, type: "success" | "error") => {
    setPopup({ message, type });
    setTimeout(() => {
      setPopup(null);
    }, 4000);
  };

  const fetchConfig = async () => {
    const response = await fetch("/api/manage/admin/token-budget-settings");
    if (response.ok) {
      const config = await response.json();
      // Normalize numbers to strings so form state always holds strings;
      // missing or zero values are shown as blank.
      setInitialValues({
        enable_token_budget: config.enable_token_budget || false,
        token_budget: config.token_budget ? String(config.token_budget) : "",
        token_budget_time_period: config.token_budget_time_period
          ? String(config.token_budget_time_period)
          : "",
      });
    } else if (response.status !== 404) {
      // A 404 only means no settings have been saved yet; keep the defaults
      // silently in that case and report every other failure.
      showPopup("Failed to load current LLM options.", "error");
    }
  };

  // Fetch current config when the component mounts
  useEffect(() => {
    fetchConfig();
  }, []);

  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <Formik
        enableReinitialize={true}
        initialValues={initialValues}
        onSubmit={async (values) => {
          const tokenBudget = Number.parseInt(String(values.token_budget), 10);
          const timePeriod = Number.parseInt(
            String(values.token_budget_time_period),
            10
          );
          const budgetValid = !Number.isNaN(tokenBudget) && tokenBudget >= 0;
          const periodValid = !Number.isNaN(timePeriod) && timePeriod >= 1;

          // When the budget is enabled both numbers are required; catch a
          // blank field here instead of letting the request be rejected.
          if (values.enable_token_budget && !(budgetValid && periodValid)) {
            showPopup(
              "Please enter a token budget and a time period of at least 1 hour.",
              "error"
            );
            return;
          }

          const response = await fetch(
            "/api/manage/admin/token-budget-settings",
            {
              method: "PUT",
              headers: {
                "Content-Type": "application/json",
              },
              // The endpoint requires integers for both fields even when the
              // budget is disabled, so blank fields fall back to placeholders
              // (12 hours mirrors the backend's default window).
              body: JSON.stringify({
                enable_token_budget: values.enable_token_budget,
                token_budget: budgetValid ? tokenBudget : 0,
                token_budget_time_period: periodValid ? timePeriod : 12,
              }),
            }
          );
          if (response.ok) {
            showPopup("Updated LLM Options", "success");
            await fetchConfig();
          } else {
            // The error body may be missing, not JSON, or carry a structured
            // (non-string) detail; only a string is shown to the user.
            const body = await response.json().catch(() => null);
            if (body && typeof body.detail === "string") {
              showPopup(body.detail, "error");
            } else {
              showPopup("Unable to update LLM options.", "error");
            }
          }
        }}
      >
        {({ isSubmitting, values, setFieldValue }) => {
          return (
            <Form>
              <Divider />
              <>
                <SectionHeader>Token Budget</SectionHeader>
                <Text>
                  Set a maximum token use per time period. If the token budget
                  is exceeded, the persona will not be able to respond to
                  queries until the next time period.
                </Text>
                <br />
                <BooleanFormField
                  name="enable_token_budget"
                  label="Enable Token Budget"
                  subtext="If enabled, the persona will be limited to the token budget specified below."
                  onChange={(e) => {
                    setFieldValue("enable_token_budget", e.target.checked);
                  }}
                />
                {values.enable_token_budget && (
                  <>
                    <TextFormField
                      name="token_budget"
                      label="Token Budget"
                      subtext={
                        <div>
                          How many tokens (in thousands) can be used per time
                          period? If unspecified, no limit will be set.
                        </div>
                      }
                      onChange={(e) => {
                        const value = e.target.value;
                        // Allow only integer values
                        if (value === "" || /^[0-9]+$/.test(value)) {
                          setFieldValue("token_budget", value);
                        }
                      }}
                    />
                    <TextFormField
                      name="token_budget_time_period"
                      label="Token Budget Time Period (hours)"
                      subtext={
                        <div>
                          Specify the length of the time period, in hours, over
                          which the token budget will be applied.
                        </div>
                      }
                      onChange={(e) => {
                        const value = e.target.value;
                        // Allow only integer values
                        if (value === "" || /^[0-9]+$/.test(value)) {
                          setFieldValue("token_budget_time_period", value);
                        }
                      }}
                    />
                  </>
                )}
              </>
              <div className="flex">
                <Button
                  className="w-64 mx-auto"
                  type="submit"
                  disabled={isSubmitting}
                >
                  Submit
                </Button>
              </div>
            </Form>
          );
        }}
      </Formik>
    </>
  );
};
const Page = () => { const Page = () => {
return ( return (
<div className="mx-auto container"> <div className="mx-auto container">
<AdminPageTitle <AdminPageTitle
title="LLM Keys" title="LLM Options"
icon={<FiCpu size={32} className="my-auto" />} icon={<FiCpu size={32} className="my-auto" />}
/> />
<SectionHeader>LLM Keys</SectionHeader>
<ExistingKeys /> <ExistingKeys />
<Title className="mb-2 mt-6">Update Key</Title> <Title className="mb-2 mt-6">Update Key</Title>
@ -72,6 +233,7 @@ const Page = () => {
}} }}
/> />
</div> </div>
<LLMOptions />
</div> </div>
); );
}; };