mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-26 17:51:54 +01:00
Token budgets (#1302)
--------- Co-authored-by: Nick Donohue <ndonohue@gmail.com>
This commit is contained in:
parent
7ba7224929
commit
447791b455
@ -40,6 +40,10 @@ DEFAULT_BOOST = 0
|
||||
SESSION_KEY = "session"
|
||||
QUERY_EVENT_ID = "query_event_id"
|
||||
LLM_CHUNKS = "llm_chunks"
|
||||
TOKEN_BUDGET = "token_budget"
|
||||
TOKEN_BUDGET_TIME_PERIOD = "token_budget_time_period"
|
||||
ENABLE_TOKEN_BUDGET = "enable_token_budget"
|
||||
TOKEN_BUDGET_SETTINGS = "token_budget_settings"
|
||||
|
||||
# For chunking/processing chunks
|
||||
TITLE_SEPARATOR = "\n\r\n"
|
||||
|
@ -1,3 +1,4 @@
|
||||
import json
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
@ -5,6 +6,7 @@ from datetime import timezone
|
||||
from typing import cast
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Body
|
||||
from fastapi import Depends
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
@ -12,8 +14,12 @@ from sqlalchemy.orm import Session
|
||||
from danswer.auth.users import current_admin_user
|
||||
from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.configs.constants import ENABLE_TOKEN_BUDGET
|
||||
from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY
|
||||
from danswer.configs.constants import GEN_AI_DETECTED_MODEL
|
||||
from danswer.configs.constants import TOKEN_BUDGET
|
||||
from danswer.configs.constants import TOKEN_BUDGET_SETTINGS
|
||||
from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD
|
||||
from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER
|
||||
from danswer.configs.model_configs import GEN_AI_MODEL_VERSION
|
||||
from danswer.db.connector_credential_pair import get_connector_credential_pair
|
||||
@ -262,3 +268,36 @@ def create_deletion_attempt_for_connector_id(
|
||||
file_store = get_default_file_store(db_session)
|
||||
for file_name in connector.connector_specific_config["file_locations"]:
|
||||
file_store.delete_file(file_name)
|
||||
|
||||
|
||||
@router.get("/admin/token-budget-settings")
def get_token_budget_settings(_: User = Depends(current_admin_user)) -> dict:
    """Return the stored token budget settings (admin only).

    The settings are kept in the dynamic config store as a JSON string under
    TOKEN_BUDGET_SETTINGS; this endpoint decodes that string and returns the
    resulting object.

    Raises:
        HTTPException: 404 when no settings have been stored yet.
    """
    try:
        raw_settings = cast(
            str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS)
        )
    except ConfigNotFoundError:
        raise HTTPException(status_code=404, detail="Token budget settings not found.")
    else:
        return json.loads(raw_settings)
|
||||
|
||||
|
||||
@router.put("/admin/token-budget-settings")
def update_token_budget_settings(
    _: User = Depends(current_admin_user),
    enable_token_budget: bool = Body(..., embed=True),
    token_budget: int = Body(..., ge=0, embed=True),  # rejected if negative
    token_budget_time_period: int = Body(..., ge=1, embed=True),  # rejected if < 1
) -> dict[str, str]:
    """Persist the token budget settings supplied by an admin.

    The three body fields are collected into one object, serialised to JSON
    and written to the dynamic config store under TOKEN_BUDGET_SETTINGS,
    replacing whatever was stored there before.

    Returns:
        A dict with a single "message" entry confirming the update.
    """
    new_settings = {
        ENABLE_TOKEN_BUDGET: enable_token_budget,
        TOKEN_BUDGET: token_budget,
        TOKEN_BUDGET_TIME_PERIOD: token_budget_time_period,
    }

    # The store holds the settings as one JSON string rather than per-field keys.
    get_dynamic_config_store().store(TOKEN_BUDGET_SETTINGS, json.dumps(new_settings))

    return {"message": "Token budget settings updated successfully."}
|
||||
|
@ -29,6 +29,7 @@ from danswer.server.query_and_chat.models import QueryValidationResponse
|
||||
from danswer.server.query_and_chat.models import SimpleQueryRequest
|
||||
from danswer.server.query_and_chat.models import SourceTag
|
||||
from danswer.server.query_and_chat.models import TagResponse
|
||||
from danswer.server.query_and_chat.token_budget import check_token_budget
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
@ -148,6 +149,7 @@ def stream_query_validation(
|
||||
def get_answer_with_quote(
|
||||
query_request: DirectQARequest,
|
||||
user: User = Depends(current_user),
|
||||
_: bool = Depends(check_token_budget),
|
||||
) -> StreamingResponse:
|
||||
query = query_request.messages[0].message
|
||||
logger.info(f"Received query for one shot answer with quotes: {query}")
|
||||
|
69
backend/danswer/server/query_and_chat/token_budget.py
Normal file
69
backend/danswer/server/query_and_chat/token_budget.py
Normal file
@ -0,0 +1,69 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from datetime import timedelta
|
||||
from typing import cast
|
||||
|
||||
from fastapi import HTTPException
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.constants import ENABLE_TOKEN_BUDGET
|
||||
from danswer.configs.constants import TOKEN_BUDGET
|
||||
from danswer.configs.constants import TOKEN_BUDGET_SETTINGS
|
||||
from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD
|
||||
from danswer.db.engine import get_session_context_manager
|
||||
from danswer.db.models import ChatMessage
|
||||
from danswer.dynamic_configs.factory import get_dynamic_config_store
|
||||
|
||||
BUDGET_LIMIT_DEFAULT = -1 # Default to no limit
|
||||
TIME_PERIOD_HOURS_DEFAULT = 12
|
||||
|
||||
|
||||
def is_under_token_budget(db_session: Session) -> bool:
    """Report whether token usage in the current window is below the budget.

    The budget settings are read from the dynamic config store. The check
    passes (returns True) without touching the database when:
      * no settings have ever been stored,
      * the budget is disabled, or
      * the configured limit is negative (treated as "no limit").

    Otherwise the token counts of all chat messages sent within the last
    `period_hours` hours are summed and compared against the limit.

    Args:
        db_session: Session used to sum ChatMessage.token_count.

    Returns:
        True if more tokens may be spent, False if the budget is used up.
    """
    # Local imports: this module does not import these names at file level.
    import logging

    from danswer.dynamic_configs.interface import ConfigNotFoundError

    try:
        settings_json = cast(
            str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS)
        )
    except ConfigNotFoundError:
        # Nothing has been configured yet, so there is no budget to enforce.
        # Without this guard every request fails until an admin saves settings.
        return True

    settings = json.loads(settings_json)

    if not settings.get(ENABLE_TOKEN_BUDGET, False):
        return True

    budget_limit = settings.get(TOKEN_BUDGET, BUDGET_LIMIT_DEFAULT)
    if budget_limit < 0:
        return True

    period_hours = settings.get(TOKEN_BUDGET_TIME_PERIOD, TIME_PERIOD_HOURS_DEFAULT)
    # NOTE(review): this is a naive local timestamp; confirm it matches how
    # ChatMessage.time_sent is stored (timezone-aware vs. naive).
    period_start_time = datetime.now() - timedelta(hours=period_hours)

    # Sum of all tokens used within the period; SUM over no rows yields None.
    token_sum = (
        db_session.query(func.sum(ChatMessage.token_count))
        .filter(ChatMessage.time_sent >= period_start_time)
        .scalar()
        or 0
    )

    logging.getLogger(__name__).debug(
        "token_sum: %s budget_limit: %s period_hours: %s period_start_time: %s",
        token_sum,
        budget_limit,
        period_hours,
        period_start_time,
    )

    # Budget limit is expressed in thousands of tokens
    return token_sum < budget_limit * 1000
|
||||
|
||||
|
||||
def check_token_budget() -> None:
    """Reject the request with HTTP 429 when the token budget is used up.

    Opens its own database session for the check, so it takes no arguments.

    Raises:
        HTTPException: 429 when is_under_token_budget reports the budget
            has been exceeded.
    """
    with get_session_context_manager() as db_session:
        if is_under_token_budget(db_session):
            return

        raise HTTPException(
            status_code=429, detail="Sorry, token budget exceeded. Try again later."
        )
|
@ -1,12 +1,20 @@
|
||||
"use client";
|
||||
|
||||
import { Form, Formik } from "formik";
|
||||
import { useEffect, useState } from "react";
|
||||
import { LoadingAnimation } from "@/components/Loading";
|
||||
import { AdminPageTitle } from "@/components/admin/Title";
|
||||
import { KeyIcon, TrashIcon } from "@/components/icons/icons";
|
||||
import {
|
||||
BooleanFormField,
|
||||
SectionHeader,
|
||||
TextFormField,
|
||||
} from "@/components/admin/connectors/Field";
|
||||
import { Popup } from "@/components/admin/connectors/Popup";
|
||||
import { TrashIcon } from "@/components/icons/icons";
|
||||
import { ApiKeyForm } from "@/components/openai/ApiKeyForm";
|
||||
import { GEN_AI_API_KEY_URL } from "@/components/openai/constants";
|
||||
import { fetcher } from "@/lib/fetcher";
|
||||
import { Text, Title } from "@tremor/react";
|
||||
import { Button, Divider, Text, Title } from "@tremor/react";
|
||||
import { FiCpu } from "react-icons/fi";
|
||||
import useSWR, { mutate } from "swr";
|
||||
|
||||
@ -49,14 +57,167 @@ const ExistingKeys = () => {
|
||||
);
|
||||
};
|
||||
|
||||
// Admin form for the global token budget: loads the stored settings on mount
// and PUTs the edited values back to /api/manage/admin/token-budget-settings.
const LLMOptions = () => {
  const [popup, setPopup] = useState<{
    message: string;
    type: "success" | "error";
  } | null>(null);

  const [initialValues, setInitialValues] = useState({
    enable_token_budget: false,
    token_budget: "",
    token_budget_time_period: "",
  });

  // Show a popup and dismiss it after four seconds, whatever its type.
  const showPopup = (message: string, type: "success" | "error") => {
    setPopup({ message, type });
    setTimeout(() => {
      setPopup(null);
    }, 4000);
  };

  // Keep only digit strings (or the empty string) in a numeric text field.
  const setIntegerField = (
    setFieldValue: (field: string, value: any) => void,
    field: string,
    value: string
  ) => {
    if (value === "" || /^[0-9]+$/.test(value)) {
      setFieldValue(field, value);
    }
  };

  const fetchConfig = async () => {
    const response = await fetch("/api/manage/admin/token-budget-settings");
    if (response.ok) {
      const config = await response.json();
      // Compare against null/undefined rather than using `||`, so a stored
      // budget of 0 is shown as "0" instead of being blanked.
      setInitialValues({
        enable_token_budget: config.enable_token_budget || false,
        token_budget:
          config.token_budget != null ? String(config.token_budget) : "",
        token_budget_time_period:
          config.token_budget_time_period != null
            ? String(config.token_budget_time_period)
            : "",
      });
    } else if (response.status !== 404) {
      // A 404 only means no settings have been saved yet; keep the defaults
      // silently in that case and report every other failure.
      showPopup("Failed to load current LLM options.", "error");
    }
  };

  // Fetch current config when the component mounts
  useEffect(() => {
    fetchConfig();
  }, []);

  return (
    <>
      {popup && <Popup message={popup.message} type={popup.type} />}
      <Formik
        enableReinitialize={true}
        initialValues={initialValues}
        onSubmit={async (values) => {
          const response = await fetch(
            "/api/manage/admin/token-budget-settings",
            {
              method: "PUT",
              headers: {
                "Content-Type": "application/json",
              },
              body: JSON.stringify(values),
            }
          );
          if (response.ok) {
            showPopup("Updated LLM Options", "success");
            await fetchConfig();
          } else {
            // The error body may not be JSON, and `detail` may not be a
            // plain string; fall back to a generic message in both cases.
            const body = await response.json().catch(() => null);
            const detail =
              body && typeof body.detail === "string"
                ? body.detail
                : "Unable to update LLM options.";
            showPopup(detail, "error");
          }
        }}
      >
        {({ isSubmitting, values, setFieldValue }) => {
          return (
            <Form>
              <Divider />
              <>
                <SectionHeader>Token Budget</SectionHeader>
                <Text>
                  Set a maximum token use per time period. If the token budget
                  is exceeded, the persona will not be able to respond to
                  queries until the next time period.
                </Text>
                <br />
                <BooleanFormField
                  name="enable_token_budget"
                  label="Enable Token Budget"
                  subtext="If enabled, the persona will be limited to the token budget specified below."
                  onChange={(e) => {
                    setFieldValue("enable_token_budget", e.target.checked);
                  }}
                />
                {values.enable_token_budget && (
                  <>
                    <TextFormField
                      name="token_budget"
                      label="Token Budget"
                      subtext={
                        <div>
                          How many tokens (in thousands) can be used per time
                          period? If unspecified, no limit will be set.
                        </div>
                      }
                      onChange={(e) => {
                        setIntegerField(
                          setFieldValue,
                          "token_budget",
                          e.target.value
                        );
                      }}
                    />
                    <TextFormField
                      name="token_budget_time_period"
                      label="Token Budget Time Period (hours)"
                      subtext={
                        <div>
                          Specify the length of the time period, in hours, over
                          which the token budget will be applied.
                        </div>
                      }
                      onChange={(e) => {
                        setIntegerField(
                          setFieldValue,
                          "token_budget_time_period",
                          e.target.value
                        );
                      }}
                    />
                  </>
                )}
              </>
              <div className="flex">
                <Button
                  className="w-64 mx-auto"
                  type="submit"
                  disabled={isSubmitting}
                >
                  Submit
                </Button>
              </div>
            </Form>
          );
        }}
      </Formik>
    </>
  );
};
|
||||
|
||||
const Page = () => {
|
||||
return (
|
||||
<div className="mx-auto container">
|
||||
<AdminPageTitle
|
||||
title="LLM Keys"
|
||||
title="LLM Options"
|
||||
icon={<FiCpu size={32} className="my-auto" />}
|
||||
/>
|
||||
|
||||
<SectionHeader>LLM Keys</SectionHeader>
|
||||
|
||||
<ExistingKeys />
|
||||
|
||||
<Title className="mb-2 mt-6">Update Key</Title>
|
||||
@ -72,6 +233,7 @@ const Page = () => {
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<LLMOptions />
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user