mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-28 13:53:28 +02:00
use max_tokens to do better rate limit handling (#4224)
* use max_tokens to do better rate limit handling * fix unit tests * address greptile comment, thanks greptile
This commit is contained in:
@@ -25,7 +25,7 @@ from onyx.indexing.models import IndexingSetting
|
||||
from onyx.setup import setup_postgres
|
||||
from onyx.setup import setup_vespa
|
||||
from onyx.utils.logger import setup_logger
|
||||
from tests.integration.common_utils.timeout import run_with_timeout
|
||||
from tests.integration.common_utils.timeout import run_with_timeout_multiproc
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
@@ -161,7 +161,7 @@ def reset_postgres(
|
||||
for _ in range(NUM_TRIES):
|
||||
logger.info(f"Downgrading Postgres... ({_ + 1}/{NUM_TRIES})")
|
||||
try:
|
||||
run_with_timeout(
|
||||
run_with_timeout_multiproc(
|
||||
downgrade_postgres,
|
||||
TIMEOUT,
|
||||
kwargs={
|
||||
|
@@ -6,7 +6,9 @@ from typing import TypeVar
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def run_with_timeout(task: Callable[..., T], timeout: int, kwargs: dict[str, Any]) -> T:
|
||||
def run_with_timeout_multiproc(
|
||||
task: Callable[..., T], timeout: int, kwargs: dict[str, Any]
|
||||
) -> T:
|
||||
# Use multiprocessing to prevent a thread from blocking the main thread
|
||||
with multiprocessing.Pool(processes=1) as pool:
|
||||
async_result = pool.apply_async(task, kwds=kwargs)
|
||||
|
@@ -145,6 +145,7 @@ def test_multiple_tool_calls(default_multi_llm: DefaultMultiLLM) -> None:
|
||||
timeout=30,
|
||||
parallel_tool_calls=False,
|
||||
mock_response=MOCK_LLM_RESPONSE,
|
||||
max_tokens=None,
|
||||
)
|
||||
|
||||
|
||||
@@ -290,4 +291,5 @@ def test_multiple_tool_calls_streaming(default_multi_llm: DefaultMultiLLM) -> No
|
||||
timeout=30,
|
||||
parallel_tool_calls=False,
|
||||
mock_response=MOCK_LLM_RESPONSE,
|
||||
max_tokens=None,
|
||||
)
|
||||
|
Reference in New Issue
Block a user