Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-06-26 16:01:09 +02:00
prompt piece optimizations
commit a96728ff4d
parent eaffdee0dc
@@ -60,17 +60,19 @@ def build_sub_question_answer_prompt(
 
 
 def trim_prompt_piece(config: LLMConfig, prompt_piece: str, reserved_str: str) -> str:
-    # TODO: this truncating might add latency. We could do a rougher + faster check
-    # first to determine whether truncation is needed
-
-    # TODO: maybe save the tokenizer and max input tokens if this is getting called multiple times?
-    llm_tokenizer = get_tokenizer(
-        provider_type=config.model_provider,
+    # TODO: save the max input tokens in LLMConfig
+    max_tokens = get_max_input_tokens(
+        model_provider=config.model_provider,
         model_name=config.model_name,
     )
 
-    max_tokens = get_max_input_tokens(
-        model_provider=config.model_provider,
+    # no need to trim if a conservative estimate of one token
+    # per character is already less than the max tokens
+    if len(prompt_piece) + len(reserved_str) < max_tokens:
+        return prompt_piece
+
+    llm_tokenizer = get_tokenizer(
+        provider_type=config.model_provider,
         model_name=config.model_name,
     )
 
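The reordering pays off because fetching the max input token count is cheap compared to constructing a tokenizer and encoding the whole prompt piece, so the common case (the piece already fits) never touches the tokenizer. Below is a minimal, self-contained sketch of the same early-exit pattern; trim_piece, encode, and decode are illustrative placeholders rather than Danswer's actual helpers, and the one-token-per-character bound mirrors the comment in the commit.

# Sketch of the early-exit trimming pattern; names are illustrative,
# not Danswer's actual helpers.
from typing import Callable, List


def trim_piece(
    prompt_piece: str,
    reserved_str: str,
    max_tokens: int,
    encode: Callable[[str], List[int]],
    decode: Callable[[List[int]], str],
) -> str:
    # Conservative bound: assume one token per character. If even that
    # over-estimate fits within the budget, skip tokenization entirely.
    if len(prompt_piece) + len(reserved_str) < max_tokens:
        return prompt_piece

    # Otherwise pay for real tokenization and trim to the remaining budget.
    reserved_tokens = len(encode(reserved_str))
    budget = max(max_tokens - reserved_tokens, 0)
    return decode(encode(prompt_piece)[:budget])

Since ordinary text averages several characters per token, one token per character over-counts; the check therefore only skips trimming when it is definitely unnecessary and never lets a piece that might overflow through untrimmed.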