From 936e69bc2b0f79fa8d5e0146071717bc090c0dbb Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Tue, 9 Jan 2024 23:52:21 -0800
Subject: [PATCH] Stop Streaming Pattern (#923)

---
 backend/danswer/chat/chat_utils.py      | 20 +++++++++++++++++++-
 backend/danswer/configs/chat_configs.py |  3 +++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py
index 4714a51e8988..9a96879c2011 100644
--- a/backend/danswer/chat/chat_utils.py
+++ b/backend/danswer/chat/chat_utils.py
@@ -14,6 +14,7 @@ from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
 from danswer.configs.chat_configs import NUM_DOCUMENT_TOKENS_FED_TO_GENERATIVE_MODEL
+from danswer.configs.chat_configs import STOP_STREAM_PAT
 from danswer.configs.constants import IGNORE_FOR_QA
 from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
 from danswer.configs.model_configs import GEN_AI_MAX_INPUT_TOKENS
@@ -415,13 +416,30 @@ def extract_citations_from_stream(
     tokens: Iterator[str],
     context_docs: list[LlmDoc],
     doc_id_to_rank_map: dict[str, int],
+    stop_stream: str | None = STOP_STREAM_PAT,
 ) -> Iterator[DanswerAnswerPiece | CitationInfo]:
     llm_out = ""
     max_citation_num = len(context_docs)
     curr_segment = ""
     prepend_bracket = False
     cited_inds = set()
-    for token in tokens:
+    hold = ""
+    for raw_token in tokens:
+        if stop_stream:
+            next_hold = hold + raw_token
+
+            if stop_stream in next_hold:
+                break
+
+            if next_hold == stop_stream[: len(next_hold)]:
+                hold = next_hold
+                continue
+
+            token = next_hold
+            hold = ""
+        else:
+            token = raw_token
+
         # Special case of [1][ where ][ is a single token
         # This is where the model attempts to do consecutive citations like [1][2]
         if prepend_bracket:
diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py
index 21d7b8c28823..8d0c91a79d15 100644
--- a/backend/danswer/configs/chat_configs.py
+++ b/backend/danswer/configs/chat_configs.py
@@ -71,5 +71,8 @@ TITLE_CONTENT_RATIO = max(
 # For example "English,French,Spanish", be sure to use the "," separator
 MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
 
+# Stops streaming answers back to the UI if this pattern is seen:
+STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
+
 # The backend logic for this being True isn't fully supported yet
 HARD_DELETE_CHATS = False