Stop Streaming Pattern (#923)

2025-09-26 20:08:38 +02:00 · 2024-01-09 23:52:21 -08:00
parent 0056cdcf44
commit 936e69bc2b
2 changed files with 22 additions and 1 deletions
--- a/backend/danswer/chat/chat_utils.py
+++ b/backend/danswer/chat/chat_utils.py
@@ -14,6 +14,7 @@ from danswer.chat.models import DanswerAnswerPiece
 from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
 from danswer.configs.chat_configs import NUM_DOCUMENT_TOKENS_FED_TO_GENERATIVE_MODEL
+from danswer.configs.chat_configs import STOP_STREAM_PAT
 from danswer.configs.constants import IGNORE_FOR_QA
 from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
 from danswer.configs.model_configs import GEN_AI_MAX_INPUT_TOKENS
@@ -415,13 +416,30 @@ def extract_citations_from_stream(
    tokens: Iterator[str],
    context_docs: list[LlmDoc],
    doc_id_to_rank_map: dict[str, int],
+    stop_stream: str | None = STOP_STREAM_PAT,
 ) -> Iterator[DanswerAnswerPiece | CitationInfo]:
    llm_out = ""
    max_citation_num = len(context_docs)
    curr_segment = ""
    prepend_bracket = False
    cited_inds = set()
-    for token in tokens:
+    hold = ""
+    for raw_token in tokens:
+        if stop_stream:
+            next_hold = hold + raw_token
+
+            if stop_stream in next_hold:
+                break
+
+            if next_hold == stop_stream[: len(next_hold)]:
+                hold = next_hold
+                continue
+
+            token = next_hold
+            hold = ""
+        else:
+            token = raw_token
+
        # Special case of [1][ where ][ is a single token
        # This is where the model attempts to do consecutive citations like [1][2]
        if prepend_bracket:
--- a/backend/danswer/configs/chat_configs.py
+++ b/backend/danswer/configs/chat_configs.py
@@ -71,5 +71,8 @@ TITLE_CONTENT_RATIO = max(
 # For example "English,French,Spanish", be sure to use the "," separator
 MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None

+# If set, streaming the answer back to the UI stops as soon as this pattern is seen
+STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
+
 # The backend logic for this being True isn't fully supported yet
 HARD_DELETE_CHATS = False