don't skip the start of the json answer value (#2067)

Author: rkuo-danswer, 2024-08-06 16:59:13 -07:00, committed by GitHub
parent f20984ea1d
commit fcc4c30ead
No known key found for this signature in database (GPG Key ID: B5690EEEBB952194)
3 changed files with 215 additions and 20 deletions

File: danswer/llm/answering/stream_processing/quotes_processing.py

@@ -17,7 +17,6 @@ from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT
 from danswer.prompts.constants import ANSWER_PAT
 from danswer.prompts.constants import QUOTE_PAT
-from danswer.prompts.constants import UNCERTAINTY_PAT
 from danswer.search.models import InferenceChunk
 from danswer.utils.logger import setup_logger
 from danswer.utils.text_processing import clean_model_quote
@@ -28,6 +27,8 @@ from danswer.utils.text_processing import shared_precompare_cleanup

 logger = setup_logger()

+answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')
+

 def _extract_answer_quotes_freeform(
     answer_raw: str,
@@ -166,10 +167,7 @@ def process_answer(
     into an Answer and Quotes AND (2) after the complete streaming response
     has been received to process the model output into an Answer and Quotes."""
     answer, quote_strings = separate_answer_quotes(answer_raw, is_json_prompt)
-    if answer == UNCERTAINTY_PAT or not answer:
-        if answer == UNCERTAINTY_PAT:
-            logger.debug("Answer matched UNCERTAINTY_PAT")
-        else:
-            logger.debug("No answer extracted from raw output")
+    if not answer:
+        logger.debug("No answer extracted from raw output")
         return DanswerAnswer(answer=None), DanswerQuotes(quotes=[])
@@ -227,21 +225,25 @@ def process_model_tokens(
     found_answer_start = False if is_json_prompt else True
     found_answer_end = False
     hold_quote = ""

     for token in tokens:
         model_previous = model_output
         model_output += token

-        if not found_answer_start and '{"answer":"' in re.sub(r"\s", "", model_output):
-            # Note, if the token that completes the pattern has additional text, for example if the token is "?
-            # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? in the
-            # event that the model outputs the UNCERTAINTY_PAT
-            found_answer_start = True
-            # Prevent heavy cases of hallucinations where model is not even providing a json until later
-            if is_json_prompt and len(model_output) > 40:
-                logger.warning("LLM did not produce json as prompted")
-                found_answer_end = True
-            continue
+        if not found_answer_start:
+            m = answer_pattern.match(model_output)
+            if m:
+                found_answer_start = True
+                # Prevent heavy cases of hallucinations where model is not even providing a json until later
+                if is_json_prompt and len(model_output) > 40:
+                    logger.warning("LLM did not produce json as prompted")
+                    found_answer_end = True
+                    continue
+                remaining = model_output[m.end() :]
+                if len(remaining) > 0:
+                    yield DanswerAnswerPiece(answer_piece=remaining)
+                continue

         if found_answer_start and not found_answer_end:
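The regression fixed here lives in the old `continue`: pattern detection and answer streaming were decoupled, so answer characters arriving in the same token that completed '{"answer":"' were never streamed (the old comment even acknowledges this). Matching with a compiled regex instead records where the prefix ends, and model_output[m.end():] recovers those characters. A minimal standalone sketch of that mechanic, reusing only the answer_pattern regex from this diff (the example string is invented for illustration):

    import re

    answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')

    # One token can complete the JSON prefix AND carry the first answer
    # characters; \s* also tolerates pretty-printed model output.
    model_output = '{\n  "answer": "Yes'
    m = answer_pattern.match(model_output)
    assert m is not None
    remaining = model_output[m.end():]
    assert remaining == "Yes"  # previously not streamed, now yielded as the first DanswerAnswerPiece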

File: prompt constants for the one-shot QA flow (path not shown in this view)

@@ -7,7 +7,6 @@ from danswer.prompts.constants import FINAL_QUERY_PAT
 from danswer.prompts.constants import GENERAL_SEP_PAT
 from danswer.prompts.constants import QUESTION_PAT
 from danswer.prompts.constants import THOUGHT_PAT
-from danswer.prompts.constants import UNCERTAINTY_PAT

 ONE_SHOT_SYSTEM_PROMPT = """
@@ -66,9 +65,6 @@ EMPTY_SAMPLE_JSON = {
 }

-ANSWER_NOT_FOUND_RESPONSE = f'{{"answer": "{UNCERTAINTY_PAT}", "quotes": []}}'
-
 # Default json prompt which can reference multiple docs and provide answer + quotes
 # system_like_header is similar to system message, can be user provided or defaults to QA_HEADER
 # context/history blocks are for context documents and conversation history, they can be blank
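Both removals belong to the same cleanup: with the streaming path no longer special-casing an "answer not found" sentinel, the prompt constant that produced it is dead code. Assuming UNCERTAINTY_PAT was "?" (suggested by the old comment in the first file about a token of '"?'), the removed f-string evaluated to:

    # Hypothetical expansion, assuming UNCERTAINTY_PAT == "?"
    ANSWER_NOT_FOUND_RESPONSE = '{"answer": "?", "quotes": []}'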

File (new): unit test for quotes_processing.process_model_tokens

@@ -0,0 +1,197 @@
import json
from datetime import datetime

from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import LlmDoc
from danswer.configs.constants import DocumentSource
from danswer.llm.answering.stream_processing.quotes_processing import (
    process_model_tokens,
)


mock_docs = [
    LlmDoc(
        document_id=f"doc_{int(id/2)}",
        content="Document is a doc",
        blurb=f"Document #{id}",
        semantic_identifier=f"Doc {id}",
        source_type=DocumentSource.WEB,
        metadata={},
        updated_at=datetime.now(),
        link=f"https://{int(id/2)}.com" if int(id / 2) % 2 == 0 else None,
        source_links={0: "https://mintlify.com/docs/settings/broken-links"},
    )
    for id in range(10)
]


def test_process_model_tokens() -> None:
    tokens = [
"{",
"\n ",
'"answer": "Yes',
", Danswer allows",
" customized prompts. This",
" feature",
" is currently being",
" developed and implemente",
"d to",
" improve",
" the accuracy",
" of",
" Language",
" Models (",
"LL",
"Ms) for",
" different",
" companies",
".",
" The custom",
"ized prompts feature",
" woul",
"d allow users to ad",
"d person",
"alized prom",
"pts through",
" an",
" interface or",
" metho",
"d,",
" which would then be used to",
" train",
" the LLM.",
" This enhancement",
" aims to make",
" Danswer more",
" adaptable to",
" different",
" business",
" contexts",
" by",
" tail",
"oring it",
" to the specific language",
" an",
"d terminology",
" used within",
" a",
" company.",
" Additionally",
",",
" Danswer already",
" supports creating",
" custom AI",
" Assistants with",
" different",
" prom",
"pts and backing",
" knowledge",
" sets",
",",
" which",
" is",
" a form",
" of prompt",
" customization. However, it",
"'s important to nLogging Details LiteLLM-Success Call: Noneote that some",
" aspects",
" of prompt",
" customization,",
" such as for",
" Sl",
"ack",
"b",
"ots, may",
" still",
" be in",
" development or have",
' limitations.",',
'\n "quotes": [',
'\n "We',
" woul",
"d like to ad",
"d customized prompts for",
" different",
" companies to improve the accuracy of",
" Language",
" Model",
" (LLM)",
'.",\n "A',
" new",
" feature that",
" allows users to add personalize",
"d prompts.",
" This would involve",
" creating",
" an interface or method for",
" users to input",
" their",
" own",
" prom",
"pts,",
" which would then be used to",
' train the LLM.",',
'\n "Create',
" custom AI Assistants with",
" different prompts and backing knowledge",
' sets.",',
'\n "This',
" PR",
" fixes",
" https",
"://github.com/dan",
"swer-ai/dan",
"swer/issues/1",
"584",
" by",
" setting",
" the system",
" default",
" prompt for",
" sl",
"ackbots const",
"rained by",
" ",
"document sets",
".",
" It",
" probably",
" isn",
"'t ideal",
" -",
" it",
" might",
" be pref",
"erable to be",
" able to select",
" a prompt for",
" the",
" slackbot from",
" the",
" admin",
" panel",
" -",
" but it sol",
"ves the immediate problem",
" of",
" the slack",
" listener",
" cr",
"ashing when",
" configure",
"d this",
' way."\n ]',
"\n}",
"",
    ]

    gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)

    s_json = "".join(tokens)
    j = json.loads(s_json)
    expected_answer = j["answer"]

    actual = ""
    for o in gen:
        if isinstance(o, DanswerAnswerPiece):
            if o.answer_piece:
                actual += o.answer_piece

    assert expected_answer == actual
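The fixture above replays a captured stream verbatim, interleaved "Logging Details LiteLLM-Success Call: None" noise included, and asserts the core property of the fix: concatenating every streamed DanswerAnswerPiece must reproduce json.loads("".join(tokens))["answer"]. The same property can be checked on a much smaller stream; a minimal sketch (hypothetical three-token stream, reusing mock_docs and the imports above):

    def test_process_model_tokens_short_stream() -> None:
        # The first token completes '{"answer": "' and already carries the
        # first answer characters, which the pre-fix code failed to stream.
        short_tokens = ['{"answer": "Yes', ', it streams.",', ' "quotes": []}']

        expected = json.loads("".join(short_tokens))["answer"]
        actual = ""
        for o in process_model_tokens(tokens=iter(short_tokens), context_docs=mock_docs):
            if isinstance(o, DanswerAnswerPiece) and o.answer_piece:
                actual += o.answer_piece
        assert actual == expected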