account for empty links + fix quote processing

2025-09-30 14:39:55 +02:00 · 2024-08-07 20:08:56 -07:00
parent 509fa3a994
commit 9eb48ca2c3
3 changed files with 93 additions and 3 deletions
--- a/backend/danswer/danswerbot/slack/blocks.py
+++ b/backend/danswer/danswerbot/slack/blocks.py
@@ -70,6 +70,10 @@ def _process_citations_for_slack(text: str) -> str:
    def slack_link_format(match: Match) -> str:
        link_text = match.group(1)
        link_url = match.group(2)
+
+        # An empty URL cannot form a Slack <url|text> link; emit plain [text].
+        if link_url == "":
+            return f"[{link_text}]"
        return f"<{link_url}|[{link_text}]>"

    # Substitute all matches in the input text
@@ -299,7 +303,9 @@ def build_sources_blocks(
                    else []
                )
                + [
-                    MarkdownTextObject(
+                    MarkdownTextObject(text=f"{document_title}")
+                    if d.link == ""
+                    else MarkdownTextObject(
                        text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
                    ),
                ]
--- a/backend/danswer/llm/answering/stream_processing/quotes_processing.py
+++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py
@@ -26,8 +26,9 @@ from danswer.utils.text_processing import shared_precompare_cleanup


 logger = setup_logger()
-
-answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')
+answer_pattern = re.compile(
+    r'(?:```(?:json)?)?\s*{\s*"answer"\s*:\s*"', re.DOTALL | re.IGNORECASE
+)  # optional ```/```json fence, then {"answer": "; no "." so DOTALL is a no-op


 def _extract_answer_quotes_freeform(
--- a/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py
+++ b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py
@@ -195,3 +195,86 @@ def test_process_model_tokens() -> None:
                actual += o.answer_piece

    assert expected_answer == actual
+
+
+def test_simple_json_answer() -> None:
+    tokens = [
+        "```",  # opening Markdown fence
+        "json",  # fence language tag, streamed as its own token
+        "\n",
+        "{",
+        '"answer": "This is a simple ',
+        "answer.",  # answer text continues in a second token
+        '",\n"',  # closes the answer string and opens the next key
+        'quotes": []',
+        "\n}",
+        "\n",
+        "```",  # closing Markdown fence
+    ]
+    gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)
+
+    expected_answer = "This is a simple answer."
+    actual = "".join(
+        o.answer_piece
+        for o in gen
+        if isinstance(o, DanswerAnswerPiece) and o.answer_piece  # non-empty only
+    )
+
+    assert expected_answer == actual
+
+
+def test_json_answer_with_quotes() -> None:
+    tokens = [
+        "```",  # opening Markdown fence
+        "json",  # fence language tag, streamed as its own token
+        "\n",
+        "{",
+        '"answer": "This ',  # answer text is spread over the next four tokens
+        "is a ",
+        "split ",
+        "answer.",
+        '",\n"',  # closes the answer string and opens the next key
+        'quotes": []',  # NOTE(review): quotes list is empty despite the test name
+        "\n}",
+        "\n",
+        "```",  # closing Markdown fence
+    ]
+    gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)
+
+    expected_answer = "This is a split answer."
+    actual = "".join(
+        o.answer_piece
+        for o in gen
+        if isinstance(o, DanswerAnswerPiece) and o.answer_piece  # non-empty only
+    )
+
+    assert expected_answer == actual
+
+
+def test_json_answer_split_tokens() -> None:
+    tokens = [
+        "```",  # opening Markdown fence
+        "json",  # fence language tag, streamed as its own token
+        "\n",
+        "{",
+        '\n"',  # opening quote of the "answer" key arrives before the key text
+        'answer": "This ',  # rest of the key plus the start of the answer text
+        "is a ",
+        "split ",
+        "answer.",
+        '",\n"',  # closes the answer string and opens the next key
+        'quotes": []',
+        "\n}",
+        "\n",
+        "```",  # closing Markdown fence
+    ]
+    gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)
+
+    expected_answer = "This is a split answer."
+    actual = "".join(
+        o.answer_piece
+        for o in gen
+        if isinstance(o, DanswerAnswerPiece) and o.answer_piece  # non-empty only
+    )
+
+    assert expected_answer == actual