account for empty links + fix quote processing

This commit is contained in:
pablodanswer 2024-08-07 20:08:56 -07:00 committed by Chris Weaver
parent 509fa3a994
commit 9eb48ca2c3
3 changed files with 93 additions and 3 deletions

View File

@ -70,6 +70,10 @@ def _process_citations_for_slack(text: str) -> str:
def slack_link_format(match: Match) -> str:
    """Render a single citation match as Slack link markup.

    Group 1 of the match is used as the citation label and group 2 as the
    target URL. The label is always wrapped in square brackets.

    Returns:
        ``<url|[label]>`` when the URL is non-empty, otherwise just
        ``[label]`` with no link wrapper.
    """
    label, url = match.group(1), match.group(2)
    bracketed_label = f"[{label}]"
    # An empty URL would yield a "<|[label]>" link with no target, so
    # citations without a URL are emitted as plain bracketed text instead.
    return bracketed_label if url == "" else f"<{url}|{bracketed_label}>"
# Substitute all matches in the input text
@ -299,7 +303,9 @@ def build_sources_blocks(
else []
)
+ [
MarkdownTextObject(
MarkdownTextObject(text=f"{document_title}")
if d.link == ""
else MarkdownTextObject(
text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}"
),
]

View File

@ -26,8 +26,9 @@ from danswer.utils.text_processing import shared_precompare_cleanup
# Module-level logger returned by setup_logger().
logger = setup_logger()
# NOTE(review): this binding is replaced by the assignment directly below, so
# this stricter pattern (no code-fence prefix) is never used at runtime.
# Presumably it is the pre-change line of the diff shown here - confirm.
answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')
# Matches the opening of the JSON answer payload: an optional ``` or ```json
# fence, optional whitespace, then `{"answer": "` with flexible whitespace
# around the key and colon. re.IGNORECASE makes the literal text (e.g. "json",
# "answer") case-insensitive. re.DOTALL only changes how "." matches and this
# pattern contains no ".", so that flag has no effect here.
answer_pattern = re.compile(
r'(?:```(?:json)?)?\s*{\s*"answer"\s*:\s*"', re.DOTALL | re.IGNORECASE
)
def _extract_answer_quotes_freeform(

View File

@ -195,3 +195,86 @@ def test_process_model_tokens() -> None:
actual += o.answer_piece
assert expected_answer == actual
def test_simple_json_answer() -> None:
    """Check answer extraction from a code-fenced JSON token stream.

    The stream opens with a fence token followed by "json", and the answer
    text is spread over two tokens. The non-empty answer pieces yielded by
    process_model_tokens must concatenate to the full answer string.
    """
    chunks = [
        "```",
        "json",
        "\n",
        "{",
        '"answer": "This is a simple ',
        "answer.",
        '",\n"',
        'quotes": []',
        "\n}",
        "\n",
        "```",
    ]
    expected_answer = "This is a simple answer."

    outputs = process_model_tokens(tokens=iter(chunks), context_docs=mock_docs)

    # Accumulate only non-empty answer pieces; other output types are ignored.
    actual = ""
    for output in outputs:
        if not isinstance(output, DanswerAnswerPiece):
            continue
        if output.answer_piece:
            actual += output.answer_piece

    assert expected_answer == actual
def test_json_answer_with_quotes() -> None:
    """Check answer extraction when the answer text spans four tokens.

    The stream is a code-fenced JSON object whose "quotes" key follows the
    answer. Note that, despite the test name, the quotes list in this
    stream is empty. The non-empty answer pieces yielded by
    process_model_tokens must concatenate to the full answer string.
    """
    chunks = [
        "```",
        "json",
        "\n",
        "{",
        '"answer": "This ',
        "is a ",
        "split ",
        "answer.",
        '",\n"',
        'quotes": []',
        "\n}",
        "\n",
        "```",
    ]
    expected_answer = "This is a split answer."

    outputs = process_model_tokens(tokens=iter(chunks), context_docs=mock_docs)

    # Accumulate only non-empty answer pieces; other output types are ignored.
    actual = ""
    for output in outputs:
        if not isinstance(output, DanswerAnswerPiece):
            continue
        if output.answer_piece:
            actual += output.answer_piece

    assert expected_answer == actual
def test_json_answer_split_tokens() -> None:
    """Check answer extraction when the "answer" key itself is split.

    Here the opening quote of the key arrives in one token ('\\n"') and the
    rest of the key in the next ('answer": "This '), so the start of the
    answer has to be recognised across a token boundary. The non-empty
    answer pieces yielded by process_model_tokens must concatenate to the
    full answer string.
    """
    chunks = [
        "```",
        "json",
        "\n",
        "{",
        '\n"',
        'answer": "This ',
        "is a ",
        "split ",
        "answer.",
        '",\n"',
        'quotes": []',
        "\n}",
        "\n",
        "```",
    ]
    expected_answer = "This is a split answer."

    outputs = process_model_tokens(tokens=iter(chunks), context_docs=mock_docs)

    # Accumulate only non-empty answer pieces; other output types are ignored.
    actual = ""
    for output in outputs:
        if not isinstance(output, DanswerAnswerPiece):
            continue
        if output.answer_piece:
            actual += output.answer_piece

    assert expected_answer == actual