mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-10 21:09:51 +02:00
Move code block default language creation to citation processing (#2501)
* move code block default language creation to citation processing * add test cases * update copy
This commit is contained in:
parent
3884f1d70a
commit
f404c4b448
@ -709,6 +709,7 @@ def stream_chat_message_objects(
|
||||
yield FinalUsedContextDocsResponse(
|
||||
final_context_docs=packet.response
|
||||
)
|
||||
|
||||
elif packet.id == IMAGE_GENERATION_RESPONSE_ID:
|
||||
img_generation_response = cast(
|
||||
list[ImageGenerationResponse], packet.response
|
||||
|
@ -85,6 +85,15 @@ def extract_citations_from_stream(
|
||||
curr_segment += token
|
||||
llm_out += token
|
||||
|
||||
# Handle code blocks without language tags
|
||||
if "`" in curr_segment:
|
||||
if curr_segment.endswith("`"):
|
||||
continue
|
||||
elif "```" in curr_segment:
|
||||
piece_that_comes_after = curr_segment.split("```")[1][0]
|
||||
if piece_that_comes_after == "\n" and in_code_block(llm_out):
|
||||
curr_segment = curr_segment.replace("```", "```plaintext")
|
||||
|
||||
citation_pattern = r"\[(\d+)\]"
|
||||
|
||||
citations_found = list(re.finditer(citation_pattern, curr_segment))
|
||||
|
@ -286,6 +286,92 @@ def process_text(
|
||||
"[[1]](https://0.com) Citation at the beginning. ",
|
||||
["doc_0"],
|
||||
),
|
||||
(
|
||||
"Code block without language specification",
|
||||
[
|
||||
"Here's",
|
||||
" a code block",
|
||||
":\n```\nd",
|
||||
"ef example():\n pass\n",
|
||||
"```\n",
|
||||
"End of code.",
|
||||
],
|
||||
"Here's a code block:\n```plaintext\ndef example():\n pass\n```\nEnd of code.",
|
||||
[],
|
||||
),
|
||||
(
|
||||
"Code block with language specification",
|
||||
[
|
||||
"Here's a Python code block:\n",
|
||||
"```",
|
||||
"python",
|
||||
"\n",
|
||||
"def greet",
|
||||
"(name):",
|
||||
"\n ",
|
||||
"print",
|
||||
"(f'Hello, ",
|
||||
"{name}!')",
|
||||
"\n",
|
||||
"greet('World')",
|
||||
"\n```\n",
|
||||
"This function ",
|
||||
"greets the user.",
|
||||
],
|
||||
"Here's a Python code block:\n```python\ndef greet(name):\n "
|
||||
"print(f'Hello, {name}!')\ngreet('World')\n```\nThis function greets the user.",
|
||||
[],
|
||||
),
|
||||
(
|
||||
"Multiple code blocks with different languages",
|
||||
[
|
||||
"JavaScript example:\n",
|
||||
"```",
|
||||
"javascript",
|
||||
"\n",
|
||||
"console",
|
||||
".",
|
||||
"log",
|
||||
"('Hello, World!');",
|
||||
"\n```\n",
|
||||
"Python example",
|
||||
":\n",
|
||||
"```",
|
||||
"python",
|
||||
"\n",
|
||||
"print",
|
||||
"('Hello, World!')",
|
||||
"\n```\n",
|
||||
"Both print greetings",
|
||||
".",
|
||||
],
|
||||
"JavaScript example:\n```javascript\nconsole.log('Hello, World!');\n"
|
||||
"```\nPython example:\n```python\nprint('Hello, World!')\n"
|
||||
"```\nBoth print greetings.",
|
||||
[],
|
||||
),
|
||||
(
|
||||
"Code block with text block",
|
||||
[
|
||||
"Here's a code block with a text block:\n",
|
||||
"```\n",
|
||||
"# This is a comment",
|
||||
"\n",
|
||||
"x = 10 # This assigns 10 to x\n",
|
||||
"print",
|
||||
"(x) # This prints x",
|
||||
"\n```\n",
|
||||
"The code demonstrates variable assignment.",
|
||||
],
|
||||
"Here's a code block with a text block:\n"
|
||||
"```plaintext\n"
|
||||
"# This is a comment\n"
|
||||
"x = 10 # This assigns 10 to x\n"
|
||||
"print(x) # This prints x\n"
|
||||
"```\n"
|
||||
"The code demonstrates variable assignment.",
|
||||
[],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_citation_extraction(
|
||||
|
Loading…
x
Reference in New Issue
Block a user