Move code block default language creation to citation processing (#2501)

* move code block default language creation to citation processing

* add test cases

* update copy
This commit is contained in:
pablodanswer 2024-09-18 23:00:58 -07:00 committed by GitHub
parent 3884f1d70a
commit f404c4b448
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 96 additions and 0 deletions

View File

@@ -709,6 +709,7 @@ def stream_chat_message_objects(
yield FinalUsedContextDocsResponse(
final_context_docs=packet.response
)
elif packet.id == IMAGE_GENERATION_RESPONSE_ID:
img_generation_response = cast(
list[ImageGenerationResponse], packet.response

View File

@@ -85,6 +85,15 @@ def extract_citations_from_stream(
curr_segment += token
llm_out += token
# Handle code blocks without language tags
if "`" in curr_segment:
if curr_segment.endswith("`"):
continue
elif "```" in curr_segment:
piece_that_comes_after = curr_segment.split("```")[1][0]
if piece_that_comes_after == "\n" and in_code_block(llm_out):
curr_segment = curr_segment.replace("```", "```plaintext")
citation_pattern = r"\[(\d+)\]"
citations_found = list(re.finditer(citation_pattern, curr_segment))

View File

@@ -286,6 +286,92 @@ def process_text(
"[[1]](https://0.com) Citation at the beginning. ",
["doc_0"],
),
(
"Code block without language specification",
[
"Here's",
" a code block",
":\n```\nd",
"ef example():\n pass\n",
"```\n",
"End of code.",
],
"Here's a code block:\n```plaintext\ndef example():\n pass\n```\nEnd of code.",
[],
),
(
"Code block with language specification",
[
"Here's a Python code block:\n",
"```",
"python",
"\n",
"def greet",
"(name):",
"\n ",
"print",
"(f'Hello, ",
"{name}!')",
"\n",
"greet('World')",
"\n```\n",
"This function ",
"greets the user.",
],
"Here's a Python code block:\n```python\ndef greet(name):\n "
"print(f'Hello, {name}!')\ngreet('World')\n```\nThis function greets the user.",
[],
),
(
"Multiple code blocks with different languages",
[
"JavaScript example:\n",
"```",
"javascript",
"\n",
"console",
".",
"log",
"('Hello, World!');",
"\n```\n",
"Python example",
":\n",
"```",
"python",
"\n",
"print",
"('Hello, World!')",
"\n```\n",
"Both print greetings",
".",
],
"JavaScript example:\n```javascript\nconsole.log('Hello, World!');\n"
"```\nPython example:\n```python\nprint('Hello, World!')\n"
"```\nBoth print greetings.",
[],
),
(
"Code block with text block",
[
"Here's a code block with a text block:\n",
"```\n",
"# This is a comment",
"\n",
"x = 10 # This assigns 10 to x\n",
"print",
"(x) # This prints x",
"\n```\n",
"The code demonstrates variable assignment.",
],
"Here's a code block with a text block:\n"
"```plaintext\n"
"# This is a comment\n"
"x = 10 # This assigns 10 to x\n"
"print(x) # This prints x\n"
"```\n"
"The code demonstrates variable assignment.",
[],
),
],
)
def test_citation_extraction(