Fix gpt o-series code block formatting (#4089)

* prompt addition for gpt o-series to encourage markdown formatting of code blocks * fix to match https://simonwillison.net/tags/markdown/ * chris comment * chris comment
2025-05-30 01:30:21 +02:00 · 2025-02-23 16:59:48 -08:00 · 2025-02-23 16:59:48 -08:00 · a7125662f1
commit a7125662f1
parent 4a4e4a6c50
4 changed files with 15 additions and 18 deletions
--- a/backend/onyx/chat/process_message.py
+++ b/backend/onyx/chat/process_message.py
@ -747,14 +747,13 @@ def stream_chat_message_objects(
                files=latest_query_files,
                single_message_history=single_message_history,
            ),
-            system_message=default_build_system_message(prompt_config),
+            system_message=default_build_system_message(prompt_config, llm.config),
            message_history=message_history,
            llm_config=llm.config,
            raw_user_query=final_msg.message,
            raw_user_uploaded_files=latest_query_files or [],
            single_message_history=single_message_history,
        )
-        prompt_builder.update_system_prompt(default_build_system_message(prompt_config))

        # LLM prompt building, response capturing, etc.
        answer = Answer(
--- a/backend/onyx/chat/prompt_builder/answer_prompt_builder.py
+++ b/backend/onyx/chat/prompt_builder/answer_prompt_builder.py
@ -12,6 +12,7 @@ from onyx.chat.prompt_builder.citations_prompt import compute_max_llm_input_toke
 from onyx.chat.prompt_builder.utils import translate_history_to_basemessages
 from onyx.file_store.models import InMemoryChatFile
 from onyx.llm.interfaces import LLMConfig
+from onyx.llm.llm_provider_options import OPENAI_PROVIDER_NAME
 from onyx.llm.models import PreviousMessage
 from onyx.llm.utils import build_content_with_imgs
 from onyx.llm.utils import check_message_tokens
@ -19,6 +20,7 @@ from onyx.llm.utils import message_to_prompt_and_imgs
 from onyx.llm.utils import model_supports_image_input
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
+from onyx.prompts.chat_prompts import CODE_BLOCK_MARKDOWN
 from onyx.prompts.direct_qa_prompts import HISTORY_BLOCK
 from onyx.prompts.prompt_utils import drop_messages_history_overflow
 from onyx.prompts.prompt_utils import handle_onyx_date_awareness
@ -31,8 +33,16 @@ from onyx.tools.tool import Tool

 def default_build_system_message(
    prompt_config: PromptConfig,
+    llm_config: LLMConfig,
 ) -> SystemMessage | None:
    system_prompt = prompt_config.system_prompt.strip()
+    # See https://simonwillison.net/tags/markdown/ for context on this temporary fix
+    # for o-series markdown generation
+    if (
+        llm_config.model_provider == OPENAI_PROVIDER_NAME
+        and llm_config.model_name.startswith("o")
+    ):
+        system_prompt = CODE_BLOCK_MARKDOWN + system_prompt
    tag_handled_prompt = handle_onyx_date_awareness(
        system_prompt,
        prompt_config,
@ -110,21 +120,8 @@ class AnswerPromptBuilder:
            ),
        )

-        self.system_message_and_token_cnt: tuple[SystemMessage, int] | None = (
-            (
-                system_message,
-                check_message_tokens(system_message, self.llm_tokenizer_encode_func),
-            )
-            if system_message
-            else None
-        )
-        self.user_message_and_token_cnt = (
-            user_message,
-            check_message_tokens(
-                user_message,
-                self.llm_tokenizer_encode_func,
-            ),
-        )
+        self.update_system_prompt(system_message)
+        self.update_user_prompt(user_message)

        self.new_messages_and_token_cnts: list[tuple[BaseMessage, int]] = []

--- a/backend/onyx/prompts/chat_prompts.py
+++ b/backend/onyx/prompts/chat_prompts.py
@ -18,6 +18,7 @@ Remember to provide inline citations in the format [1], [2], [3], etc.

 ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}."

+CODE_BLOCK_MARKDOWN = "Formatting re-enabled. "

 CHAT_USER_PROMPT = f"""
 Refer to the following context documents when responding to me.{{optional_ignore_statement}}
--- a/backend/tests/unit/onyx/chat/test_answer.py
+++ b/backend/tests/unit/onyx/chat/test_answer.py
@ -70,7 +70,7 @@ def _answer_fixture_impl(
                files=[],
                single_message_history=None,
            ),
-            system_message=default_build_system_message(prompt_config),
+            system_message=default_build_system_message(prompt_config, mock_llm.config),
            message_history=[],
            llm_config=mock_llm.config,
            raw_user_query=QUERY,