first pass at dead code deletion

2025-09-25 19:37:29 +02:00 · 2025-01-29 14:28:46 -08:00
parent 3d99ad7bc4
commit 6c7f8eaefb
12 changed files with 68 additions and 494 deletions
--- a/backend/onyx/chat/stream_processing/answer_response_handler.py
+++ b/backend/onyx/chat/stream_processing/answer_response_handler.py
@@ -1,7 +1,5 @@
 import abc
 from collections.abc import Generator
-from typing import Any
-from typing import cast

 from langchain_core.messages import BaseMessage

@@ -26,10 +24,6 @@ class AnswerResponseHandler(abc.ABC):
    ) -> Generator[ResponsePart, None, None]:
        raise NotImplementedError

-    @abc.abstractmethod
-    def update(self, state_update: Any) -> None:
-        raise NotImplementedError
-

 class PassThroughAnswerResponseHandler(AnswerResponseHandler):
    def handle_response_part(
@@ -40,9 +34,6 @@ class PassThroughAnswerResponseHandler(AnswerResponseHandler):
        content = _message_to_str(response_item)
        yield OnyxAnswerPiece(answer_piece=content)

-    def update(self, state_update: Any) -> None:
-        pass
-

 class DummyAnswerResponseHandler(AnswerResponseHandler):
    def handle_response_part(
@@ -53,9 +44,6 @@ class DummyAnswerResponseHandler(AnswerResponseHandler):
        # This is a dummy handler that returns nothing
        yield from []

-    def update(self, state_update: Any) -> None:
-        pass
-

 class CitationResponseHandler(AnswerResponseHandler):
    def __init__(
@@ -91,20 +79,6 @@ class CitationResponseHandler(AnswerResponseHandler):
        # Process the new content through the citation processor
        yield from self.citation_processor.process_token(content)

-    def update(self, state_update: Any) -> None:
-        state = cast(
-            tuple[list[LlmDoc], DocumentIdOrderMapping, DocumentIdOrderMapping],
-            state_update,
-        )
-        self.context_docs = state[0]
-        self.final_doc_id_to_rank_map = state[1]
-        self.display_doc_id_to_rank_map = state[2]
-        self.citation_processor = CitationProcessor(
-            context_docs=self.context_docs,
-            final_doc_id_to_rank_map=self.final_doc_id_to_rank_map,
-            display_doc_id_to_rank_map=self.display_doc_id_to_rank_map,
-        )
-

 def _message_to_str(message: BaseMessage | str | None) -> str:
    if message is None:
@@ -116,80 +90,3 @@ def _message_to_str(message: BaseMessage | str | None) -> str:
        logger.warning(f"Received non-string content: {type(content)}")
        content = str(content) if content is not None else ""
    return content
-
-
-# class CitationMultiResponseHandler(AnswerResponseHandler):
-#     def __init__(self) -> None:
-#         self.channel_processors: dict[str, CitationProcessor] = {}
-#         self._default_channel = "__default__"
-
-#     def register_default_channel(
-#         self,
-#         context_docs: list[LlmDoc],
-#         final_doc_id_to_rank_map: DocumentIdOrderMapping,
-#         display_doc_id_to_rank_map: DocumentIdOrderMapping,
-#     ) -> None:
-#         """Register the default channel with its associated documents and ranking maps."""
-#         self.register_channel(
-#             channel_id=self._default_channel,
-#             context_docs=context_docs,
-#             final_doc_id_to_rank_map=final_doc_id_to_rank_map,
-#             display_doc_id_to_rank_map=display_doc_id_to_rank_map,
-#         )
-
-#     def register_channel(
-#         self,
-#         channel_id: str,
-#         context_docs: list[LlmDoc],
-#         final_doc_id_to_rank_map: DocumentIdOrderMapping,
-#         display_doc_id_to_rank_map: DocumentIdOrderMapping,
-#     ) -> None:
-#         """Register a new channel with its associated documents and ranking maps."""
-#         self.channel_processors[channel_id] = CitationProcessor(
-#             context_docs=context_docs,
-#             final_doc_id_to_rank_map=final_doc_id_to_rank_map,
-#             display_doc_id_to_rank_map=display_doc_id_to_rank_map,
-#         )
-
-#     def handle_response_part(
-#         self,
-#         response_item: BaseMessage | str | None,
-#         previous_response_items: list[BaseMessage | str],
-#     ) -> Generator[ResponsePart, None, None]:
-#         """Default implementation that uses the default channel."""
-
-#         yield from self.handle_channel_response(
-#             response_item=content,
-#             previous_response_items=previous_response_items,
-#             channel_id=self._default_channel,
-#         )
-
-#     def handle_channel_response(
-#         self,
-#         response_item: ResponsePart | str | None,
-#         previous_response_items: list[ResponsePart | str],
-#         channel_id: str,
-#     ) -> Generator[ResponsePart, None, None]:
-#         """Process a response part for a specific channel."""
-#         if channel_id not in self.channel_processors:
-#             raise ValueError(f"Attempted to process response for unregistered channel {channel_id}")
-
-#         if response_item is None:
-#             return
-
-#         content = (
-#             response_item.content if isinstance(response_item, BaseMessage) else response_item
-#         )
-
-#         # Ensure content is a string
-#         if not isinstance(content, str):
-#             logger.warning(f"Received non-string content: {type(content)}")
-#             content = str(content) if content is not None else ""
-
-#         # Process the new content through the channel's citation processor
-#         yield from self.channel_processors[channel_id].multi_process_token(content)
-
-#     def remove_channel(self, channel_id: str) -> None:
-#         """Remove a channel and its associated processor."""
-#         if channel_id in self.channel_processors:
-#             del self.channel_processors[channel_id]
--- a/backend/onyx/chat/stream_processing/citation_processing.py
+++ b/backend/onyx/chat/stream_processing/citation_processing.py
@@ -4,7 +4,6 @@ from collections.abc import Generator
 from onyx.chat.models import CitationInfo
 from onyx.chat.models import LlmDoc
 from onyx.chat.models import OnyxAnswerPiece
-from onyx.chat.models import ResponsePart
 from onyx.chat.stream_processing.utils import DocumentIdOrderMapping
 from onyx.configs.chat_configs import STOP_STREAM_PAT
 from onyx.prompts.constants import TRIPLE_BACKTICK
@@ -41,164 +40,6 @@ class CitationProcessor:
        self.current_citations: list[int] = []
        self.past_cite_count = 0

-    # TODO: should reference previous citation processing, rework previous, or completely use new one?
-    def multi_process_token(
-        self, parsed_object: ResponsePart
-    ) -> Generator[ResponsePart, None, None]:
-        # if isinstance(parsed_object,OnyxAnswerPiece):
-        #     # standard citation processing
-        #     yield from self.process_token(parsed_object.answer_piece)
-
-        # elif isinstance(parsed_object, AgentAnswerPiece):
-        #     # citation processing for agent answer pieces
-        #     for token in self.process_token(parsed_object.answer_piece):
-        #         if isinstance(token, CitationInfo):
-        #             yield token
-        #         else:
-        #             yield AgentAnswerPiece(answer_piece=token.answer_piece or '',
-        #  answer_type=parsed_object.answer_type, level=parsed_object.level,
-        # level_question_nr=parsed_object.level_question_nr)
-
-        # level = getattr(parsed_object, "level", None)
-        # level_question_nr = getattr(parsed_object, "level_question_nr", None)
-
-        # if isinstance(parsed_object, (AgentAnswerPiece, OnyxAnswerPiece)):
-        #     # logger.debug(f"FA {parsed_object.answer_piece}")
-        #     if isinstance(parsed_object, AgentAnswerPiece):
-        #         token = parsed_object.answer_piece
-        #         level = parsed_object.level
-        #         level_question_nr = parsed_object.level_question_nr
-        #     else:
-        #         yield parsed_object
-        #         return
-        #         # raise ValueError(
-        #         #     f"Invalid parsed object type: {type(parsed_object)}"
-        #         # )
-
-        #     if not citation_potential[level][level_question_nr] and token:
-        #         if token.startswith(" ["):
-        #             citation_potential[level][level_question_nr] = True
-        #             current_yield_components[level][level_question_nr] = [token]
-        #         else:
-        #             yield parsed_object
-        #     elif token and citation_potential[level][level_question_nr]:
-        #         current_yield_components[level][level_question_nr].append(token)
-        #         current_yield_str[level][level_question_nr] = "".join(
-        #             current_yield_components[level][level_question_nr]
-        #         )
-
-        #         if current_yield_str[level][level_question_nr].strip().startswith(
-        #             "[D"
-        #         ) or current_yield_str[level][level_question_nr].strip().startswith(
-        #             "[Q"
-        #         ):
-        #             citation_potential[level][level_question_nr] = True
-
-        #         else:
-        #             citation_potential[level][level_question_nr] = False
-        #             parsed_object = _set_combined_token_value(
-        #                 current_yield_str[level][level_question_nr], parsed_object
-        #             )
-        #             yield parsed_object
-
-        #         if (
-        #             len(current_yield_components[level][level_question_nr]) > 15
-        #         ):  # ??? 15?
-        #             citation_potential[level][level_question_nr] = False
-        #             parsed_object = _set_combined_token_value(
-        #                 current_yield_str[level][level_question_nr], parsed_object
-        #             )
-        #             yield parsed_object
-        #         elif "]" in current_yield_str[level][level_question_nr]:
-        #             section_split = current_yield_str[level][level_question_nr].split(
-        #                 "]"
-        #             )
-        #             section_split[0] + "]"  # dead code?
-        #             start_of_next_section = "]".join(section_split[1:])
-        #             citation_string = current_yield_str[level][level_question_nr][
-        #                 : -len(start_of_next_section)
-        #             ]
-        #             if "[D" in citation_string:
-        #                 cite_open_bracket_marker, cite_close_bracket_marker = (
-        #                     "[",
-        #                     "]",
-        #                 )
-        #                 cite_identifyer = "D"
-
-        #                 try:
-        #                     cited_document = int(
-        #                         citation_string[level][level_question_nr][2:-1]
-        #                     )
-        #                     if level and level_question_nr:
-        #                         link = agent_document_citations[int(level)][
-        #                             int(level_question_nr)
-        #                         ][cited_document].link
-        #                     else:
-        #                         link = ""
-        #                 except (ValueError, IndexError):
-        #                     link = ""
-        #             elif "[Q" in citation_string:
-        #                 cite_open_bracket_marker, cite_close_bracket_marker = (
-        #                     "{",
-        #                     "}",
-        #                 )
-        #                 cite_identifyer = "Q"
-        #             else:
-        #                 pass
-
-        #             citation_string = citation_string.replace(
-        #                 "[" + cite_identifyer,
-        #                 cite_open_bracket_marker * 2,
-        #             ).replace("]", cite_close_bracket_marker * 2)
-
-        #             if cite_identifyer == "D":
-        #                 citation_string += f"({link})"
-
-        #             parsed_object = _set_combined_token_value(
-        #                 citation_string, parsed_object
-        #             )
-
-        #             yield parsed_object
-
-        #             current_yield_components[level][level_question_nr] = [
-        #                 start_of_next_section
-        #             ]
-        #             if not start_of_next_section.strip().startswith("["):
-        #                 citation_potential[level][level_question_nr] = False
-
-        # elif isinstance(parsed_object, ExtendedToolResponse):
-        #     if parsed_object.id == "search_response_summary":
-        #         level = parsed_object.level
-        #         level_question_nr = parsed_object.level_question_nr
-        #         for inference_section in parsed_object.response.top_sections:
-        #             doc_link = inference_section.center_chunk.source_links[0]
-        #             doc_title = inference_section.center_chunk.title
-        #             doc_id = inference_section.center_chunk.document_id
-
-        #             if (
-        #                 doc_id
-        #                 not in agent_question_citations_used_docs[level][
-        #                     level_question_nr
-        #                 ]
-        #             ):
-        #                 if level not in agent_document_citations:
-        #                     agent_document_citations[level] = {}
-        #                 if level_question_nr not in agent_document_citations[level]:
-        #                     agent_document_citations[level][level_question_nr] = []
-
-        #                 agent_document_citations[level][level_question_nr].append(
-        #                     AgentDocumentCitations(
-        #                         document_id=doc_id,
-        #                         document_title=doc_title,
-        #                         link=doc_link,
-        #                     )
-        #                 )
-        #                 agent_question_citations_used_docs[level][
-        #                     level_question_nr
-        #                 ].append(doc_id)
-
-        yield parsed_object
-
    def process_token(
        self, token: str | None
    ) -> Generator[OnyxAnswerPiece | CitationInfo, None, None]: