Token Level Log (#3238)

2025-03-26 17:51:54 +01:00 · 2024-11-23 18:41:50 -08:00 · 2024-11-23 18:41:50 -08:00 · 413891f143
commit 413891f143
parent 7a0a4d4b79
4 changed files with 22 additions and 1 deletion
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -422,6 +422,9 @@ LOG_ALL_MODEL_INTERACTIONS = (
 LOG_DANSWER_MODEL_INTERACTIONS = (
    os.environ.get("LOG_DANSWER_MODEL_INTERACTIONS", "").lower() == "true"
 )
+LOG_INDIVIDUAL_MODEL_TOKENS = (
+    os.environ.get("LOG_INDIVIDUAL_MODEL_TOKENS", "").lower() == "true"
+)
 # If set to `true` will enable additional logs about Vespa query performance
 # (time spent on finding the right docs + time spent fetching summaries from disk)
 LOG_VESPA_TIMING_INFORMATION = (
--- a/backend/danswer/llm/answering/stream_processing/answer_response_handler.py
+++ b/backend/danswer/llm/answering/stream_processing/answer_response_handler.py
@@ -13,6 +13,9 @@ from danswer.llm.answering.stream_processing.quotes_processing import (
    QuotesProcessor,
 )
 from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()


 class AnswerResponseHandler(abc.ABC):
@@ -48,6 +51,9 @@ class CitationResponseHandler(AnswerResponseHandler):
        self.processed_text = ""
        self.citations: list[CitationInfo] = []

+        # TODO remove this after citation issue is resolved
+        logger.debug(f"Document to ranking map {self.doc_id_to_rank_map}")
+
    def handle_response_part(
        self,
        response_item: BaseMessage | None,
--- a/backend/danswer/llm/interfaces.py
+++ b/backend/danswer/llm/interfaces.py
@@ -9,6 +9,7 @@ from pydantic import BaseModel

 from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
 from danswer.configs.app_configs import LOG_DANSWER_MODEL_INTERACTIONS
+from danswer.configs.app_configs import LOG_INDIVIDUAL_MODEL_TOKENS
 from danswer.utils.logger import setup_logger


@@ -117,10 +118,19 @@ class LLM(abc.ABC):
        self._precall(prompt)
        # TODO add a postcall to log model outputs independent of concrete class
        # implementation
-        return self._stream_implementation(
+        messages = self._stream_implementation(
            prompt, tools, tool_choice, structured_response_format
        )

+        tokens = []
+        for message in messages:
+            if LOG_INDIVIDUAL_MODEL_TOKENS:
+                tokens.append(message.content)
+            yield message
+
+        if LOG_INDIVIDUAL_MODEL_TOKENS and tokens:
+            logger.debug(f"Model Tokens: {tokens}")
+
    @abc.abstractmethod
    def _stream_implementation(
        self,
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -83,6 +83,7 @@ services:
      - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging
      # Log all of Danswer prompts and interactions with the LLM
      - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
+      - LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
      # If set to `true` will enable additional logs about Vespa query performance
      # (time spent on finding the right docs + time spent fetching summaries from disk)
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
@@ -204,6 +205,7 @@ services:
      - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging
      # Log all of Danswer prompts and interactions with the LLM
      - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-}
+      - LOG_INDIVIDUAL_MODEL_TOKENS=${LOG_INDIVIDUAL_MODEL_TOKENS:-}
      - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}

      # Analytics Configs