Remove AI Thoughts by default (#783)

- Removes the AI Thoughts section by default - the validation reasoning is now only shown when question validation fails
- Removes punctuation "words" from queries in addition to stopwords, since Vespa ignores punctuation anyway (see the sketch below)
- Fixes the Vespa deletion script so it handles larger document counts
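
As an illustration of the stopword/punctuation change, here is a minimal, self-contained sketch of what the renamed helper now filters out of a query. It mirrors the remove_stop_words_and_punctuation implementation in the diff below; the sample query and printed output are illustrative only, and it assumes NLTK's punkt and stopwords data have already been downloaded.

import string

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


def remove_stop_words_and_punctuation(text: str) -> list[str]:
    # Drop English stopwords and standalone punctuation tokens; fall back to
    # the raw tokens if everything would be filtered out.
    stop_words = set(stopwords.words("english"))
    word_tokens = word_tokenize(text)
    trimmed = [
        word
        for word in word_tokens
        if word.casefold() not in stop_words and word not in string.punctuation
    ]
    return trimmed or word_tokens


print(remove_stop_words_and_punctuation("What is the refund policy?"))
# ['refund', 'policy'] -- "What", "is", "the" are stopwords and "?" is punctuation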
Chris Weaver 2023-11-29 01:00:53 -08:00 committed by GitHub
parent fcb7f6fcc0
commit 37daf4f3e4
10 changed files with 92 additions and 106 deletions

View File

@@ -57,7 +57,7 @@ from danswer.indexing.models import InferenceChunk
from danswer.search.models import IndexFilters
from danswer.search.search_runner import embed_query
from danswer.search.search_runner import query_processing
from danswer.search.search_runner import remove_stop_words
from danswer.search.search_runner import remove_stop_words_and_punctuation
from danswer.utils.batching import batch_generator
from danswer.utils.logger import setup_logger
@@ -732,7 +732,9 @@ class VespaIndex(DocumentIndex):
query_embedding = embed_query(query)
query_keywords = (
" ".join(remove_stop_words(query)) if edit_keyword_query else query
" ".join(remove_stop_words_and_punctuation(query))
if edit_keyword_query
else query
)
params: dict[str, str | int] = {
@@ -773,7 +775,9 @@ class VespaIndex(DocumentIndex):
query_embedding = embed_query(query)
query_keywords = (
" ".join(remove_stop_words(query)) if edit_keyword_query else query
" ".join(remove_stop_words_and_punctuation(query))
if edit_keyword_query
else query
)
params: dict[str, str | int | float] = {

View File

@@ -4,7 +4,7 @@ from danswer.search.models import QueryFlow
from danswer.search.models import SearchType
from danswer.search.search_nlp_models import get_default_tokenizer
from danswer.search.search_nlp_models import IntentModel
from danswer.search.search_runner import remove_stop_words
from danswer.search.search_runner import remove_stop_words_and_punctuation
from danswer.server.models import HelperResponse
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
@@ -67,7 +67,7 @@ def recommend_search_flow(
# Heuristics based decisions
words = query.split()
non_stopwords = remove_stop_words(query)
non_stopwords = remove_stop_words_and_punctuation(query)
non_stopword_percent = len(non_stopwords) / len(words)
# UNK tokens -> suggest Keyword (still may be valid QA)

View File

@@ -1,3 +1,4 @@
import string
from collections.abc import Callable
from collections.abc import Iterator
from copy import deepcopy
@@ -55,17 +56,21 @@ def lemmatize_text(text: str) -> list[str]:
return [lemmatizer.lemmatize(word) for word in word_tokens]
def remove_stop_words(text: str) -> list[str]:
def remove_stop_words_and_punctuation(text: str) -> list[str]:
stop_words = set(stopwords.words("english"))
word_tokens = word_tokenize(text)
text_trimmed = [word for word in word_tokens if word.casefold() not in stop_words]
text_trimmed = [
word
for word in word_tokens
if (word.casefold() not in stop_words and word not in string.punctuation)
]
return text_trimmed or word_tokens
def query_processing(
query: str,
) -> str:
query = " ".join(remove_stop_words(query))
query = " ".join(remove_stop_words_and_punctuation(query))
query = " ".join(lemmatize_text(query))
return query

View File

@@ -16,9 +16,20 @@ logger = setup_logger()
def wipe_vespa_index() -> None:
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
response.raise_for_status()
continuation = None
should_continue = True
while should_continue:
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
if continuation:
params = {**params, "continuation": continuation}
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
response.raise_for_status()
response_json = response.json()
print(response_json)
continuation = response_json.get("continuation")
should_continue = bool(continuation)
if __name__ == "__main__":
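
For context on why the wipe is now a loop: a selection-based DELETE against Vespa's document/v1 API only runs for a bounded time per request and returns a continuation token when documents remain, so the script has to resubmit the request with that token until none comes back. A stripped-down sketch of the same pattern (drain_continuations and delete_batch are hypothetical names, not part of this codebase):

from collections.abc import Callable
from typing import Any


def drain_continuations(delete_batch: Callable[[str | None], dict[str, Any]]) -> None:
    # Re-issue the delete request, threading the continuation token through,
    # until the response no longer includes one.
    continuation: str | None = None
    while True:
        response_json = delete_batch(continuation)
        continuation = response_json.get("continuation")
        if not continuation:
            break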

View File

@@ -14,13 +14,10 @@ import { DocumentDisplay } from "./DocumentDisplay";
import { ResponseSection, StatusOptions } from "./results/ResponseSection";
import { QuotesSection } from "./results/QuotesSection";
import { AnswerSection } from "./results/AnswerSection";
import {
getAIThoughtsIsOpenSavedValue,
setAIThoughtsIsOpenSavedValue,
} from "@/lib/search/aiThoughtUtils";
import { ThreeDots } from "react-loader-spinner";
import { usePopup } from "../admin/connectors/Popup";
import { AlertIcon } from "../icons/icons";
import Link from "next/link";
const removeDuplicateDocs = (documents: DanswerDocument[]) => {
const seen = new Set<string>();
@@ -45,29 +42,20 @@ const getSelectedDocumentIds = (
return selectedDocumentIds;
};
interface SearchResultsDisplayProps {
searchResponse: SearchResponse | null;
validQuestionResponse: ValidQuestionResponse;
isFetching: boolean;
defaultOverrides: SearchDefaultOverrides;
personaName?: string | null;
}
export const SearchResultsDisplay = ({
searchResponse,
validQuestionResponse,
isFetching,
defaultOverrides,
personaName = null,
}: SearchResultsDisplayProps) => {
}: {
searchResponse: SearchResponse | null;
validQuestionResponse: ValidQuestionResponse;
isFetching: boolean;
defaultOverrides: SearchDefaultOverrides;
personaName?: string | null;
}) => {
const { popup, setPopup } = usePopup();
const [isAIThoughtsOpen, setIsAIThoughtsOpen] = React.useState<boolean>(
getAIThoughtsIsOpenSavedValue()
);
const handleAIThoughtToggle = (newAIThoughtsOpenValue: boolean) => {
setAIThoughtsIsOpenSavedValue(newAIThoughtsOpenValue);
setIsAIThoughtsOpen(newAIThoughtsOpenValue);
};
if (!searchResponse) {
return null;
@@ -95,19 +83,25 @@ export const SearchResultsDisplay = ({
);
}
if (answer === null && documents === null && quotes === null) {
if (error) {
return (
<div className="text-red-500 text-sm">
<div className="flex">
<AlertIcon size={16} className="text-red-500 my-auto mr-1" />
<p className="italic">{error}</p>
if (
answer === null &&
(documents === null || documents.length === 0) &&
quotes === null
) {
return (
<div className="mt-4">
{error ? (
<div className="text-red-500 text-sm">
<div className="flex">
<AlertIcon size={16} className="text-red-500 my-auto mr-1" />
<p className="italic">{error}</p>
</div>
</div>
</div>
);
}
return <div className="text-gray-300">No matching documents found.</div>;
) : (
<div className="text-gray-300">No matching documents found.</div>
)}
</div>
);
}
const dedupedQuotes: Quote[] = [];
@@ -130,13 +124,6 @@ export const SearchResultsDisplay = ({
searchResponse.suggestedFlowType === FlowType.QUESTION_ANSWER ||
defaultOverrides.forceDisplayQA;
let questionValidityCheckStatus: StatusOptions = "in-progress";
if (validQuestionResponse.answerable) {
questionValidityCheckStatus = "success";
} else if (validQuestionResponse.answerable === false) {
questionValidityCheckStatus = "failed";
}
return (
<>
{popup}
@@ -147,34 +134,17 @@ export const SearchResultsDisplay = ({
<h2 className="text font-bold my-auto mb-1 w-full">AI Answer</h2>
</div>
{!isPersona && (
<div className="mb-2 w-full">
<ResponseSection
status={questionValidityCheckStatus}
header={
validQuestionResponse.answerable === null ? (
<div className="flex ml-2">Evaluating question...</div>
) : (
<div className="flex ml-2">AI thoughts</div>
)
}
body={<div>{validQuestionResponse.reasoning}</div>}
desiredOpenStatus={isAIThoughtsOpen}
setDesiredOpenStatus={handleAIThoughtToggle}
/>
</div>
)}
<div className="mb-2 pt-1 border-t border-gray-700 w-full">
<AnswerSection
answer={answer}
quotes={quotes}
error={error}
isAnswerable={
validQuestionResponse.answerable || (isPersona ? true : null)
nonAnswerableReason={
validQuestionResponse.answerable === false && !isPersona
? validQuestionResponse.reasoning
: ""
}
isFetching={isFetching}
aiThoughtsIsOpen={isAIThoughtsOpen}
/>
</div>

View File

@@ -34,6 +34,7 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {
const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = {
reasoning: null,
answerable: null,
error: null,
};
interface SearchSectionProps {

View File

@@ -6,27 +6,26 @@ interface AnswerSectionProps {
answer: string | null;
quotes: Quote[] | null;
error: string | null;
isAnswerable: boolean | null;
nonAnswerableReason: string | null;
isFetching: boolean;
aiThoughtsIsOpen: boolean;
}
const AnswerHeader = ({
answer,
error,
quotes,
isAnswerable,
nonAnswerableReason,
isFetching,
}: AnswerSectionProps) => {
if (error) {
return <>Error while building answer</>;
} else if ((answer && quotes !== null) || !isFetching) {
if (isAnswerable === false) {
if (nonAnswerableReason) {
return <>Best effort AI answer</>;
}
return <>AI answer</>;
}
if (isAnswerable === false) {
if (nonAnswerableReason) {
return <>Building best effort AI answer...</>;
}
return <>Building answer...</>;
@@ -56,15 +55,10 @@ export const AnswerSection = (props: AnswerSectionProps) => {
let status = "in-progress" as StatusOptions;
if (props.error) {
status = "failed";
}
// if AI thoughts is visible, don't mark this as a success until that section
// is complete
else if (!props.aiThoughtsIsOpen || props.isAnswerable !== null) {
if (props.isAnswerable === false) {
status = "warning";
} else if ((props.quotes !== null && props.answer) || !props.isFetching) {
status = "success";
}
} else if (props.nonAnswerableReason) {
status = "warning";
} else if ((props.quotes !== null && props.answer) || !props.isFetching) {
status = "success";
}
return (
@@ -78,11 +72,18 @@ export const AnswerSection = (props: AnswerSectionProps) => {
body={
<div className="">
<AnswerBody {...props} />
{props.nonAnswerableReason && !props.isFetching && (
<div className="text-gray-300 mt-4 text-sm">
<b className="font-medium">Warning:</b> the AI did not think this
question was answerable.{" "}
<div className="italic mt-1 ml-2">
{props.nonAnswerableReason}
</div>
</div>
)}
</div>
}
desiredOpenStatus={
props.aiThoughtsIsOpen ? props.isAnswerable !== null : true
}
desiredOpenStatus={true}
isNotControllable={true}
/>
);

View File

@@ -1,16 +0,0 @@
const IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY = "isAIThoughtsOpen";
export const getAIThoughtsIsOpenSavedValue = () => {
// wrapping in `try / catch` to avoid SSR errors during development
try {
return (
localStorage.getItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY) === "true"
);
} catch (e) {
return false;
}
};
export const setAIThoughtsIsOpenSavedValue = (isOpen: boolean) => {
localStorage.setItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY, String(isOpen));
};

View File

@@ -116,4 +116,5 @@ export interface SearchRequestOverrides {
export interface ValidQuestionResponse {
answerable: boolean | null;
reasoning: string | null;
error: string | null;
}

View File

@@ -1,4 +1,8 @@
import { AnswerPiecePacket, ValidQuestionResponse } from "./interfaces";
import {
AnswerPiecePacket,
ErrorMessagePacket,
ValidQuestionResponse,
} from "./interfaces";
import { processRawChunkString } from "./streamingUtils";
export interface QuestionValidationArgs {
@@ -39,6 +43,7 @@ export const questionValidationStreamed = async <T>({
let previousPartialChunk: string | null = null;
while (true) {
const rawChunk = await reader?.read();
console.log(rawChunk);
if (!rawChunk) {
throw new Error("Unable to process chunk");
}
@@ -48,7 +53,7 @@
}
const [completedChunks, partialChunk] = processRawChunkString<
AnswerPiecePacket | ValidQuestionResponse
AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket
>(decoder.decode(value, { stream: true }), previousPartialChunk);
if (!completedChunks.length && !partialChunk) {
break;
@@ -66,6 +71,10 @@ export const questionValidationStreamed = async <T>({
if (Object.hasOwn(chunk, "answerable")) {
update({ answerable: (chunk as ValidQuestionResponse).answerable });
}
if (Object.hasOwn(chunk, "error")) {
update({ error: (chunk as ErrorMessagePacket).error });
}
});
}
};