Remove AI Thoughts by default (#783)

- Removes AI Thoughts by default; they are only shown when question validation fails
- Removes punctuation "words" from queries in addition to stopwords (Vespa ignores punctuation anyway)
- Fixes the Vespa deletion script for larger document counts
2025-09-20 13:05:49 +02:00 · 2023-11-29 01:00:53 -08:00
parent fcb7f6fcc0
commit 37daf4f3e4
10 changed files with 92 additions and 106 deletions
--- a/backend/danswer/document_index/vespa/index.py
+++ b/backend/danswer/document_index/vespa/index.py
@@ -57,7 +57,7 @@ from danswer.indexing.models import InferenceChunk
 from danswer.search.models import IndexFilters
 from danswer.search.search_runner import embed_query
 from danswer.search.search_runner import query_processing
-from danswer.search.search_runner import remove_stop_words
+from danswer.search.search_runner import remove_stop_words_and_punctuation
 from danswer.utils.batching import batch_generator
 from danswer.utils.logger import setup_logger
@@ -732,7 +732,9 @@ class VespaIndex(DocumentIndex):
        query_embedding = embed_query(query)
        query_keywords = (
-            " ".join(remove_stop_words(query)) if edit_keyword_query else query
+            " ".join(remove_stop_words_and_punctuation(query))
            if edit_keyword_query
            else query
        )
        params: dict[str, str | int] = {
@@ -773,7 +775,9 @@ class VespaIndex(DocumentIndex):
        query_embedding = embed_query(query)
        query_keywords = (
-            " ".join(remove_stop_words(query)) if edit_keyword_query else query
+            " ".join(remove_stop_words_and_punctuation(query))
            if edit_keyword_query
            else query
        )
        params: dict[str, str | int | float] = {
--- a/backend/danswer/search/danswer_helper.py
+++ b/backend/danswer/search/danswer_helper.py
@@ -4,7 +4,7 @@ from danswer.search.models import QueryFlow
 from danswer.search.models import SearchType
 from danswer.search.search_nlp_models import get_default_tokenizer
 from danswer.search.search_nlp_models import IntentModel
-from danswer.search.search_runner import remove_stop_words
+from danswer.search.search_runner import remove_stop_words_and_punctuation
 from danswer.server.models import HelperResponse
 from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time
@@ -67,7 +67,7 @@ def recommend_search_flow(
    # Heuristics based decisions
    words = query.split()
-    non_stopwords = remove_stop_words(query)
+    non_stopwords = remove_stop_words_and_punctuation(query)
    non_stopword_percent = len(non_stopwords) / len(words)
    # UNK tokens -> suggest Keyword (still may be valid QA)
--- a/backend/danswer/search/search_runner.py
+++ b/backend/danswer/search/search_runner.py
@@ -1,3 +1,4 @@
 import string
 from collections.abc import Callable
 from collections.abc import Iterator
 from copy import deepcopy
@@ -55,17 +56,21 @@ def lemmatize_text(text: str) -> list[str]:
    return [lemmatizer.lemmatize(word) for word in word_tokens]
-def remove_stop_words(text: str) -> list[str]:
+def remove_stop_words_and_punctuation(text: str) -> list[str]:
    stop_words = set(stopwords.words("english"))
    word_tokens = word_tokenize(text)
-    text_trimmed = [word for word in word_tokens if word.casefold() not in stop_words]
+    text_trimmed = [
        word
        for word in word_tokens
        if (word.casefold() not in stop_words and word not in string.punctuation)
    ]
    return text_trimmed or word_tokens
 def query_processing(
    query: str,
 ) -> str:
-    query = " ".join(remove_stop_words(query))
+    query = " ".join(remove_stop_words_and_punctuation(query))
    query = " ".join(lemmatize_text(query))
    return query
--- a/backend/scripts/reset_indexes.py
+++ b/backend/scripts/reset_indexes.py
@@ -16,9 +16,20 @@ logger = setup_logger()
 def wipe_vespa_index() -> None:
-    params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
+    continuation = None
-    response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
+    should_continue = True
-    response.raise_for_status()
+    while should_continue:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            params = {**params, "continuation": continuation}
        response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
        response.raise_for_status()
        response_json = response.json()
        print(response_json)
        continuation = response_json.get("continuation")
        should_continue = bool(continuation)
 if __name__ == "__main__":
--- a/web/src/components/search/SearchResultsDisplay.tsx
+++ b/web/src/components/search/SearchResultsDisplay.tsx
@@ -14,13 +14,10 @@ import { DocumentDisplay } from "./DocumentDisplay";
 import { ResponseSection, StatusOptions } from "./results/ResponseSection";
 import { QuotesSection } from "./results/QuotesSection";
 import { AnswerSection } from "./results/AnswerSection";
 import {
  getAIThoughtsIsOpenSavedValue,
  setAIThoughtsIsOpenSavedValue,
 } from "@/lib/search/aiThoughtUtils";
 import { ThreeDots } from "react-loader-spinner";
 import { usePopup } from "../admin/connectors/Popup";
 import { AlertIcon } from "../icons/icons";
 import Link from "next/link";
 const removeDuplicateDocs = (documents: DanswerDocument[]) => {
  const seen = new Set<string>();
@@ -45,29 +42,20 @@ const getSelectedDocumentIds = (
  return selectedDocumentIds;
 };
 interface SearchResultsDisplayProps {
  searchResponse: SearchResponse | null;
  validQuestionResponse: ValidQuestionResponse;
  isFetching: boolean;
  defaultOverrides: SearchDefaultOverrides;
  personaName?: string | null;
 }
 export const SearchResultsDisplay = ({
  searchResponse,
  validQuestionResponse,
  isFetching,
  defaultOverrides,
  personaName = null,
-}: SearchResultsDisplayProps) => {
+}: {
  searchResponse: SearchResponse | null;
  validQuestionResponse: ValidQuestionResponse;
  isFetching: boolean;
  defaultOverrides: SearchDefaultOverrides;
  personaName?: string | null;
 }) => {
  const { popup, setPopup } = usePopup();
  const [isAIThoughtsOpen, setIsAIThoughtsOpen] = React.useState<boolean>(
    getAIThoughtsIsOpenSavedValue()
  );
  const handleAIThoughtToggle = (newAIThoughtsOpenValue: boolean) => {
    setAIThoughtsIsOpenSavedValue(newAIThoughtsOpenValue);
    setIsAIThoughtsOpen(newAIThoughtsOpenValue);
  };
  if (!searchResponse) {
    return null;
@@ -95,19 +83,25 @@ export const SearchResultsDisplay = ({
    );
  }
-  if (answer === null && documents === null && quotes === null) {
+  if (
-    if (error) {
+    answer === null &&
-      return (
+    (documents === null || documents.length === 0) &&
-        <div className="text-red-500 text-sm">
+    quotes === null
-          <div className="flex">
+  ) {
-            <AlertIcon size={16} className="text-red-500 my-auto mr-1" />
+    return (
-            <p className="italic">{error}</p>
+      <div className="mt-4">
        {error ? (
          <div className="text-red-500 text-sm">
            <div className="flex">
              <AlertIcon size={16} className="text-red-500 my-auto mr-1" />
              <p className="italic">{error}</p>
            </div>
          </div>
-        </div>
+        ) : (
-      );
+          <div className="text-gray-300">No matching documents found.</div>
-    }
+        )}
-
+      </div>
-    return <div className="text-gray-300">No matching documents found.</div>;
+    );
  }
  const dedupedQuotes: Quote[] = [];
@@ -130,13 +124,6 @@ export const SearchResultsDisplay = ({
    searchResponse.suggestedFlowType === FlowType.QUESTION_ANSWER ||
    defaultOverrides.forceDisplayQA;
  let questionValidityCheckStatus: StatusOptions = "in-progress";
  if (validQuestionResponse.answerable) {
    questionValidityCheckStatus = "success";
  } else if (validQuestionResponse.answerable === false) {
    questionValidityCheckStatus = "failed";
  }
  return (
    <>
      {popup}
@@ -147,34 +134,17 @@ export const SearchResultsDisplay = ({
              <h2 className="text font-bold my-auto mb-1 w-full">AI Answer</h2>
            </div>
            {!isPersona && (
              <div className="mb-2 w-full">
                <ResponseSection
                  status={questionValidityCheckStatus}
                  header={
                    validQuestionResponse.answerable === null ? (
                      <div className="flex ml-2">Evaluating question...</div>
                    ) : (
                      <div className="flex ml-2">AI thoughts</div>
                    )
                  }
                  body={<div>{validQuestionResponse.reasoning}</div>}
                  desiredOpenStatus={isAIThoughtsOpen}
                  setDesiredOpenStatus={handleAIThoughtToggle}
                />
              </div>
            )}
            <div className="mb-2 pt-1 border-t border-gray-700 w-full">
              <AnswerSection
                answer={answer}
                quotes={quotes}
                error={error}
-                isAnswerable={
+                nonAnswerableReason={
-                  validQuestionResponse.answerable || (isPersona ? true : null)
+                  validQuestionResponse.answerable === false && !isPersona
                    ? validQuestionResponse.reasoning
                    : ""
                }
                isFetching={isFetching}
                aiThoughtsIsOpen={isAIThoughtsOpen}
              />
            </div>
--- a/web/src/components/search/SearchSection.tsx
+++ b/web/src/components/search/SearchSection.tsx
@@ -34,6 +34,7 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {
 const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = {
  reasoning: null,
  answerable: null,
  error: null,
 };
 interface SearchSectionProps {
--- a/web/src/components/search/results/AnswerSection.tsx
+++ b/web/src/components/search/results/AnswerSection.tsx
@@ -6,27 +6,26 @@ interface AnswerSectionProps {
  answer: string | null;
  quotes: Quote[] | null;
  error: string | null;
-  isAnswerable: boolean | null;
+  nonAnswerableReason: string | null;
  isFetching: boolean;
  aiThoughtsIsOpen: boolean;
 }
 const AnswerHeader = ({
  answer,
  error,
  quotes,
-  isAnswerable,
+  nonAnswerableReason,
  isFetching,
 }: AnswerSectionProps) => {
  if (error) {
    return <>Error while building answer</>;
  } else if ((answer && quotes !== null) || !isFetching) {
-    if (isAnswerable === false) {
+    if (nonAnswerableReason) {
      return <>Best effort AI answer</>;
    }
    return <>AI answer</>;
  }
-  if (isAnswerable === false) {
+  if (nonAnswerableReason) {
    return <>Building best effort AI answer...</>;
  }
  return <>Building answer...</>;
@@ -56,15 +55,10 @@ export const AnswerSection = (props: AnswerSectionProps) => {
  let status = "in-progress" as StatusOptions;
  if (props.error) {
    status = "failed";
-  }
+  } else if (props.nonAnswerableReason) {
-  // if AI thoughts is visible, don't mark this as a success until that section
+    status = "warning";
-  // is complete
+  } else if ((props.quotes !== null && props.answer) || !props.isFetching) {
-  else if (!props.aiThoughtsIsOpen || props.isAnswerable !== null) {
+    status = "success";
    if (props.isAnswerable === false) {
      status = "warning";
    } else if ((props.quotes !== null && props.answer) || !props.isFetching) {
      status = "success";
    }
  }
  return (
@@ -78,11 +72,18 @@ export const AnswerSection = (props: AnswerSectionProps) => {
      body={
        <div className="">
          <AnswerBody {...props} />
          {props.nonAnswerableReason && !props.isFetching && (
            <div className="text-gray-300 mt-4 text-sm">
              <b className="font-medium">Warning:</b> the AI did not think this
              question was answerable.{" "}
              <div className="italic mt-1 ml-2">
                {props.nonAnswerableReason}
              </div>
            </div>
          )}
        </div>
      }
-      desiredOpenStatus={
+      desiredOpenStatus={true}
        props.aiThoughtsIsOpen ? props.isAnswerable !== null : true
      }
      isNotControllable={true}
    />
  );
--- a/web/src/lib/search/aiThoughtUtils.ts
+++ b/web/src/lib/search/aiThoughtUtils.ts
@@ -1,16 +0,0 @@
 const IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY = "isAIThoughtsOpen";
 export const getAIThoughtsIsOpenSavedValue = () => {
  // wrapping in `try / catch` to avoid SSR errors during development
  try {
    return (
      localStorage.getItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY) === "true"
    );
  } catch (e) {
    return false;
  }
 };
 export const setAIThoughtsIsOpenSavedValue = (isOpen: boolean) => {
  localStorage.setItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY, String(isOpen));
 };
--- a/web/src/lib/search/interfaces.ts
+++ b/web/src/lib/search/interfaces.ts
@@ -116,4 +116,5 @@ export interface SearchRequestOverrides {
 export interface ValidQuestionResponse {
  answerable: boolean | null;
  reasoning: string | null;
  error: string | null;
 }
--- a/web/src/lib/search/streamingQuestionValidation.ts
+++ b/web/src/lib/search/streamingQuestionValidation.ts
@@ -1,4 +1,8 @@
-import { AnswerPiecePacket, ValidQuestionResponse } from "./interfaces";
+import {
  AnswerPiecePacket,
  ErrorMessagePacket,
  ValidQuestionResponse,
 } from "./interfaces";
 import { processRawChunkString } from "./streamingUtils";
 export interface QuestionValidationArgs {
@@ -39,6 +43,7 @@ export const questionValidationStreamed = async <T>({
  let previousPartialChunk: string | null = null;
  while (true) {
    const rawChunk = await reader?.read();
    console.log(rawChunk);
    if (!rawChunk) {
      throw new Error("Unable to process chunk");
    }
@@ -48,7 +53,7 @@ export const questionValidationStreamed = async <T>({
    }
    const [completedChunks, partialChunk] = processRawChunkString<
-      AnswerPiecePacket | ValidQuestionResponse
+      AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket
    >(decoder.decode(value, { stream: true }), previousPartialChunk);
    if (!completedChunks.length && !partialChunk) {
      break;
@@ -66,6 +71,10 @@ export const questionValidationStreamed = async <T>({
      if (Object.hasOwn(chunk, "answerable")) {
        update({ answerable: (chunk as ValidQuestionResponse).answerable });
      }
      if (Object.hasOwn(chunk, "error")) {
        update({ error: (chunk as ErrorMessagePacket).error });
      }
    });
  }
 };