diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index 6f2bbe1e3..d82c59eb6 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -57,7 +57,7 @@ from danswer.indexing.models import InferenceChunk from danswer.search.models import IndexFilters from danswer.search.search_runner import embed_query from danswer.search.search_runner import query_processing -from danswer.search.search_runner import remove_stop_words +from danswer.search.search_runner import remove_stop_words_and_punctuation from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger @@ -732,7 +732,9 @@ class VespaIndex(DocumentIndex): query_embedding = embed_query(query) query_keywords = ( - " ".join(remove_stop_words(query)) if edit_keyword_query else query + " ".join(remove_stop_words_and_punctuation(query)) + if edit_keyword_query + else query ) params: dict[str, str | int] = { @@ -773,7 +775,9 @@ class VespaIndex(DocumentIndex): query_embedding = embed_query(query) query_keywords = ( - " ".join(remove_stop_words(query)) if edit_keyword_query else query + " ".join(remove_stop_words_and_punctuation(query)) + if edit_keyword_query + else query ) params: dict[str, str | int | float] = { diff --git a/backend/danswer/search/danswer_helper.py b/backend/danswer/search/danswer_helper.py index 216375bc5..0ccdbb68b 100644 --- a/backend/danswer/search/danswer_helper.py +++ b/backend/danswer/search/danswer_helper.py @@ -4,7 +4,7 @@ from danswer.search.models import QueryFlow from danswer.search.models import SearchType from danswer.search.search_nlp_models import get_default_tokenizer from danswer.search.search_nlp_models import IntentModel -from danswer.search.search_runner import remove_stop_words +from danswer.search.search_runner import remove_stop_words_and_punctuation from danswer.server.models import HelperResponse from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time @@ -67,7 +67,7 @@ def recommend_search_flow( # Heuristics based decisions words = query.split() - non_stopwords = remove_stop_words(query) + non_stopwords = remove_stop_words_and_punctuation(query) non_stopword_percent = len(non_stopwords) / len(words) # UNK tokens -> suggest Keyword (still may be valid QA) diff --git a/backend/danswer/search/search_runner.py b/backend/danswer/search/search_runner.py index 1dbca5a3f..285061d47 100644 --- a/backend/danswer/search/search_runner.py +++ b/backend/danswer/search/search_runner.py @@ -1,3 +1,4 @@ +import string from collections.abc import Callable from collections.abc import Iterator from copy import deepcopy @@ -55,17 +56,21 @@ def lemmatize_text(text: str) -> list[str]: return [lemmatizer.lemmatize(word) for word in word_tokens] -def remove_stop_words(text: str) -> list[str]: +def remove_stop_words_and_punctuation(text: str) -> list[str]: stop_words = set(stopwords.words("english")) word_tokens = word_tokenize(text) - text_trimmed = [word for word in word_tokens if word.casefold() not in stop_words] + text_trimmed = [ + word + for word in word_tokens + if (word.casefold() not in stop_words and word not in string.punctuation) + ] return text_trimmed or word_tokens def query_processing( query: str, ) -> str: - query = " ".join(remove_stop_words(query)) + query = " ".join(remove_stop_words_and_punctuation(query)) query = " ".join(lemmatize_text(query)) return query diff --git a/backend/scripts/reset_indexes.py b/backend/scripts/reset_indexes.py index bff988c30..4ec8d9bf3 100644 --- a/backend/scripts/reset_indexes.py +++ b/backend/scripts/reset_indexes.py @@ -16,9 +16,20 @@ logger = setup_logger() def wipe_vespa_index() -> None: - params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME} - response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params) - response.raise_for_status() + continuation = None + should_continue = True + while should_continue: + params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME} + if continuation: + params = {**params, "continuation": continuation} + response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params) + response.raise_for_status() + + response_json = response.json() + print(response_json) + + continuation = response_json.get("continuation") + should_continue = bool(continuation) if __name__ == "__main__": diff --git a/web/src/components/search/SearchResultsDisplay.tsx b/web/src/components/search/SearchResultsDisplay.tsx index 8ffd1f535..c221cc684 100644 --- a/web/src/components/search/SearchResultsDisplay.tsx +++ b/web/src/components/search/SearchResultsDisplay.tsx @@ -14,13 +14,10 @@ import { DocumentDisplay } from "./DocumentDisplay"; import { ResponseSection, StatusOptions } from "./results/ResponseSection"; import { QuotesSection } from "./results/QuotesSection"; import { AnswerSection } from "./results/AnswerSection"; -import { - getAIThoughtsIsOpenSavedValue, - setAIThoughtsIsOpenSavedValue, -} from "@/lib/search/aiThoughtUtils"; import { ThreeDots } from "react-loader-spinner"; import { usePopup } from "../admin/connectors/Popup"; import { AlertIcon } from "../icons/icons"; +import Link from "next/link"; const removeDuplicateDocs = (documents: DanswerDocument[]) => { const seen = new Set(); @@ -45,29 +42,20 @@ const getSelectedDocumentIds = ( return selectedDocumentIds; }; -interface SearchResultsDisplayProps { - searchResponse: SearchResponse | null; - validQuestionResponse: ValidQuestionResponse; - isFetching: boolean; - defaultOverrides: SearchDefaultOverrides; - personaName?: string | null; -} - export const SearchResultsDisplay = ({ searchResponse, validQuestionResponse, isFetching, defaultOverrides, personaName = null, -}: SearchResultsDisplayProps) => { +}: { + searchResponse: SearchResponse | null; + validQuestionResponse: ValidQuestionResponse; + isFetching: boolean; + defaultOverrides: SearchDefaultOverrides; + personaName?: string | null; +}) => { const { popup, setPopup } = usePopup(); - const [isAIThoughtsOpen, setIsAIThoughtsOpen] = React.useState( - getAIThoughtsIsOpenSavedValue() - ); - const handleAIThoughtToggle = (newAIThoughtsOpenValue: boolean) => { - setAIThoughtsIsOpenSavedValue(newAIThoughtsOpenValue); - setIsAIThoughtsOpen(newAIThoughtsOpenValue); - }; if (!searchResponse) { return null; @@ -95,19 +83,25 @@ export const SearchResultsDisplay = ({ ); } - if (answer === null && documents === null && quotes === null) { - if (error) { - return ( -
-
- -

{error}

+ if ( + answer === null && + (documents === null || documents.length === 0) && + quotes === null + ) { + return ( +
+ {error ? ( +
+
+ +

{error}

+
-
- ); - } - - return
No matching documents found.
; + ) : ( +
No matching documents found.
+ )} +
+ ); } const dedupedQuotes: Quote[] = []; @@ -130,13 +124,6 @@ export const SearchResultsDisplay = ({ searchResponse.suggestedFlowType === FlowType.QUESTION_ANSWER || defaultOverrides.forceDisplayQA; - let questionValidityCheckStatus: StatusOptions = "in-progress"; - if (validQuestionResponse.answerable) { - questionValidityCheckStatus = "success"; - } else if (validQuestionResponse.answerable === false) { - questionValidityCheckStatus = "failed"; - } - return ( <> {popup} @@ -147,34 +134,17 @@ export const SearchResultsDisplay = ({

AI Answer

- {!isPersona && ( -
- Evaluating question...
- ) : ( -
AI thoughts
- ) - } - body={
{validQuestionResponse.reasoning}
} - desiredOpenStatus={isAIThoughtsOpen} - setDesiredOpenStatus={handleAIThoughtToggle} - /> - - )} -
diff --git a/web/src/components/search/SearchSection.tsx b/web/src/components/search/SearchSection.tsx index ecd526743..13e7d9f46 100644 --- a/web/src/components/search/SearchSection.tsx +++ b/web/src/components/search/SearchSection.tsx @@ -34,6 +34,7 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = { const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = { reasoning: null, answerable: null, + error: null, }; interface SearchSectionProps { diff --git a/web/src/components/search/results/AnswerSection.tsx b/web/src/components/search/results/AnswerSection.tsx index f65c08dda..bc3e1b1a0 100644 --- a/web/src/components/search/results/AnswerSection.tsx +++ b/web/src/components/search/results/AnswerSection.tsx @@ -6,27 +6,26 @@ interface AnswerSectionProps { answer: string | null; quotes: Quote[] | null; error: string | null; - isAnswerable: boolean | null; + nonAnswerableReason: string | null; isFetching: boolean; - aiThoughtsIsOpen: boolean; } const AnswerHeader = ({ answer, error, quotes, - isAnswerable, + nonAnswerableReason, isFetching, }: AnswerSectionProps) => { if (error) { return <>Error while building answer; } else if ((answer && quotes !== null) || !isFetching) { - if (isAnswerable === false) { + if (nonAnswerableReason) { return <>Best effort AI answer; } return <>AI answer; } - if (isAnswerable === false) { + if (nonAnswerableReason) { return <>Building best effort AI answer...; } return <>Building answer...; @@ -56,15 +55,10 @@ export const AnswerSection = (props: AnswerSectionProps) => { let status = "in-progress" as StatusOptions; if (props.error) { status = "failed"; - } - // if AI thoughts is visible, don't mark this as a success until that section - // is complete - else if (!props.aiThoughtsIsOpen || props.isAnswerable !== null) { - if (props.isAnswerable === false) { - status = "warning"; - } else if ((props.quotes !== null && props.answer) || !props.isFetching) { - status = "success"; - } + } else if (props.nonAnswerableReason) { + status = "warning"; + } else if ((props.quotes !== null && props.answer) || !props.isFetching) { + status = "success"; } return ( @@ -78,11 +72,18 @@ export const AnswerSection = (props: AnswerSectionProps) => { body={
+ {props.nonAnswerableReason && !props.isFetching && ( +
+ Warning: the AI did not think this + question was answerable.{" "} +
+ {props.nonAnswerableReason} +
+
+ )}
} - desiredOpenStatus={ - props.aiThoughtsIsOpen ? props.isAnswerable !== null : true - } + desiredOpenStatus={true} isNotControllable={true} /> ); diff --git a/web/src/lib/search/aiThoughtUtils.ts b/web/src/lib/search/aiThoughtUtils.ts deleted file mode 100644 index 2635aa768..000000000 --- a/web/src/lib/search/aiThoughtUtils.ts +++ /dev/null @@ -1,16 +0,0 @@ -const IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY = "isAIThoughtsOpen"; - -export const getAIThoughtsIsOpenSavedValue = () => { - // wrapping in `try / catch` to avoid SSR errors during development - try { - return ( - localStorage.getItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY) === "true" - ); - } catch (e) { - return false; - } -}; - -export const setAIThoughtsIsOpenSavedValue = (isOpen: boolean) => { - localStorage.setItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY, String(isOpen)); -}; diff --git a/web/src/lib/search/interfaces.ts b/web/src/lib/search/interfaces.ts index 6ece37907..4721b43db 100644 --- a/web/src/lib/search/interfaces.ts +++ b/web/src/lib/search/interfaces.ts @@ -116,4 +116,5 @@ export interface SearchRequestOverrides { export interface ValidQuestionResponse { answerable: boolean | null; reasoning: string | null; + error: string | null; } diff --git a/web/src/lib/search/streamingQuestionValidation.ts b/web/src/lib/search/streamingQuestionValidation.ts index 855416a67..a5a7b5a6f 100644 --- a/web/src/lib/search/streamingQuestionValidation.ts +++ b/web/src/lib/search/streamingQuestionValidation.ts @@ -1,4 +1,8 @@ -import { AnswerPiecePacket, ValidQuestionResponse } from "./interfaces"; +import { + AnswerPiecePacket, + ErrorMessagePacket, + ValidQuestionResponse, +} from "./interfaces"; import { processRawChunkString } from "./streamingUtils"; export interface QuestionValidationArgs { @@ -39,6 +43,7 @@ export const questionValidationStreamed = async ({ let previousPartialChunk: string | null = null; while (true) { const rawChunk = await reader?.read(); + console.log(rawChunk); if (!rawChunk) { throw new Error("Unable to process chunk"); } @@ -48,7 +53,7 @@ export const questionValidationStreamed = async ({ } const [completedChunks, partialChunk] = processRawChunkString< - AnswerPiecePacket | ValidQuestionResponse + AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket >(decoder.decode(value, { stream: true }), previousPartialChunk); if (!completedChunks.length && !partialChunk) { break; @@ -66,6 +71,10 @@ export const questionValidationStreamed = async ({ if (Object.hasOwn(chunk, "answerable")) { update({ answerable: (chunk as ValidQuestionResponse).answerable }); } + + if (Object.hasOwn(chunk, "error")) { + update({ error: (chunk as ErrorMessagePacket).error }); + } }); } };