Remove AI Thoughts by default (#783)

- Removes the AI Thoughts section by default - the validation reasoning is now only shown when question validation fails
- Removes punctuation "words" from queries in addition to stopwords, since Vespa ignores punctuation anyway (see the sketch below)
- Fixes the Vespa deletion script so it handles larger document counts
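
As an illustration of the stopword/punctuation change, here is a minimal, self-contained sketch of what the renamed helper now filters out of a query. It mirrors the remove_stop_words_and_punctuation implementation in the diff below; the sample query and printed output are illustrative only, and it assumes NLTK's punkt and stopwords data have already been downloaded.

import string

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


def remove_stop_words_and_punctuation(text: str) -> list[str]:
    # Drop English stopwords and standalone punctuation tokens; fall back to
    # the raw tokens if everything would be filtered out.
    stop_words = set(stopwords.words("english"))
    word_tokens = word_tokenize(text)
    trimmed = [
        word
        for word in word_tokens
        if word.casefold() not in stop_words and word not in string.punctuation
    ]
    return trimmed or word_tokens


print(remove_stop_words_and_punctuation("What is the refund policy?"))
# ['refund', 'policy'] -- "What", "is", "the" are stopwords and "?" is punctuation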
Chris Weaver 2023-11-29 01:00:53 -08:00 committed by GitHub
parent fcb7f6fcc0
commit 37daf4f3e4
10 changed files with 92 additions and 106 deletions

View File

@@ -57,7 +57,7 @@ from danswer.indexing.models import InferenceChunk
from danswer.search.models import IndexFilters
from danswer.search.search_runner import embed_query
from danswer.search.search_runner import query_processing
from danswer.search.search_runner import remove_stop_words
from danswer.search.search_runner import remove_stop_words_and_punctuation
from danswer.utils.batching import batch_generator
from danswer.utils.logger import setup_logger
@@ -732,7 +732,9 @@ class VespaIndex(DocumentIndex):
query_embedding = embed_query(query)
query_keywords = (
" ".join(remove_stop_words(query)) if edit_keyword_query else query
" ".join(remove_stop_words_and_punctuation(query))
if edit_keyword_query
else query
)
params: dict[str, str | int] = {
@@ -773,7 +775,9 @@ class VespaIndex(DocumentIndex):
query_embedding = embed_query(query)
query_keywords = (
" ".join(remove_stop_words(query)) if edit_keyword_query else query
" ".join(remove_stop_words_and_punctuation(query))
if edit_keyword_query
else query
)
params: dict[str, str | int | float] = {

View File

@@ -4,7 +4,7 @@ from danswer.search.models import QueryFlow
from danswer.search.models import SearchType
from danswer.search.search_nlp_models import get_default_tokenizer
from danswer.search.search_nlp_models import IntentModel
from danswer.search.search_runner import remove_stop_words
from danswer.search.search_runner import remove_stop_words_and_punctuation
from danswer.server.models import HelperResponse
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
@@ -67,7 +67,7 @@ def recommend_search_flow(
# Heuristics based decisions
words = query.split()
non_stopwords = remove_stop_words(query)
non_stopwords = remove_stop_words_and_punctuation(query)
non_stopword_percent = len(non_stopwords) / len(words)
# UNK tokens -> suggest Keyword (still may be valid QA)

View File

@@ -1,3 +1,4 @@
import string
from collections.abc import Callable
from collections.abc import Iterator
from copy import deepcopy
@@ -55,17 +56,21 @@ def lemmatize_text(text: str) -> list[str]:
return [lemmatizer.lemmatize(word) for word in word_tokens]
def remove_stop_words(text: str) -> list[str]:
def remove_stop_words_and_punctuation(text: str) -> list[str]:
stop_words = set(stopwords.words("english"))
word_tokens = word_tokenize(text)
text_trimmed = [word for word in word_tokens if word.casefold() not in stop_words]
text_trimmed = [
word
for word in word_tokens
if (word.casefold() not in stop_words and word not in string.punctuation)
]
return text_trimmed or word_tokens
def query_processing(
query: str,
) -> str:
query = " ".join(remove_stop_words(query))
query = " ".join(remove_stop_words_and_punctuation(query))
query = " ".join(lemmatize_text(query))
return query

View File

@@ -16,9 +16,20 @@ logger = setup_logger()
def wipe_vespa_index() -> None:
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
response.raise_for_status()
continuation = None
should_continue = True
while should_continue:
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
if continuation:
params = {**params, "continuation": continuation}
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
response.raise_for_status()
response_json = response.json()
print(response_json)
continuation = response_json.get("continuation")
should_continue = bool(continuation)
if __name__ == "__main__":
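
For context on why the wipe is now a loop: a selection-based DELETE against Vespa's document/v1 API only runs for a bounded time per request and returns a continuation token when documents remain, so the script has to resubmit the request with that token until none comes back. A stripped-down sketch of the same pattern (drain_continuations and delete_batch are hypothetical names, not part of this codebase):

from collections.abc import Callable
from typing import Any


def drain_continuations(delete_batch: Callable[[str | None], dict[str, Any]]) -> None:
    # Re-issue the delete request, threading the continuation token through,
    # until the response no longer includes one.
    continuation: str | None = None
    while True:
        response_json = delete_batch(continuation)
        continuation = response_json.get("continuation")
        if not continuation:
            break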

View File

@@ -14,13 +14,10 @@ import { DocumentDisplay } from "./DocumentDisplay";
import { ResponseSection, StatusOptions } from "./results/ResponseSection";
import { QuotesSection } from "./results/QuotesSection";
import { AnswerSection } from "./results/AnswerSection";
import {
getAIThoughtsIsOpenSavedValue,
setAIThoughtsIsOpenSavedValue,
} from "@/lib/search/aiThoughtUtils";
import { ThreeDots } from "react-loader-spinner";
import { usePopup } from "../admin/connectors/Popup";
import { AlertIcon } from "../icons/icons";
import Link from "next/link";
const removeDuplicateDocs = (documents: DanswerDocument[]) => {
const seen = new Set<string>();
@@ -45,29 +42,20 @@ const getSelectedDocumentIds = (
return selectedDocumentIds;
};
interface SearchResultsDisplayProps {
searchResponse: SearchResponse | null;
validQuestionResponse: ValidQuestionResponse;
isFetching: boolean;
defaultOverrides: SearchDefaultOverrides;
personaName?: string | null;
}
export const SearchResultsDisplay = ({
searchResponse,
validQuestionResponse,
isFetching,
defaultOverrides,
personaName = null,
}: SearchResultsDisplayProps) => {
}: {
searchResponse: SearchResponse | null;
validQuestionResponse: ValidQuestionResponse;
isFetching: boolean;
defaultOverrides: SearchDefaultOverrides;
personaName?: string | null;
}) => {
const { popup, setPopup } = usePopup();
const [isAIThoughtsOpen, setIsAIThoughtsOpen] = React.useState<boolean>(
getAIThoughtsIsOpenSavedValue()
);
const handleAIThoughtToggle = (newAIThoughtsOpenValue: boolean) => {
setAIThoughtsIsOpenSavedValue(newAIThoughtsOpenValue);
setIsAIThoughtsOpen(newAIThoughtsOpenValue);
};
if (!searchResponse) {
return null;
@@ -95,19 +83,25 @@ export const SearchResultsDisplay = ({
);
}
if (answer === null && documents === null && quotes === null) {
if (error) {
return (
<div className="text-red-500 text-sm">
<div className="flex">
<AlertIcon size={16} className="text-red-500 my-auto mr-1" />
<p className="italic">{error}</p>
if (
answer === null &&
(documents === null || documents.length === 0) &&
quotes === null
) {
return (
<div className="mt-4">
{error ? (
<div className="text-red-500 text-sm">
<div className="flex">
<AlertIcon size={16} className="text-red-500 my-auto mr-1" />
<p className="italic">{error}</p>
</div>
</div>
</div>
);
}
return <div className="text-gray-300">No matching documents found.</div>;
) : (
<div className="text-gray-300">No matching documents found.</div>
)}
</div>
);
}
const dedupedQuotes: Quote[] = [];
@@ -130,13 +124,6 @@ export const SearchResultsDisplay = ({
searchResponse.suggestedFlowType === FlowType.QUESTION_ANSWER ||
defaultOverrides.forceDisplayQA;
let questionValidityCheckStatus: StatusOptions = "in-progress";
if (validQuestionResponse.answerable) {
questionValidityCheckStatus = "success";
} else if (validQuestionResponse.answerable === false) {
questionValidityCheckStatus = "failed";
}
return (
<>
{popup}
@@ -147,34 +134,17 @@ export const SearchResultsDisplay = ({
<h2 className="text font-bold my-auto mb-1 w-full">AI Answer</h2>
</div>
{!isPersona && (
<div className="mb-2 w-full">
<ResponseSection
status={questionValidityCheckStatus}
header={
validQuestionResponse.answerable === null ? (
<div className="flex ml-2">Evaluating question...</div>
) : (
<div className="flex ml-2">AI thoughts</div>
)
}
body={<div>{validQuestionResponse.reasoning}</div>}
desiredOpenStatus={isAIThoughtsOpen}
setDesiredOpenStatus={handleAIThoughtToggle}
/>
</div>
)}
<div className="mb-2 pt-1 border-t border-gray-700 w-full">
<AnswerSection
answer={answer}
quotes={quotes}
error={error}
isAnswerable={
validQuestionResponse.answerable || (isPersona ? true : null)
nonAnswerableReason={
validQuestionResponse.answerable === false && !isPersona
? validQuestionResponse.reasoning
: ""
}
isFetching={isFetching}
aiThoughtsIsOpen={isAIThoughtsOpen}
/>
</div>

View File

@@ -34,6 +34,7 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {
const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = {
reasoning: null,
answerable: null,
error: null,
};
interface SearchSectionProps {

View File

@@ -6,27 +6,26 @@ interface AnswerSectionProps {
answer: string | null;
quotes: Quote[] | null;
error: string | null;
isAnswerable: boolean | null;
nonAnswerableReason: string | null;
isFetching: boolean;
aiThoughtsIsOpen: boolean;
}
const AnswerHeader = ({
answer,
error,
quotes,
isAnswerable,
nonAnswerableReason,
isFetching,
}: AnswerSectionProps) => {
if (error) {
return <>Error while building answer</>;
} else if ((answer && quotes !== null) || !isFetching) {
if (isAnswerable === false) {
if (nonAnswerableReason) {
return <>Best effort AI answer</>;
}
return <>AI answer</>;
}
if (isAnswerable === false) {
if (nonAnswerableReason) {
return <>Building best effort AI answer...</>;
}
return <>Building answer...</>;
@@ -56,15 +55,10 @@ export const AnswerSection = (props: AnswerSectionProps) => {
let status = "in-progress" as StatusOptions;
if (props.error) {
status = "failed";
}
// if AI thoughts is visible, don't mark this as a success until that section
// is complete
else if (!props.aiThoughtsIsOpen || props.isAnswerable !== null) {
if (props.isAnswerable === false) {
status = "warning";
} else if ((props.quotes !== null && props.answer) || !props.isFetching) {
status = "success";
}
} else if (props.nonAnswerableReason) {
status = "warning";
} else if ((props.quotes !== null && props.answer) || !props.isFetching) {
status = "success";
}
return (
@@ -78,11 +72,18 @@ export const AnswerSection = (props: AnswerSectionProps) => {
body={
<div className="">
<AnswerBody {...props} />
{props.nonAnswerableReason && !props.isFetching && (
<div className="text-gray-300 mt-4 text-sm">
<b className="font-medium">Warning:</b> the AI did not think this
question was answerable.{" "}
<div className="italic mt-1 ml-2">
{props.nonAnswerableReason}
</div>
</div>
)}
</div>
}
desiredOpenStatus={
props.aiThoughtsIsOpen ? props.isAnswerable !== null : true
}
desiredOpenStatus={true}
isNotControllable={true}
/>
);

View File

@@ -1,16 +0,0 @@
const IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY = "isAIThoughtsOpen";
export const getAIThoughtsIsOpenSavedValue = () => {
// wrapping in `try / catch` to avoid SSR errors during development
try {
return (
localStorage.getItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY) === "true"
);
} catch (e) {
return false;
}
};
export const setAIThoughtsIsOpenSavedValue = (isOpen: boolean) => {
localStorage.setItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY, String(isOpen));
};

View File

@@ -116,4 +116,5 @@ export interface SearchRequestOverrides {
export interface ValidQuestionResponse {
answerable: boolean | null;
reasoning: string | null;
error: string | null;
}

View File

@@ -1,4 +1,8 @@
import { AnswerPiecePacket, ValidQuestionResponse } from "./interfaces";
import {
AnswerPiecePacket,
ErrorMessagePacket,
ValidQuestionResponse,
} from "./interfaces";
import { processRawChunkString } from "./streamingUtils";
export interface QuestionValidationArgs {
@@ -39,6 +43,7 @@ export const questionValidationStreamed = async <T>({
let previousPartialChunk: string | null = null;
while (true) {
const rawChunk = await reader?.read();
console.log(rawChunk);
if (!rawChunk) {
throw new Error("Unable to process chunk");
}
@@ -48,7 +53,7 @@
}
const [completedChunks, partialChunk] = processRawChunkString<
AnswerPiecePacket | ValidQuestionResponse
AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket
>(decoder.decode(value, { stream: true }), previousPartialChunk);
if (!completedChunks.length && !partialChunk) {
break;
@@ -66,6 +71,10 @@ export const questionValidationStreamed = async <T>({
if (Object.hasOwn(chunk, "answerable")) {
update({ answerable: (chunk as ValidQuestionResponse).answerable });
}
if (Object.hasOwn(chunk, "error")) {
update({ error: (chunk as ErrorMessagePacket).error });
}
});
}
};