mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-26 16:01:09 +02:00
Remove AI Thoughts by default (#783)
- Removes AI Thoughts by default - only shows when validation fails
- Removes punctuation "words" from queries in addition to stopwords (Vespa ignores punctuation anyway)
- Fixes Vespa deletion script for larger doc counts
This commit is contained in:
parent
fcb7f6fcc0
commit
37daf4f3e4
@ -57,7 +57,7 @@ from danswer.indexing.models import InferenceChunk
|
|||||||
from danswer.search.models import IndexFilters
|
from danswer.search.models import IndexFilters
|
||||||
from danswer.search.search_runner import embed_query
|
from danswer.search.search_runner import embed_query
|
||||||
from danswer.search.search_runner import query_processing
|
from danswer.search.search_runner import query_processing
|
||||||
from danswer.search.search_runner import remove_stop_words
|
from danswer.search.search_runner import remove_stop_words_and_punctuation
|
||||||
from danswer.utils.batching import batch_generator
|
from danswer.utils.batching import batch_generator
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
@ -732,7 +732,9 @@ class VespaIndex(DocumentIndex):
|
|||||||
query_embedding = embed_query(query)
|
query_embedding = embed_query(query)
|
||||||
|
|
||||||
query_keywords = (
|
query_keywords = (
|
||||||
" ".join(remove_stop_words(query)) if edit_keyword_query else query
|
" ".join(remove_stop_words_and_punctuation(query))
|
||||||
|
if edit_keyword_query
|
||||||
|
else query
|
||||||
)
|
)
|
||||||
|
|
||||||
params: dict[str, str | int] = {
|
params: dict[str, str | int] = {
|
||||||
@ -773,7 +775,9 @@ class VespaIndex(DocumentIndex):
|
|||||||
query_embedding = embed_query(query)
|
query_embedding = embed_query(query)
|
||||||
|
|
||||||
query_keywords = (
|
query_keywords = (
|
||||||
" ".join(remove_stop_words(query)) if edit_keyword_query else query
|
" ".join(remove_stop_words_and_punctuation(query))
|
||||||
|
if edit_keyword_query
|
||||||
|
else query
|
||||||
)
|
)
|
||||||
|
|
||||||
params: dict[str, str | int | float] = {
|
params: dict[str, str | int | float] = {
|
||||||
|
@ -4,7 +4,7 @@ from danswer.search.models import QueryFlow
|
|||||||
from danswer.search.models import SearchType
|
from danswer.search.models import SearchType
|
||||||
from danswer.search.search_nlp_models import get_default_tokenizer
|
from danswer.search.search_nlp_models import get_default_tokenizer
|
||||||
from danswer.search.search_nlp_models import IntentModel
|
from danswer.search.search_nlp_models import IntentModel
|
||||||
from danswer.search.search_runner import remove_stop_words
|
from danswer.search.search_runner import remove_stop_words_and_punctuation
|
||||||
from danswer.server.models import HelperResponse
|
from danswer.server.models import HelperResponse
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
from danswer.utils.timing import log_function_time
|
from danswer.utils.timing import log_function_time
|
||||||
@ -67,7 +67,7 @@ def recommend_search_flow(
|
|||||||
|
|
||||||
# Heuristics based decisions
|
# Heuristics based decisions
|
||||||
words = query.split()
|
words = query.split()
|
||||||
non_stopwords = remove_stop_words(query)
|
non_stopwords = remove_stop_words_and_punctuation(query)
|
||||||
non_stopword_percent = len(non_stopwords) / len(words)
|
non_stopword_percent = len(non_stopwords) / len(words)
|
||||||
|
|
||||||
# UNK tokens -> suggest Keyword (still may be valid QA)
|
# UNK tokens -> suggest Keyword (still may be valid QA)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import string
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
@ -55,17 +56,21 @@ def lemmatize_text(text: str) -> list[str]:
|
|||||||
return [lemmatizer.lemmatize(word) for word in word_tokens]
|
return [lemmatizer.lemmatize(word) for word in word_tokens]
|
||||||
|
|
||||||
|
|
||||||
def remove_stop_words(text: str) -> list[str]:
|
def remove_stop_words_and_punctuation(text: str) -> list[str]:
|
||||||
stop_words = set(stopwords.words("english"))
|
stop_words = set(stopwords.words("english"))
|
||||||
word_tokens = word_tokenize(text)
|
word_tokens = word_tokenize(text)
|
||||||
text_trimmed = [word for word in word_tokens if word.casefold() not in stop_words]
|
text_trimmed = [
|
||||||
|
word
|
||||||
|
for word in word_tokens
|
||||||
|
if (word.casefold() not in stop_words and word not in string.punctuation)
|
||||||
|
]
|
||||||
return text_trimmed or word_tokens
|
return text_trimmed or word_tokens
|
||||||
|
|
||||||
|
|
||||||
def query_processing(
|
def query_processing(
|
||||||
query: str,
|
query: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
query = " ".join(remove_stop_words(query))
|
query = " ".join(remove_stop_words_and_punctuation(query))
|
||||||
query = " ".join(lemmatize_text(query))
|
query = " ".join(lemmatize_text(query))
|
||||||
return query
|
return query
|
||||||
|
|
||||||
|
@ -16,9 +16,20 @@ logger = setup_logger()
|
|||||||
|
|
||||||
|
|
||||||
def wipe_vespa_index() -> None:
|
def wipe_vespa_index() -> None:
|
||||||
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
|
continuation = None
|
||||||
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
|
should_continue = True
|
||||||
response.raise_for_status()
|
while should_continue:
|
||||||
|
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
|
||||||
|
if continuation:
|
||||||
|
params = {**params, "continuation": continuation}
|
||||||
|
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
response_json = response.json()
|
||||||
|
print(response_json)
|
||||||
|
|
||||||
|
continuation = response_json.get("continuation")
|
||||||
|
should_continue = bool(continuation)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -14,13 +14,10 @@ import { DocumentDisplay } from "./DocumentDisplay";
|
|||||||
import { ResponseSection, StatusOptions } from "./results/ResponseSection";
|
import { ResponseSection, StatusOptions } from "./results/ResponseSection";
|
||||||
import { QuotesSection } from "./results/QuotesSection";
|
import { QuotesSection } from "./results/QuotesSection";
|
||||||
import { AnswerSection } from "./results/AnswerSection";
|
import { AnswerSection } from "./results/AnswerSection";
|
||||||
import {
|
|
||||||
getAIThoughtsIsOpenSavedValue,
|
|
||||||
setAIThoughtsIsOpenSavedValue,
|
|
||||||
} from "@/lib/search/aiThoughtUtils";
|
|
||||||
import { ThreeDots } from "react-loader-spinner";
|
import { ThreeDots } from "react-loader-spinner";
|
||||||
import { usePopup } from "../admin/connectors/Popup";
|
import { usePopup } from "../admin/connectors/Popup";
|
||||||
import { AlertIcon } from "../icons/icons";
|
import { AlertIcon } from "../icons/icons";
|
||||||
|
import Link from "next/link";
|
||||||
|
|
||||||
const removeDuplicateDocs = (documents: DanswerDocument[]) => {
|
const removeDuplicateDocs = (documents: DanswerDocument[]) => {
|
||||||
const seen = new Set<string>();
|
const seen = new Set<string>();
|
||||||
@ -45,29 +42,20 @@ const getSelectedDocumentIds = (
|
|||||||
return selectedDocumentIds;
|
return selectedDocumentIds;
|
||||||
};
|
};
|
||||||
|
|
||||||
interface SearchResultsDisplayProps {
|
|
||||||
searchResponse: SearchResponse | null;
|
|
||||||
validQuestionResponse: ValidQuestionResponse;
|
|
||||||
isFetching: boolean;
|
|
||||||
defaultOverrides: SearchDefaultOverrides;
|
|
||||||
personaName?: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export const SearchResultsDisplay = ({
|
export const SearchResultsDisplay = ({
|
||||||
searchResponse,
|
searchResponse,
|
||||||
validQuestionResponse,
|
validQuestionResponse,
|
||||||
isFetching,
|
isFetching,
|
||||||
defaultOverrides,
|
defaultOverrides,
|
||||||
personaName = null,
|
personaName = null,
|
||||||
}: SearchResultsDisplayProps) => {
|
}: {
|
||||||
|
searchResponse: SearchResponse | null;
|
||||||
|
validQuestionResponse: ValidQuestionResponse;
|
||||||
|
isFetching: boolean;
|
||||||
|
defaultOverrides: SearchDefaultOverrides;
|
||||||
|
personaName?: string | null;
|
||||||
|
}) => {
|
||||||
const { popup, setPopup } = usePopup();
|
const { popup, setPopup } = usePopup();
|
||||||
const [isAIThoughtsOpen, setIsAIThoughtsOpen] = React.useState<boolean>(
|
|
||||||
getAIThoughtsIsOpenSavedValue()
|
|
||||||
);
|
|
||||||
const handleAIThoughtToggle = (newAIThoughtsOpenValue: boolean) => {
|
|
||||||
setAIThoughtsIsOpenSavedValue(newAIThoughtsOpenValue);
|
|
||||||
setIsAIThoughtsOpen(newAIThoughtsOpenValue);
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!searchResponse) {
|
if (!searchResponse) {
|
||||||
return null;
|
return null;
|
||||||
@ -95,19 +83,25 @@ export const SearchResultsDisplay = ({
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (answer === null && documents === null && quotes === null) {
|
if (
|
||||||
if (error) {
|
answer === null &&
|
||||||
return (
|
(documents === null || documents.length === 0) &&
|
||||||
<div className="text-red-500 text-sm">
|
quotes === null
|
||||||
<div className="flex">
|
) {
|
||||||
<AlertIcon size={16} className="text-red-500 my-auto mr-1" />
|
return (
|
||||||
<p className="italic">{error}</p>
|
<div className="mt-4">
|
||||||
|
{error ? (
|
||||||
|
<div className="text-red-500 text-sm">
|
||||||
|
<div className="flex">
|
||||||
|
<AlertIcon size={16} className="text-red-500 my-auto mr-1" />
|
||||||
|
<p className="italic">{error}</p>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
) : (
|
||||||
);
|
<div className="text-gray-300">No matching documents found.</div>
|
||||||
}
|
)}
|
||||||
|
</div>
|
||||||
return <div className="text-gray-300">No matching documents found.</div>;
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const dedupedQuotes: Quote[] = [];
|
const dedupedQuotes: Quote[] = [];
|
||||||
@ -130,13 +124,6 @@ export const SearchResultsDisplay = ({
|
|||||||
searchResponse.suggestedFlowType === FlowType.QUESTION_ANSWER ||
|
searchResponse.suggestedFlowType === FlowType.QUESTION_ANSWER ||
|
||||||
defaultOverrides.forceDisplayQA;
|
defaultOverrides.forceDisplayQA;
|
||||||
|
|
||||||
let questionValidityCheckStatus: StatusOptions = "in-progress";
|
|
||||||
if (validQuestionResponse.answerable) {
|
|
||||||
questionValidityCheckStatus = "success";
|
|
||||||
} else if (validQuestionResponse.answerable === false) {
|
|
||||||
questionValidityCheckStatus = "failed";
|
|
||||||
}
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
{popup}
|
{popup}
|
||||||
@ -147,34 +134,17 @@ export const SearchResultsDisplay = ({
|
|||||||
<h2 className="text font-bold my-auto mb-1 w-full">AI Answer</h2>
|
<h2 className="text font-bold my-auto mb-1 w-full">AI Answer</h2>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{!isPersona && (
|
|
||||||
<div className="mb-2 w-full">
|
|
||||||
<ResponseSection
|
|
||||||
status={questionValidityCheckStatus}
|
|
||||||
header={
|
|
||||||
validQuestionResponse.answerable === null ? (
|
|
||||||
<div className="flex ml-2">Evaluating question...</div>
|
|
||||||
) : (
|
|
||||||
<div className="flex ml-2">AI thoughts</div>
|
|
||||||
)
|
|
||||||
}
|
|
||||||
body={<div>{validQuestionResponse.reasoning}</div>}
|
|
||||||
desiredOpenStatus={isAIThoughtsOpen}
|
|
||||||
setDesiredOpenStatus={handleAIThoughtToggle}
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
)}
|
|
||||||
|
|
||||||
<div className="mb-2 pt-1 border-t border-gray-700 w-full">
|
<div className="mb-2 pt-1 border-t border-gray-700 w-full">
|
||||||
<AnswerSection
|
<AnswerSection
|
||||||
answer={answer}
|
answer={answer}
|
||||||
quotes={quotes}
|
quotes={quotes}
|
||||||
error={error}
|
error={error}
|
||||||
isAnswerable={
|
nonAnswerableReason={
|
||||||
validQuestionResponse.answerable || (isPersona ? true : null)
|
validQuestionResponse.answerable === false && !isPersona
|
||||||
|
? validQuestionResponse.reasoning
|
||||||
|
: ""
|
||||||
}
|
}
|
||||||
isFetching={isFetching}
|
isFetching={isFetching}
|
||||||
aiThoughtsIsOpen={isAIThoughtsOpen}
|
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -34,6 +34,7 @@ const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {
|
|||||||
const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = {
|
const VALID_QUESTION_RESPONSE_DEFAULT: ValidQuestionResponse = {
|
||||||
reasoning: null,
|
reasoning: null,
|
||||||
answerable: null,
|
answerable: null,
|
||||||
|
error: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
interface SearchSectionProps {
|
interface SearchSectionProps {
|
||||||
|
@ -6,27 +6,26 @@ interface AnswerSectionProps {
|
|||||||
answer: string | null;
|
answer: string | null;
|
||||||
quotes: Quote[] | null;
|
quotes: Quote[] | null;
|
||||||
error: string | null;
|
error: string | null;
|
||||||
isAnswerable: boolean | null;
|
nonAnswerableReason: string | null;
|
||||||
isFetching: boolean;
|
isFetching: boolean;
|
||||||
aiThoughtsIsOpen: boolean;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const AnswerHeader = ({
|
const AnswerHeader = ({
|
||||||
answer,
|
answer,
|
||||||
error,
|
error,
|
||||||
quotes,
|
quotes,
|
||||||
isAnswerable,
|
nonAnswerableReason,
|
||||||
isFetching,
|
isFetching,
|
||||||
}: AnswerSectionProps) => {
|
}: AnswerSectionProps) => {
|
||||||
if (error) {
|
if (error) {
|
||||||
return <>Error while building answer</>;
|
return <>Error while building answer</>;
|
||||||
} else if ((answer && quotes !== null) || !isFetching) {
|
} else if ((answer && quotes !== null) || !isFetching) {
|
||||||
if (isAnswerable === false) {
|
if (nonAnswerableReason) {
|
||||||
return <>Best effort AI answer</>;
|
return <>Best effort AI answer</>;
|
||||||
}
|
}
|
||||||
return <>AI answer</>;
|
return <>AI answer</>;
|
||||||
}
|
}
|
||||||
if (isAnswerable === false) {
|
if (nonAnswerableReason) {
|
||||||
return <>Building best effort AI answer...</>;
|
return <>Building best effort AI answer...</>;
|
||||||
}
|
}
|
||||||
return <>Building answer...</>;
|
return <>Building answer...</>;
|
||||||
@ -56,15 +55,10 @@ export const AnswerSection = (props: AnswerSectionProps) => {
|
|||||||
let status = "in-progress" as StatusOptions;
|
let status = "in-progress" as StatusOptions;
|
||||||
if (props.error) {
|
if (props.error) {
|
||||||
status = "failed";
|
status = "failed";
|
||||||
}
|
} else if (props.nonAnswerableReason) {
|
||||||
// if AI thoughts is visible, don't mark this as a success until that section
|
status = "warning";
|
||||||
// is complete
|
} else if ((props.quotes !== null && props.answer) || !props.isFetching) {
|
||||||
else if (!props.aiThoughtsIsOpen || props.isAnswerable !== null) {
|
status = "success";
|
||||||
if (props.isAnswerable === false) {
|
|
||||||
status = "warning";
|
|
||||||
} else if ((props.quotes !== null && props.answer) || !props.isFetching) {
|
|
||||||
status = "success";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@ -78,11 +72,18 @@ export const AnswerSection = (props: AnswerSectionProps) => {
|
|||||||
body={
|
body={
|
||||||
<div className="">
|
<div className="">
|
||||||
<AnswerBody {...props} />
|
<AnswerBody {...props} />
|
||||||
|
{props.nonAnswerableReason && !props.isFetching && (
|
||||||
|
<div className="text-gray-300 mt-4 text-sm">
|
||||||
|
<b className="font-medium">Warning:</b> the AI did not think this
|
||||||
|
question was answerable.{" "}
|
||||||
|
<div className="italic mt-1 ml-2">
|
||||||
|
{props.nonAnswerableReason}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
}
|
}
|
||||||
desiredOpenStatus={
|
desiredOpenStatus={true}
|
||||||
props.aiThoughtsIsOpen ? props.isAnswerable !== null : true
|
|
||||||
}
|
|
||||||
isNotControllable={true}
|
isNotControllable={true}
|
||||||
/>
|
/>
|
||||||
);
|
);
|
||||||
|
@ -1,16 +0,0 @@
|
|||||||
const IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY = "isAIThoughtsOpen";
|
|
||||||
|
|
||||||
export const getAIThoughtsIsOpenSavedValue = () => {
|
|
||||||
// wrapping in `try / catch` to avoid SSR errors during development
|
|
||||||
try {
|
|
||||||
return (
|
|
||||||
localStorage.getItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY) === "true"
|
|
||||||
);
|
|
||||||
} catch (e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export const setAIThoughtsIsOpenSavedValue = (isOpen: boolean) => {
|
|
||||||
localStorage.setItem(IS_AI_THOUGHTS_OPEN_LOCAL_STORAGE_KEY, String(isOpen));
|
|
||||||
};
|
|
@ -116,4 +116,5 @@ export interface SearchRequestOverrides {
|
|||||||
export interface ValidQuestionResponse {
|
export interface ValidQuestionResponse {
|
||||||
answerable: boolean | null;
|
answerable: boolean | null;
|
||||||
reasoning: string | null;
|
reasoning: string | null;
|
||||||
|
error: string | null;
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,8 @@
|
|||||||
import { AnswerPiecePacket, ValidQuestionResponse } from "./interfaces";
|
import {
|
||||||
|
AnswerPiecePacket,
|
||||||
|
ErrorMessagePacket,
|
||||||
|
ValidQuestionResponse,
|
||||||
|
} from "./interfaces";
|
||||||
import { processRawChunkString } from "./streamingUtils";
|
import { processRawChunkString } from "./streamingUtils";
|
||||||
|
|
||||||
export interface QuestionValidationArgs {
|
export interface QuestionValidationArgs {
|
||||||
@ -39,6 +43,7 @@ export const questionValidationStreamed = async <T>({
|
|||||||
let previousPartialChunk: string | null = null;
|
let previousPartialChunk: string | null = null;
|
||||||
while (true) {
|
while (true) {
|
||||||
const rawChunk = await reader?.read();
|
const rawChunk = await reader?.read();
|
||||||
|
console.log(rawChunk);
|
||||||
if (!rawChunk) {
|
if (!rawChunk) {
|
||||||
throw new Error("Unable to process chunk");
|
throw new Error("Unable to process chunk");
|
||||||
}
|
}
|
||||||
@ -48,7 +53,7 @@ export const questionValidationStreamed = async <T>({
|
|||||||
}
|
}
|
||||||
|
|
||||||
const [completedChunks, partialChunk] = processRawChunkString<
|
const [completedChunks, partialChunk] = processRawChunkString<
|
||||||
AnswerPiecePacket | ValidQuestionResponse
|
AnswerPiecePacket | ValidQuestionResponse | ErrorMessagePacket
|
||||||
>(decoder.decode(value, { stream: true }), previousPartialChunk);
|
>(decoder.decode(value, { stream: true }), previousPartialChunk);
|
||||||
if (!completedChunks.length && !partialChunk) {
|
if (!completedChunks.length && !partialChunk) {
|
||||||
break;
|
break;
|
||||||
@ -66,6 +71,10 @@ export const questionValidationStreamed = async <T>({
|
|||||||
if (Object.hasOwn(chunk, "answerable")) {
|
if (Object.hasOwn(chunk, "answerable")) {
|
||||||
update({ answerable: (chunk as ValidQuestionResponse).answerable });
|
update({ answerable: (chunk as ValidQuestionResponse).answerable });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (Object.hasOwn(chunk, "error")) {
|
||||||
|
update({ error: (chunk as ErrorMessagePacket).error });
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user