mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-12 22:23:01 +02:00
Keyword search (#88)
* Add keyword search support * Fix filters display * Make documents appear immediately
This commit is contained in:
@ -123,6 +123,7 @@ def get_application() -> FastAPI:
|
|||||||
logger.info("Verifying query preprocessing (NLTK) data is downloaded")
|
logger.info("Verifying query preprocessing (NLTK) data is downloaded")
|
||||||
nltk.download("stopwords")
|
nltk.download("stopwords")
|
||||||
nltk.download("wordnet")
|
nltk.download("wordnet")
|
||||||
|
nltk.download("punkt")
|
||||||
|
|
||||||
logger.info("Verifying public credential exists.")
|
logger.info("Verifying public credential exists.")
|
||||||
create_initial_public_credential()
|
create_initial_public_credential()
|
||||||
|
@ -6,7 +6,9 @@ import { DISABLE_AUTH } from "@/lib/constants";
|
|||||||
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
import { HealthCheckBanner } from "@/components/health/healthcheck";
|
||||||
import { ApiKeyModal } from "@/components/openai/ApiKeyModal";
|
import { ApiKeyModal } from "@/components/openai/ApiKeyModal";
|
||||||
import { buildUrl } from "@/lib/utilsSS";
|
import { buildUrl } from "@/lib/utilsSS";
|
||||||
import { User } from "@/lib/types";
|
import { Connector, User } from "@/lib/types";
|
||||||
|
import { cookies } from "next/headers";
|
||||||
|
import { SearchType } from "@/components/search/SearchTypeSelector";
|
||||||
|
|
||||||
export default async function Home() {
|
export default async function Home() {
|
||||||
const tasks = [
|
const tasks = [
|
||||||
@ -24,13 +26,23 @@ export default async function Home() {
|
|||||||
return redirect("/auth/login");
|
return redirect("/auth/login");
|
||||||
}
|
}
|
||||||
|
|
||||||
let connectors = null;
|
let connectors: Connector<any>[] = [];
|
||||||
if (connectorsResponse.ok) {
|
if (connectorsResponse.ok) {
|
||||||
connectors = await connectorsResponse.json();
|
connectors = await connectorsResponse.json();
|
||||||
} else {
|
} else {
|
||||||
console.log(`Failed to fetch connectors - ${connectorsResponse.status}`);
|
console.log(`Failed to fetch connectors - ${connectorsResponse.status}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// needs to be done in a non-client side component due to nextjs
|
||||||
|
const storedSearchType = cookies().get("searchType")?.value as
|
||||||
|
| keyof typeof SearchType
|
||||||
|
| undefined;
|
||||||
|
let searchTypeDefault: SearchType =
|
||||||
|
storedSearchType !== undefined &&
|
||||||
|
SearchType.hasOwnProperty(storedSearchType)
|
||||||
|
? SearchType[storedSearchType]
|
||||||
|
: SearchType.SEMANTIC; // default to semantic search
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<Header user={user} />
|
<Header user={user} />
|
||||||
@ -40,7 +52,10 @@ export default async function Home() {
|
|||||||
<ApiKeyModal />
|
<ApiKeyModal />
|
||||||
<div className="px-24 pt-10 flex flex-col items-center min-h-screen bg-gray-900 text-gray-100">
|
<div className="px-24 pt-10 flex flex-col items-center min-h-screen bg-gray-900 text-gray-100">
|
||||||
<div className="w-full">
|
<div className="w-full">
|
||||||
<SearchSection connectors={connectors} />
|
<SearchSection
|
||||||
|
connectors={connectors}
|
||||||
|
defaultSearchType={searchTypeDefault}
|
||||||
|
/>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</>
|
</>
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import React from "react";
|
import React from "react";
|
||||||
import { Source } from "./interfaces";
|
|
||||||
import { getSourceIcon } from "../source";
|
import { getSourceIcon } from "../source";
|
||||||
import { Funnel } from "@phosphor-icons/react";
|
import { Funnel } from "@phosphor-icons/react";
|
||||||
import { ValidSources } from "@/lib/types";
|
import { ValidSources } from "@/lib/types";
|
||||||
|
import { Source } from "@/lib/search/interfaces";
|
||||||
|
|
||||||
const sources: Source[] = [
|
const sources: Source[] = [
|
||||||
{ displayName: "Google Drive", internalName: "google_drive" },
|
{ displayName: "Google Drive", internalName: "google_drive" },
|
||||||
@ -34,31 +34,33 @@ export function SourceSelector({
|
|||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="bg-gray-900 p-6">
|
<div className="bg-gray-900 px-6">
|
||||||
<div className="flex mb-3 mx-2">
|
<div className="flex mb-2 pb-1 pl-2 border-b border-gray-800 mx-2">
|
||||||
<h2 className="font-bold my-auto">Filters</h2>
|
<h2 className="font-bold my-auto">Filters</h2>
|
||||||
<Funnel className="my-auto ml-2" size="20" />
|
<Funnel className="my-auto ml-2" size="20" />
|
||||||
</div>
|
</div>
|
||||||
{sources
|
<div className="px-2">
|
||||||
.filter((source) => existingSources.includes(source.internalName))
|
{sources
|
||||||
.map((source) => (
|
.filter((source) => existingSources.includes(source.internalName))
|
||||||
<div
|
.map((source) => (
|
||||||
key={source.internalName}
|
<div
|
||||||
className={
|
key={source.internalName}
|
||||||
"flex cursor-pointer w-full items-center text-white " +
|
className={
|
||||||
"py-1.5 my-1.5 rounded-lg px-2 " +
|
"flex cursor-pointer w-full items-center text-white " +
|
||||||
(selectedSources.includes(source)
|
"py-1.5 my-1.5 rounded-lg px-2 " +
|
||||||
? "bg-gray-700"
|
(selectedSources.includes(source)
|
||||||
: "hover:bg-gray-800")
|
? "bg-gray-700"
|
||||||
}
|
: "hover:bg-gray-800")
|
||||||
onClick={() => handleSelect(source)}
|
}
|
||||||
>
|
onClick={() => handleSelect(source)}
|
||||||
{getSourceIcon(source.internalName, "16")}
|
>
|
||||||
<span className="ml-2 text-sm text-gray-200">
|
{getSourceIcon(source.internalName, "16")}
|
||||||
{source.displayName}
|
<span className="ml-2 text-sm text-gray-200">
|
||||||
</span>
|
{source.displayName}
|
||||||
</div>
|
</span>
|
||||||
))}
|
</div>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ export const SearchBar: React.FC<SearchBarProps> = ({ onSearch }) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex justify-center py-4">
|
<div className="flex justify-center py-3">
|
||||||
<div className="flex items-center w-full border-2 border-gray-600 rounded px-4 py-2 focus-within:border-blue-500">
|
<div className="flex items-center w-full border-2 border-gray-600 rounded px-4 py-2 focus-within:border-blue-500">
|
||||||
<MagnifyingGlass className="text-gray-400" />
|
<MagnifyingGlass className="text-gray-400" />
|
||||||
<textarea
|
<textarea
|
||||||
|
@ -1,12 +1,17 @@
|
|||||||
import React from "react";
|
import React from "react";
|
||||||
import { Quote, Document, SearchResponse } from "./types";
|
|
||||||
import { getSourceIcon } from "../source";
|
import { getSourceIcon } from "../source";
|
||||||
import { LoadingAnimation } from "../Loading";
|
import { LoadingAnimation } from "../Loading";
|
||||||
import { InfoIcon } from "../icons/icons";
|
import { InfoIcon } from "../icons/icons";
|
||||||
|
import {
|
||||||
|
DanswerDocument,
|
||||||
|
SearchResponse,
|
||||||
|
Quote,
|
||||||
|
} from "@/lib/search/interfaces";
|
||||||
|
import { SearchType } from "./SearchTypeSelector";
|
||||||
|
|
||||||
const removeDuplicateDocs = (documents: Document[]) => {
|
const removeDuplicateDocs = (documents: DanswerDocument[]) => {
|
||||||
const seen = new Set<string>();
|
const seen = new Set<string>();
|
||||||
const output: Document[] = [];
|
const output: DanswerDocument[] = [];
|
||||||
documents.forEach((document) => {
|
documents.forEach((document) => {
|
||||||
if (
|
if (
|
||||||
document.semantic_identifier &&
|
document.semantic_identifier &&
|
||||||
@ -62,54 +67,58 @@ export const SearchResultsDisplay: React.FC<SearchResultsDisplayProps> = ({
|
|||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
{answer && (
|
{answer && (
|
||||||
<div className="p-4 border-2 rounded-md border-gray-700">
|
<div className="h-56">
|
||||||
<div className="flex mb-1">
|
<div className="p-4 border-2 rounded-md border-gray-700">
|
||||||
<h2 className="text font-bold my-auto">AI Answer</h2>
|
<div className="flex mb-1">
|
||||||
</div>
|
<h2 className="text font-bold my-auto">AI Answer</h2>
|
||||||
<p className="mb-4">{answer}</p>
|
</div>
|
||||||
|
<p className="mb-4">{answer}</p>
|
||||||
|
|
||||||
{quotes !== null && (
|
{quotes !== null && (
|
||||||
<>
|
<>
|
||||||
<h2 className="text-sm font-bold mb-2">Sources</h2>
|
<h2 className="text-sm font-bold mb-2">Sources</h2>
|
||||||
{isFetching && dedupedQuotes.length === 0 ? (
|
{isFetching && dedupedQuotes.length === 0 ? (
|
||||||
<LoadingAnimation text="Finding quotes" size="text-sm" />
|
<LoadingAnimation text="Finding quotes" size="text-sm" />
|
||||||
) : (
|
) : (
|
||||||
<div className="flex">
|
<div className="flex">
|
||||||
{dedupedQuotes.map((quoteInfo) => (
|
{dedupedQuotes.map((quoteInfo) => (
|
||||||
<a
|
<a
|
||||||
key={quoteInfo.document_id}
|
key={quoteInfo.document_id}
|
||||||
className="p-2 ml-1 border border-gray-800 rounded-lg text-sm flex max-w-[280px] hover:bg-gray-800"
|
className="p-2 ml-1 border border-gray-800 rounded-lg text-sm flex max-w-[280px] hover:bg-gray-800"
|
||||||
href={quoteInfo.link}
|
href={quoteInfo.link}
|
||||||
target="_blank"
|
target="_blank"
|
||||||
rel="noopener noreferrer"
|
rel="noopener noreferrer"
|
||||||
>
|
>
|
||||||
{getSourceIcon(quoteInfo.source_type, "20")}
|
{getSourceIcon(quoteInfo.source_type, "20")}
|
||||||
<p className="truncate break-all ml-2">
|
<p className="truncate break-all ml-2">
|
||||||
{quoteInfo.semantic_identifier || quoteInfo.document_id}
|
{quoteInfo.semantic_identifier ||
|
||||||
</p>
|
quoteInfo.document_id}
|
||||||
</a>
|
</p>
|
||||||
))}
|
</a>
|
||||||
</div>
|
))}
|
||||||
)}
|
</div>
|
||||||
</>
|
)}
|
||||||
)}
|
</>
|
||||||
</div>
|
)}
|
||||||
)}
|
|
||||||
|
|
||||||
{!answer && !isFetching && (
|
|
||||||
<div className="flex">
|
|
||||||
<InfoIcon
|
|
||||||
size="20"
|
|
||||||
className="text-red-500 my-auto flex flex-shrink-0"
|
|
||||||
/>
|
|
||||||
<div className="text-red-500 text-xs my-auto ml-1">
|
|
||||||
GPT hurt itself in its confusion :(
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Only display docs once we're done fetching to avoid distracting from the AI answer*/}
|
{!answer &&
|
||||||
{!isFetching && documents && documents.length > 0 && (
|
!isFetching &&
|
||||||
|
searchResponse.searchType === SearchType.SEMANTIC && (
|
||||||
|
<div className="flex">
|
||||||
|
<InfoIcon
|
||||||
|
size="20"
|
||||||
|
className="text-red-500 my-auto flex flex-shrink-0"
|
||||||
|
/>
|
||||||
|
<div className="text-red-500 text-xs my-auto ml-1">
|
||||||
|
GPT hurt itself in its confusion :(
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{documents && documents.length > 0 && (
|
||||||
<div className="mt-4">
|
<div className="mt-4">
|
||||||
<div className="font-bold border-b mb-4 pb-1 border-gray-800">
|
<div className="font-bold border-b mb-4 pb-1 border-gray-800">
|
||||||
Results
|
Results
|
||||||
|
@ -3,169 +3,27 @@
|
|||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
import { SearchBar } from "./SearchBar";
|
import { SearchBar } from "./SearchBar";
|
||||||
import { SearchResultsDisplay } from "./SearchResultsDisplay";
|
import { SearchResultsDisplay } from "./SearchResultsDisplay";
|
||||||
import { Quote, Document, SearchResponse } from "./types";
|
|
||||||
import { SourceSelector } from "./Filters";
|
import { SourceSelector } from "./Filters";
|
||||||
import { Source } from "./interfaces";
|
|
||||||
import { Connector } from "@/lib/types";
|
import { Connector } from "@/lib/types";
|
||||||
|
import { SearchType, SearchTypeSelector } from "./SearchTypeSelector";
|
||||||
const initialSearchResponse: SearchResponse = {
|
import {
|
||||||
answer: null,
|
DanswerDocument,
|
||||||
quotes: null,
|
Quote,
|
||||||
documents: null,
|
SearchResponse,
|
||||||
};
|
Source,
|
||||||
|
} from "@/lib/search/interfaces";
|
||||||
const processSingleChunk = (
|
import { aiSearchRequestStreamed } from "@/lib/search/ai";
|
||||||
chunk: string,
|
import Cookies from "js-cookie";
|
||||||
currPartialChunk: string | null
|
|
||||||
): [{ [key: string]: any } | null, string | null] => {
|
|
||||||
const completeChunk = chunk + (currPartialChunk || "");
|
|
||||||
try {
|
|
||||||
// every complete chunk should be valid JSON
|
|
||||||
const chunkJson = JSON.parse(chunk);
|
|
||||||
return [chunkJson, null];
|
|
||||||
} catch (err) {
|
|
||||||
// if it's not valid JSON, then it's probably an incomplete chunk
|
|
||||||
return [null, completeChunk];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const processRawChunkString = (
|
|
||||||
rawChunkString: string,
|
|
||||||
previousPartialChunk: string | null
|
|
||||||
): [any[], string | null] => {
|
|
||||||
/* This is required because, in practice, we see that nginx does not send over
|
|
||||||
each chunk one at a time even with buffering turned off. Instead,
|
|
||||||
chunks are sometimes in batches or are sometimes incomplete */
|
|
||||||
if (!rawChunkString) {
|
|
||||||
return [[], null];
|
|
||||||
}
|
|
||||||
const chunkSections = rawChunkString
|
|
||||||
.split("\n")
|
|
||||||
.filter((chunk) => chunk.length > 0);
|
|
||||||
let parsedChunkSections: any[] = [];
|
|
||||||
let currPartialChunk = previousPartialChunk;
|
|
||||||
chunkSections.forEach((chunk) => {
|
|
||||||
const [processedChunk, partialChunk] = processSingleChunk(
|
|
||||||
chunk,
|
|
||||||
currPartialChunk
|
|
||||||
);
|
|
||||||
if (processedChunk) {
|
|
||||||
parsedChunkSections.push(processedChunk);
|
|
||||||
} else {
|
|
||||||
currPartialChunk = partialChunk;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return [parsedChunkSections, currPartialChunk];
|
|
||||||
};
|
|
||||||
|
|
||||||
interface SearchRequestStreamedArgs {
|
|
||||||
query: string;
|
|
||||||
sources: Source[];
|
|
||||||
updateCurrentAnswer: (val: string) => void;
|
|
||||||
updateQuotes: (quotes: Record<string, Quote>) => void;
|
|
||||||
updateDocs: (docs: Document[]) => void;
|
|
||||||
}
|
|
||||||
|
|
||||||
const searchRequestStreamed = async ({
|
|
||||||
query,
|
|
||||||
sources,
|
|
||||||
updateCurrentAnswer,
|
|
||||||
updateQuotes,
|
|
||||||
updateDocs,
|
|
||||||
}: SearchRequestStreamedArgs) => {
|
|
||||||
let answer = "";
|
|
||||||
let quotes: Record<string, Quote> | null = null;
|
|
||||||
let relevantDocuments: Document[] | null = null;
|
|
||||||
try {
|
|
||||||
const response = await fetch("/api/stream-direct-qa", {
|
|
||||||
method: "POST",
|
|
||||||
body: JSON.stringify({
|
|
||||||
query,
|
|
||||||
collection: "danswer_index",
|
|
||||||
...(sources.length > 0
|
|
||||||
? {
|
|
||||||
filters: [
|
|
||||||
{
|
|
||||||
source_type: sources.map((source) => source.internalName),
|
|
||||||
},
|
|
||||||
],
|
|
||||||
}
|
|
||||||
: {}),
|
|
||||||
}),
|
|
||||||
headers: {
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
},
|
|
||||||
});
|
|
||||||
const reader = response.body?.getReader();
|
|
||||||
const decoder = new TextDecoder("utf-8");
|
|
||||||
|
|
||||||
let previousPartialChunk = null;
|
|
||||||
while (true) {
|
|
||||||
const rawChunk = await reader?.read();
|
|
||||||
if (!rawChunk) {
|
|
||||||
throw new Error("Unable to process chunk");
|
|
||||||
}
|
|
||||||
const { done, value } = rawChunk;
|
|
||||||
if (done) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process each chunk as it arrives
|
|
||||||
const [completedChunks, partialChunk] = processRawChunkString(
|
|
||||||
decoder.decode(value, { stream: true }),
|
|
||||||
previousPartialChunk
|
|
||||||
);
|
|
||||||
if (!completedChunks.length && !partialChunk) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (partialChunk) {
|
|
||||||
previousPartialChunk = partialChunk;
|
|
||||||
}
|
|
||||||
completedChunks.forEach((chunk) => {
|
|
||||||
// TODO: clean up response / this logic
|
|
||||||
const answerChunk = chunk.answer_data;
|
|
||||||
if (answerChunk) {
|
|
||||||
answer += answerChunk;
|
|
||||||
updateCurrentAnswer(answer);
|
|
||||||
} else if (chunk.answer_finished) {
|
|
||||||
// set quotes as non-null to signify that the answer is finished and
|
|
||||||
// we're now looking for quotes
|
|
||||||
updateQuotes({});
|
|
||||||
if (
|
|
||||||
!answer.endsWith(".") &&
|
|
||||||
!answer.endsWith("?") &&
|
|
||||||
!answer.endsWith("!")
|
|
||||||
) {
|
|
||||||
answer += ".";
|
|
||||||
updateCurrentAnswer(answer);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (Object.hasOwn(chunk, "top_documents")) {
|
|
||||||
const docs = chunk.top_documents as any[] | null;
|
|
||||||
if (docs) {
|
|
||||||
relevantDocuments = docs.map(
|
|
||||||
(doc) => JSON.parse(doc) as Document
|
|
||||||
);
|
|
||||||
updateDocs(relevantDocuments);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
quotes = chunk as Record<string, Quote>;
|
|
||||||
updateQuotes(quotes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.error("Fetch error:", err);
|
|
||||||
}
|
|
||||||
return { answer, quotes, relevantDocuments };
|
|
||||||
};
|
|
||||||
|
|
||||||
interface SearchSectionProps {
|
interface SearchSectionProps {
|
||||||
connectors: Connector<any>[];
|
connectors: Connector<any>[];
|
||||||
|
defaultSearchType: SearchType;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const SearchSection: React.FC<SearchSectionProps> = ({ connectors }) => {
|
export const SearchSection: React.FC<SearchSectionProps> = ({
|
||||||
|
connectors,
|
||||||
|
defaultSearchType,
|
||||||
|
}) => {
|
||||||
// Search
|
// Search
|
||||||
const [searchResponse, setSearchResponse] = useState<SearchResponse | null>(
|
const [searchResponse, setSearchResponse] = useState<SearchResponse | null>(
|
||||||
null
|
null
|
||||||
@ -175,47 +33,76 @@ export const SearchSection: React.FC<SearchSectionProps> = ({ connectors }) => {
|
|||||||
// Filters
|
// Filters
|
||||||
const [sources, setSources] = useState<Source[]>([]);
|
const [sources, setSources] = useState<Source[]>([]);
|
||||||
|
|
||||||
|
// Search Type
|
||||||
|
const [selectedSearchType, setSelectedSearchType] =
|
||||||
|
useState<SearchType>(defaultSearchType);
|
||||||
|
|
||||||
|
// helpers
|
||||||
|
const initialSearchResponse: SearchResponse = {
|
||||||
|
answer: null,
|
||||||
|
quotes: null,
|
||||||
|
documents: null,
|
||||||
|
searchType: selectedSearchType,
|
||||||
|
};
|
||||||
|
const updateCurrentAnswer = (answer: string) =>
|
||||||
|
setSearchResponse((prevState) => ({
|
||||||
|
...(prevState || initialSearchResponse),
|
||||||
|
answer,
|
||||||
|
}));
|
||||||
|
const updateQuotes = (quotes: Record<string, Quote>) =>
|
||||||
|
setSearchResponse((prevState) => ({
|
||||||
|
...(prevState || initialSearchResponse),
|
||||||
|
quotes,
|
||||||
|
}));
|
||||||
|
const updateDocs = (documents: DanswerDocument[]) =>
|
||||||
|
setSearchResponse((prevState) => ({
|
||||||
|
...(prevState || initialSearchResponse),
|
||||||
|
documents,
|
||||||
|
}));
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="relative max-w-[1500px] mx-auto">
|
<div className="relative max-w-[1500px] mx-auto">
|
||||||
<div className="absolute left-0 ml-24 hidden 2xl:block">
|
<div className="absolute left-0 ml-24 hidden 2xl:block">
|
||||||
<SourceSelector
|
{connectors.length > 0 && (
|
||||||
selectedSources={sources}
|
<SourceSelector
|
||||||
setSelectedSources={setSources}
|
selectedSources={sources}
|
||||||
existingSources={connectors.map((connector) => connector.source)}
|
setSelectedSources={setSources}
|
||||||
/>
|
existingSources={connectors.map((connector) => connector.source)}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className="w-[800px] mx-auto">
|
<div className="w-[800px] mx-auto">
|
||||||
|
<SearchTypeSelector
|
||||||
|
selectedSearchType={selectedSearchType}
|
||||||
|
setSelectedSearchType={(searchType) => {
|
||||||
|
Cookies.set("searchType", searchType);
|
||||||
|
setSelectedSearchType(searchType);
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
|
||||||
<SearchBar
|
<SearchBar
|
||||||
onSearch={(query) => {
|
onSearch={async (query) => {
|
||||||
setIsFetching(true);
|
setIsFetching(true);
|
||||||
setSearchResponse({
|
setSearchResponse({
|
||||||
answer: null,
|
answer: null,
|
||||||
quotes: null,
|
quotes: null,
|
||||||
documents: null,
|
documents: null,
|
||||||
|
searchType: selectedSearchType,
|
||||||
});
|
});
|
||||||
searchRequestStreamed({
|
|
||||||
|
await aiSearchRequestStreamed({
|
||||||
query,
|
query,
|
||||||
sources,
|
sources,
|
||||||
updateCurrentAnswer: (answer) =>
|
updateCurrentAnswer,
|
||||||
setSearchResponse((prevState) => ({
|
updateQuotes,
|
||||||
...(prevState || initialSearchResponse),
|
updateDocs,
|
||||||
answer,
|
searchType: selectedSearchType,
|
||||||
})),
|
|
||||||
updateQuotes: (quotes) =>
|
|
||||||
setSearchResponse((prevState) => ({
|
|
||||||
...(prevState || initialSearchResponse),
|
|
||||||
quotes,
|
|
||||||
})),
|
|
||||||
updateDocs: (documents) =>
|
|
||||||
setSearchResponse((prevState) => ({
|
|
||||||
...(prevState || initialSearchResponse),
|
|
||||||
documents,
|
|
||||||
})),
|
|
||||||
}).then(() => {
|
|
||||||
setIsFetching(false);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
setIsFetching(false);
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<div className="mt-2">
|
<div className="mt-2">
|
||||||
<SearchResultsDisplay
|
<SearchResultsDisplay
|
||||||
searchResponse={searchResponse}
|
searchResponse={searchResponse}
|
||||||
|
46
web/src/components/search/SearchTypeSelector.tsx
Normal file
46
web/src/components/search/SearchTypeSelector.tsx
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
// Classes shared by every toggle; the trailing space lets more classes be appended.
const defaultStyle =
  "py-1 px-2 border rounded border-gray-700 cursor-pointer font-bold ";

/** Search modes the user can switch between. */
export enum SearchType {
  SEMANTIC = "SEMANTIC",
  KEYWORD = "KEYWORD",
}

interface Props {
  /** The search type that is currently active (rendered highlighted). */
  selectedSearchType: SearchType;
  /** Invoked with the search type the user picked. */
  setSelectedSearchType: (searchType: SearchType) => void;
}

// Toggles in display order, paired with their user-facing labels.
const searchTypeOptions: { searchType: SearchType; label: string }[] = [
  { searchType: SearchType.SEMANTIC, label: "AI Search" },
  { searchType: SearchType.KEYWORD, label: "Keyword Search" },
];

/**
 * Row of toggles for choosing between the available search types.
 *
 * Each toggle is exposed as a button to assistive technology and can be
 * activated with Enter / Space as well as with a click.
 */
export const SearchTypeSelector: React.FC<Props> = ({
  selectedSearchType,
  setSelectedSearchType,
}) => {
  return (
    <div className="flex text-xs">
      {searchTypeOptions.map(({ searchType, label }, index) => {
        const isSelected = selectedSearchType === searchType;
        return (
          <div
            key={searchType}
            role="button"
            tabIndex={0}
            aria-pressed={isSelected}
            className={
              defaultStyle +
              // every toggle after the first is spaced from its neighbour
              (index > 0 ? "ml-2 " : "") +
              (isSelected ? "bg-blue-500" : "bg-gray-800 hover:bg-gray-600")
            }
            onClick={() => setSelectedSearchType(searchType)}
            onKeyDown={(event) => {
              if (event.key === "Enter" || event.key === " ") {
                // stop Space from scrolling the page
                event.preventDefault();
                setSelectedSearchType(searchType);
              }
            }}
          >
            {label}
          </div>
        );
      })}
    </div>
  );
};
|
@ -1,6 +0,0 @@
|
|||||||
import { ValidSources } from "@/lib/types";
|
|
||||||
|
|
||||||
export interface Source {
|
|
||||||
displayName: string;
|
|
||||||
internalName: ValidSources;
|
|
||||||
}
|
|
@ -1,23 +0,0 @@
|
|||||||
import { ValidSources } from "@/lib/types";
|
|
||||||
|
|
||||||
export interface Quote {
|
|
||||||
document_id: string;
|
|
||||||
link: string;
|
|
||||||
source_type: ValidSources;
|
|
||||||
blurb: string;
|
|
||||||
semantic_identifier: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface Document {
|
|
||||||
document_id: string;
|
|
||||||
link: string;
|
|
||||||
source_type: ValidSources;
|
|
||||||
blurb: string;
|
|
||||||
semantic_identifier: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface SearchResponse {
|
|
||||||
answer: string | null;
|
|
||||||
quotes: Record<string, Quote> | null;
|
|
||||||
documents: Document[] | null;
|
|
||||||
}
|
|
@ -3,3 +3,5 @@ export const INTERNAL_URL = process.env.INTERNAL_URL || "http://127.0.0.1:8080";
|
|||||||
|
|
||||||
export const GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME =
|
export const GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME =
|
||||||
"google_drive_auth_is_admin";
|
"google_drive_auth_is_admin";
|
||||||
|
|
||||||
|
export const SEARCH_TYPE_COOKIE_NAME = "search_type";
|
||||||
|
143
web/src/lib/search/ai.ts
Normal file
143
web/src/lib/search/ai.ts
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
import { SearchType } from "@/components/search/SearchTypeSelector";
|
||||||
|
import { DanswerDocument, Quote, SearchRequestArgs } from "./interfaces";
|
||||||
|
|
||||||
|
/**
 * Tries to turn one newline-delimited section of the stream into JSON.
 *
 * @param chunk - the section that was just received
 * @param currPartialChunk - unparsed text left over from earlier sections, if any
 * @returns `[parsed, null]` when a JSON value could be produced, otherwise
 *   `[null, leftover]` where `leftover` is the accumulated text (earlier
 *   leftover followed by `chunk`) to retry with once more data arrives
 */
const processSingleChunk = (
  chunk: string,
  currPartialChunk: string | null
): [{ [key: string]: any } | null, string | null] => {
  // the leftover arrived first, so it has to go in front of the new text
  const completeChunk = (currPartialChunk || "") + chunk;
  try {
    // every complete chunk should be valid JSON
    return [JSON.parse(chunk), null];
  } catch {
    // not valid on its own - it may be the tail of a chunk that was split
  }
  if (currPartialChunk) {
    try {
      return [JSON.parse(completeChunk), null];
    } catch {
      // still incomplete, keep accumulating
    }
  }
  return [null, completeChunk];
};

/**
 * Splits a raw piece of the response into newline-delimited sections and
 * parses as many of them as possible.
 *
 * @param rawChunkString - decoded text received from the stream
 * @param previousPartialChunk - leftover text returned by the previous call
 * @returns the parsed sections in order, and the text that could not be
 *   parsed yet (`null` when nothing is pending)
 */
const processRawChunkString = (
  rawChunkString: string,
  previousPartialChunk: string | null
): [any[], string | null] => {
  /* This is required because, in practice, we see that nginx does not send over
  each chunk one at a time even with buffering turned off. Instead,
  chunks are sometimes in batches or are sometimes incomplete */
  if (!rawChunkString) {
    return [[], null];
  }
  const chunkSections = rawChunkString
    .split("\n")
    .filter((chunk) => chunk.length > 0);
  const parsedChunkSections: any[] = [];
  let currPartialChunk = previousPartialChunk;
  for (const chunk of chunkSections) {
    const [processedChunk, partialChunk] = processSingleChunk(
      chunk,
      currPartialChunk
    );
    if (processedChunk) {
      parsedChunkSections.push(processedChunk);
      // the leftover has either been consumed or is obsolete - never reuse it
      currPartialChunk = null;
    } else {
      currPartialChunk = partialChunk;
    }
  }
  return [parsedChunkSections, currPartialChunk];
};
|
||||||
|
|
||||||
|
/**
 * Sends `query` to `/api/stream-direct-qa` and reports the streamed response
 * through the supplied callbacks as it arrives.
 *
 * Each parsed chunk of the stream is handled as one of:
 *  - `answer_data`: appended to the answer, then `updateCurrentAnswer` is called
 *  - `answer_finished`: `updateQuotes({})` is called to signal that quotes are
 *    pending, and a trailing "." is added if the answer lacks end punctuation
 *  - `top_documents`: each entry is JSON-parsed and passed to `updateDocs`
 *  - anything else: treated as the quotes record and passed to `updateQuotes`
 *
 * Failures (network error, non-OK status, missing body) are logged and not
 * rethrown; whatever was collected up to that point is returned.
 *
 * @returns the final answer, quotes and documents that were received
 */
export const aiSearchRequestStreamed = async ({
  query,
  sources,
  updateCurrentAnswer,
  updateQuotes,
  updateDocs,
  searchType,
}: SearchRequestArgs) => {
  let answer = "";
  let quotes: Record<string, Quote> | null = null;
  let relevantDocuments: DanswerDocument[] | null = null;
  try {
    const response = await fetch("/api/stream-direct-qa", {
      method: "POST",
      body: JSON.stringify({
        query,
        collection: "danswer_index",
        use_keyword: searchType === SearchType.KEYWORD,
        // only send a filter when the user selected at least one source
        ...(sources.length > 0
          ? {
              filters: [
                {
                  source_type: sources.map((source) => source.internalName),
                },
              ],
            }
          : {}),
      }),
      headers: {
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      // an error body must not be mistaken for a quotes payload below
      throw new Error(`Search request failed with status ${response.status}`);
    }
    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error("Unable to process chunk");
    }
    const decoder = new TextDecoder("utf-8");

    let previousPartialChunk: string | null = null;
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }

      const rawChunkString = decoder.decode(value, { stream: true });
      if (!rawChunkString) {
        // nothing decodable yet (e.g. the read ended inside a multi-byte
        // character); keep the pending text and wait for the next read
        continue;
      }

      // Process each chunk as it arrives
      const [completedChunks, partialChunk] = processRawChunkString(
        rawChunkString,
        previousPartialChunk
      );
      // always take over the returned leftover so consumed text is not reused
      previousPartialChunk = partialChunk;

      for (const chunk of completedChunks) {
        // TODO: clean up response / this logic
        const answerChunk = chunk.answer_data;
        if (answerChunk) {
          answer += answerChunk;
          updateCurrentAnswer(answer);
        } else if (chunk.answer_finished) {
          // set quotes as non-null to signify that the answer is finished and
          // we're now looking for quotes
          updateQuotes({});
          if (
            !answer.endsWith(".") &&
            !answer.endsWith("?") &&
            !answer.endsWith("!")
          ) {
            answer += ".";
            updateCurrentAnswer(answer);
          }
        } else if (Object.hasOwn(chunk, "top_documents")) {
          const docs = chunk.top_documents as any[] | null;
          if (docs) {
            relevantDocuments = docs.map(
              (doc) => JSON.parse(doc) as DanswerDocument
            );
            updateDocs(relevantDocuments);
          }
        } else {
          quotes = chunk as Record<string, Quote>;
          updateQuotes(quotes);
        }
      }
    }
  } catch (err) {
    console.error("Fetch error:", err);
  }
  return { answer, quotes, relevantDocuments };
};
|
39
web/src/lib/search/interfaces.ts
Normal file
39
web/src/lib/search/interfaces.ts
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import { SearchType } from "@/components/search/SearchTypeSelector";
|
||||||
|
import { ValidSources } from "../types";
|
||||||
|
|
||||||
|
/** A quoted source attached to a generated answer. */
export interface Quote {
  document_id: string;
  link: string;
  source_type: ValidSources;
  blurb: string;
  /** May be null; the results display falls back to `document_id` then. */
  semantic_identifier: string | null;
}

/** A document returned by a search. */
export interface DanswerDocument {
  document_id: string;
  link: string;
  source_type: ValidSources;
  blurb: string;
  semantic_identifier: string | null;
}

/** State of one search; every result field stays null until it has arrived. */
export interface SearchResponse {
  searchType: SearchType;
  answer: string | null;
  quotes: { [key: string]: Quote } | null;
  documents: Array<DanswerDocument> | null;
}

/** A source that results can be filtered by. */
export interface Source {
  /** Name shown to the user. */
  displayName: string;
  /** Identifier sent to the backend in the `source_type` filter. */
  internalName: ValidSources;
}

/** Arguments accepted by the search request functions. */
export interface SearchRequestArgs {
  query: string;
  sources: Array<Source>;
  updateCurrentAnswer: (val: string) => void;
  updateQuotes: (quotes: { [key: string]: Quote }) => void;
  updateDocs: (documents: Array<DanswerDocument>) => void;
  searchType: SearchType;
}
|
45
web/src/lib/search/keyword.ts
Normal file
45
web/src/lib/search/keyword.ts
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
import { DanswerDocument, SearchRequestArgs } from "./interfaces";
|
||||||
|
|
||||||
|
// NOTE(review): both lists are treated as possibly null because the original
// code already null-checked `semi_ranked_docs`; confirm against the backend.
interface KeywordResponse {
  top_ranked_docs: DanswerDocument[] | null;
  semi_ranked_docs: DanswerDocument[] | null;
}

/**
 * Runs a keyword search against `/api/keyword-search` and hands the matching
 * documents (top ranked first, then semi ranked) to `updateDocs`.
 *
 * A non-OK response is logged and `updateDocs` is not called. Errors thrown
 * by `fetch` itself propagate to the caller.
 */
export const keywordSearch = async ({
  query,
  sources,
  updateDocs,
}: SearchRequestArgs): Promise<void> => {
  const response = await fetch("/api/keyword-search", {
    method: "POST",
    body: JSON.stringify({
      query,
      collection: "danswer_index",
      // only send a filter when the user selected at least one source
      ...(sources.length > 0
        ? {
            filters: [
              {
                source_type: sources.map((source) => source.internalName),
              },
            ],
          }
        : {}),
    }),
    headers: {
      "Content-Type": "application/json",
    },
  });

  if (!response.ok) {
    console.error(`Keyword search failed - ${response.status}`);
    return;
  }

  const keywordResults = (await response.json()) as KeywordResponse;

  // either list may be absent; a missing list contributes no documents
  const matchingDocs = [
    ...(keywordResults.top_ranked_docs ?? []),
    ...(keywordResults.semi_ranked_docs ?? []),
  ];

  updateDocs(matchingDocs);
};
|
Reference in New Issue
Block a user