Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-03-26 17:51:54 +01:00

Keyword search (#88)

* Add keyword search support
* Fix filters display
* Make documents appear immediately

This commit is contained in:
parent e202aa440e
commit e0ebdc2fc1
@@ -123,6 +123,7 @@ def get_application() -> FastAPI:
     logger.info("Verifying query preprocessing (NLTK) data is downloaded")
     nltk.download("stopwords")
     nltk.download("wordnet")
+    nltk.download("punkt")

     logger.info("Verifying public credential exists.")
     create_initial_public_credential()
@@ -6,7 +6,9 @@ import { DISABLE_AUTH } from "@/lib/constants";
 import { HealthCheckBanner } from "@/components/health/healthcheck";
 import { ApiKeyModal } from "@/components/openai/ApiKeyModal";
 import { buildUrl } from "@/lib/utilsSS";
-import { User } from "@/lib/types";
+import { Connector, User } from "@/lib/types";
+import { cookies } from "next/headers";
+import { SearchType } from "@/components/search/SearchTypeSelector";

 export default async function Home() {
   const tasks = [
@@ -24,13 +26,23 @@ export default async function Home() {
     return redirect("/auth/login");
   }

-  let connectors = null;
+  let connectors: Connector<any>[] = [];
   if (connectorsResponse.ok) {
     connectors = await connectorsResponse.json();
   } else {
     console.log(`Failed to fetch connectors - ${connectorsResponse.status}`);
   }

+  // needs to be done in a non-client side component due to nextjs
+  const storedSearchType = cookies().get("searchType")?.value as
+    | keyof typeof SearchType
+    | undefined;
+  let searchTypeDefault: SearchType =
+    storedSearchType !== undefined &&
+    SearchType.hasOwnProperty(storedSearchType)
+      ? SearchType[storedSearchType]
+      : SearchType.SEMANTIC; // default to semantic search
+
   return (
     <>
       <Header user={user} />
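The cookie value above is untrusted input, so the hunk only honors it when it names a real key of the SearchType enum. A standalone sketch of that validation logic (an illustrative rewrite, not code from the commit):

```typescript
enum SearchType {
  SEMANTIC = "SEMANTIC",
  KEYWORD = "KEYWORD",
}

// Only trust the raw cookie string if it is an actual key of the enum;
// anything else falls back to the semantic-search default.
const parseSearchType = (raw: string | undefined): SearchType =>
  raw !== undefined && SearchType.hasOwnProperty(raw)
    ? SearchType[raw as keyof typeof SearchType]
    : SearchType.SEMANTIC;

parseSearchType("KEYWORD"); // => SearchType.KEYWORD
parseSearchType("garbage"); // => SearchType.SEMANTIC (safe fallback)
```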
@@ -40,7 +52,10 @@ export default async function Home() {
         <ApiKeyModal />
         <div className="px-24 pt-10 flex flex-col items-center min-h-screen bg-gray-900 text-gray-100">
           <div className="w-full">
-            <SearchSection connectors={connectors} />
+            <SearchSection
+              connectors={connectors}
+              defaultSearchType={searchTypeDefault}
+            />
           </div>
         </div>
       </>
@@ -1,8 +1,8 @@
 import React from "react";
-import { Source } from "./interfaces";
 import { getSourceIcon } from "../source";
 import { Funnel } from "@phosphor-icons/react";
 import { ValidSources } from "@/lib/types";
+import { Source } from "@/lib/search/interfaces";

 const sources: Source[] = [
   { displayName: "Google Drive", internalName: "google_drive" },
@@ -34,31 +34,33 @@ export function SourceSelector({
   };

   return (
-    <div className="bg-gray-900 p-6">
-      <div className="flex mb-3 mx-2">
+    <div className="bg-gray-900 px-6">
+      <div className="flex mb-2 pb-1 pl-2 border-b border-gray-800 mx-2">
         <h2 className="font-bold my-auto">Filters</h2>
         <Funnel className="my-auto ml-2" size="20" />
       </div>
-      {sources
-        .filter((source) => existingSources.includes(source.internalName))
-        .map((source) => (
-          <div
-            key={source.internalName}
-            className={
-              "flex cursor-pointer w-full items-center text-white " +
-              "py-1.5 my-1.5 rounded-lg px-2 " +
-              (selectedSources.includes(source)
-                ? "bg-gray-700"
-                : "hover:bg-gray-800")
-            }
-            onClick={() => handleSelect(source)}
-          >
-            {getSourceIcon(source.internalName, "16")}
-            <span className="ml-2 text-sm text-gray-200">
-              {source.displayName}
-            </span>
-          </div>
-        ))}
+      <div className="px-2">
+        {sources
+          .filter((source) => existingSources.includes(source.internalName))
+          .map((source) => (
+            <div
+              key={source.internalName}
+              className={
+                "flex cursor-pointer w-full items-center text-white " +
+                "py-1.5 my-1.5 rounded-lg px-2 " +
+                (selectedSources.includes(source)
+                  ? "bg-gray-700"
+                  : "hover:bg-gray-800")
+              }
+              onClick={() => handleSelect(source)}
+            >
+              {getSourceIcon(source.internalName, "16")}
+              <span className="ml-2 text-sm text-gray-200">
+                {source.displayName}
+              </span>
+            </div>
+          ))}
+      </div>
     </div>
   );
 }
@@ -26,7 +26,7 @@ export const SearchBar: React.FC<SearchBarProps> = ({ onSearch }) => {
   };

   return (
-    <div className="flex justify-center py-4">
+    <div className="flex justify-center py-3">
       <div className="flex items-center w-full border-2 border-gray-600 rounded px-4 py-2 focus-within:border-blue-500">
         <MagnifyingGlass className="text-gray-400" />
         <textarea
@@ -1,12 +1,17 @@
 import React from "react";
-import { Quote, Document, SearchResponse } from "./types";
 import { getSourceIcon } from "../source";
 import { LoadingAnimation } from "../Loading";
 import { InfoIcon } from "../icons/icons";
+import {
+  DanswerDocument,
+  SearchResponse,
+  Quote,
+} from "@/lib/search/interfaces";
+import { SearchType } from "./SearchTypeSelector";

-const removeDuplicateDocs = (documents: Document[]) => {
+const removeDuplicateDocs = (documents: DanswerDocument[]) => {
   const seen = new Set<string>();
-  const output: Document[] = [];
+  const output: DanswerDocument[] = [];
   documents.forEach((document) => {
     if (
       document.semantic_identifier &&
@@ -62,54 +67,58 @@ export const SearchResultsDisplay: React.FC<SearchResultsDisplayProps> = ({
   return (
     <>
       {answer && (
-        <div className="p-4 border-2 rounded-md border-gray-700">
-          <div className="flex mb-1">
-            <h2 className="text font-bold my-auto">AI Answer</h2>
-          </div>
-          <p className="mb-4">{answer}</p>
+        <div className="h-56">
+          <div className="p-4 border-2 rounded-md border-gray-700">
+            <div className="flex mb-1">
+              <h2 className="text font-bold my-auto">AI Answer</h2>
+            </div>
+            <p className="mb-4">{answer}</p>

-          {quotes !== null && (
-            <>
-              <h2 className="text-sm font-bold mb-2">Sources</h2>
-              {isFetching && dedupedQuotes.length === 0 ? (
-                <LoadingAnimation text="Finding quotes" size="text-sm" />
-              ) : (
-                <div className="flex">
-                  {dedupedQuotes.map((quoteInfo) => (
-                    <a
-                      key={quoteInfo.document_id}
-                      className="p-2 ml-1 border border-gray-800 rounded-lg text-sm flex max-w-[280px] hover:bg-gray-800"
-                      href={quoteInfo.link}
-                      target="_blank"
-                      rel="noopener noreferrer"
-                    >
-                      {getSourceIcon(quoteInfo.source_type, "20")}
-                      <p className="truncate break-all ml-2">
-                        {quoteInfo.semantic_identifier || quoteInfo.document_id}
-                      </p>
-                    </a>
-                  ))}
-                </div>
-              )}
-            </>
-          )}
-        </div>
-      )}
-
-      {!answer && !isFetching && (
-        <div className="flex">
-          <InfoIcon
-            size="20"
-            className="text-red-500 my-auto flex flex-shrink-0"
-          />
-          <div className="text-red-500 text-xs my-auto ml-1">
-            GPT hurt itself in its confusion :(
+            {quotes !== null && (
+              <>
+                <h2 className="text-sm font-bold mb-2">Sources</h2>
+                {isFetching && dedupedQuotes.length === 0 ? (
+                  <LoadingAnimation text="Finding quotes" size="text-sm" />
+                ) : (
+                  <div className="flex">
+                    {dedupedQuotes.map((quoteInfo) => (
+                      <a
+                        key={quoteInfo.document_id}
+                        className="p-2 ml-1 border border-gray-800 rounded-lg text-sm flex max-w-[280px] hover:bg-gray-800"
+                        href={quoteInfo.link}
+                        target="_blank"
+                        rel="noopener noreferrer"
+                      >
+                        {getSourceIcon(quoteInfo.source_type, "20")}
+                        <p className="truncate break-all ml-2">
+                          {quoteInfo.semantic_identifier ||
+                            quoteInfo.document_id}
+                        </p>
+                      </a>
+                    ))}
+                  </div>
+                )}
+              </>
+            )}
           </div>
         </div>
       )}

-      {/* Only display docs once we're done fetching to avoid distracting from the AI answer*/}
-      {!isFetching && documents && documents.length > 0 && (
+      {!answer &&
+        !isFetching &&
+        searchResponse.searchType === SearchType.SEMANTIC && (
+          <div className="flex">
+            <InfoIcon
+              size="20"
+              className="text-red-500 my-auto flex flex-shrink-0"
+            />
+            <div className="text-red-500 text-xs my-auto ml-1">
+              GPT hurt itself in its confusion :(
+            </div>
+          </div>
+        )}
+
+      {documents && documents.length > 0 && (
         <div className="mt-4">
           <div className="font-bold border-b mb-4 pb-1 border-gray-800">
             Results
@@ -3,169 +3,27 @@
 import { useState } from "react";
 import { SearchBar } from "./SearchBar";
 import { SearchResultsDisplay } from "./SearchResultsDisplay";
-import { Quote, Document, SearchResponse } from "./types";
 import { SourceSelector } from "./Filters";
-import { Source } from "./interfaces";
 import { Connector } from "@/lib/types";

-const initialSearchResponse: SearchResponse = {
-  answer: null,
-  quotes: null,
-  documents: null,
-};
-
-const processSingleChunk = (
-  chunk: string,
-  currPartialChunk: string | null
-): [{ [key: string]: any } | null, string | null] => {
-  const completeChunk = chunk + (currPartialChunk || "");
-  try {
-    // every complete chunk should be valid JSON
-    const chunkJson = JSON.parse(chunk);
-    return [chunkJson, null];
-  } catch (err) {
-    // if it's not valid JSON, then it's probably an incomplete chunk
-    return [null, completeChunk];
-  }
-};
-
-const processRawChunkString = (
-  rawChunkString: string,
-  previousPartialChunk: string | null
-): [any[], string | null] => {
-  /* This is required because, in practice, we see that nginx does not send over
-    each chunk one at a time even with buffering turned off. Instead,
-    chunks are sometimes in batches or are sometimes incomplete */
-  if (!rawChunkString) {
-    return [[], null];
-  }
-  const chunkSections = rawChunkString
-    .split("\n")
-    .filter((chunk) => chunk.length > 0);
-  let parsedChunkSections: any[] = [];
-  let currPartialChunk = previousPartialChunk;
-  chunkSections.forEach((chunk) => {
-    const [processedChunk, partialChunk] = processSingleChunk(
-      chunk,
-      currPartialChunk
-    );
-    if (processedChunk) {
-      parsedChunkSections.push(processedChunk);
-    } else {
-      currPartialChunk = partialChunk;
-    }
-  });
-  return [parsedChunkSections, currPartialChunk];
-};
-
-interface SearchRequestStreamedArgs {
-  query: string;
-  sources: Source[];
-  updateCurrentAnswer: (val: string) => void;
-  updateQuotes: (quotes: Record<string, Quote>) => void;
-  updateDocs: (docs: Document[]) => void;
-}
-
-const searchRequestStreamed = async ({
-  query,
-  sources,
-  updateCurrentAnswer,
-  updateQuotes,
-  updateDocs,
-}: SearchRequestStreamedArgs) => {
-  let answer = "";
-  let quotes: Record<string, Quote> | null = null;
-  let relevantDocuments: Document[] | null = null;
-  try {
-    const response = await fetch("/api/stream-direct-qa", {
-      method: "POST",
-      body: JSON.stringify({
-        query,
-        collection: "danswer_index",
-        ...(sources.length > 0
-          ? {
-              filters: [
-                {
-                  source_type: sources.map((source) => source.internalName),
-                },
-              ],
-            }
-          : {}),
-      }),
-      headers: {
-        "Content-Type": "application/json",
-      },
-    });
-    const reader = response.body?.getReader();
-    const decoder = new TextDecoder("utf-8");
-
-    let previousPartialChunk = null;
-    while (true) {
-      const rawChunk = await reader?.read();
-      if (!rawChunk) {
-        throw new Error("Unable to process chunk");
-      }
-      const { done, value } = rawChunk;
-      if (done) {
-        break;
-      }
-
-      // Process each chunk as it arrives
-      const [completedChunks, partialChunk] = processRawChunkString(
-        decoder.decode(value, { stream: true }),
-        previousPartialChunk
-      );
-      if (!completedChunks.length && !partialChunk) {
-        break;
-      }
-      if (partialChunk) {
-        previousPartialChunk = partialChunk;
-      }
-      completedChunks.forEach((chunk) => {
-        // TODO: clean up response / this logic
-        const answerChunk = chunk.answer_data;
-        if (answerChunk) {
-          answer += answerChunk;
-          updateCurrentAnswer(answer);
-        } else if (chunk.answer_finished) {
-          // set quotes as non-null to signify that the answer is finished and
-          // we're now looking for quotes
-          updateQuotes({});
-          if (
-            !answer.endsWith(".") &&
-            !answer.endsWith("?") &&
-            !answer.endsWith("!")
-          ) {
-            answer += ".";
-            updateCurrentAnswer(answer);
-          }
-        } else {
-          if (Object.hasOwn(chunk, "top_documents")) {
-            const docs = chunk.top_documents as any[] | null;
-            if (docs) {
-              relevantDocuments = docs.map(
-                (doc) => JSON.parse(doc) as Document
-              );
-              updateDocs(relevantDocuments);
-            }
-          } else {
-            quotes = chunk as Record<string, Quote>;
-            updateQuotes(quotes);
-          }
-        }
-      });
-    }
-  } catch (err) {
-    console.error("Fetch error:", err);
-  }
-  return { answer, quotes, relevantDocuments };
-};
+import { SearchType, SearchTypeSelector } from "./SearchTypeSelector";
+import {
+  DanswerDocument,
+  Quote,
+  SearchResponse,
+  Source,
+} from "@/lib/search/interfaces";
+import { aiSearchRequestStreamed } from "@/lib/search/ai";
+import Cookies from "js-cookie";

 interface SearchSectionProps {
   connectors: Connector<any>[];
+  defaultSearchType: SearchType;
 }

-export const SearchSection: React.FC<SearchSectionProps> = ({ connectors }) => {
+export const SearchSection: React.FC<SearchSectionProps> = ({
+  connectors,
+  defaultSearchType,
+}) => {
   // Search
   const [searchResponse, setSearchResponse] = useState<SearchResponse | null>(
     null
@@ -175,47 +33,76 @@ export const SearchSection: React.FC<SearchSectionProps> = ({ connectors }) => {
   // Filters
   const [sources, setSources] = useState<Source[]>([]);

+  // Search Type
+  const [selectedSearchType, setSelectedSearchType] =
+    useState<SearchType>(defaultSearchType);
+
+  // helpers
+  const initialSearchResponse: SearchResponse = {
+    answer: null,
+    quotes: null,
+    documents: null,
+    searchType: selectedSearchType,
+  };
+  const updateCurrentAnswer = (answer: string) =>
+    setSearchResponse((prevState) => ({
+      ...(prevState || initialSearchResponse),
+      answer,
+    }));
+  const updateQuotes = (quotes: Record<string, Quote>) =>
+    setSearchResponse((prevState) => ({
+      ...(prevState || initialSearchResponse),
+      quotes,
+    }));
+  const updateDocs = (documents: DanswerDocument[]) =>
+    setSearchResponse((prevState) => ({
+      ...(prevState || initialSearchResponse),
+      documents,
+    }));
+
   return (
     <div className="relative max-w-[1500px] mx-auto">
       <div className="absolute left-0 ml-24 hidden 2xl:block">
-        <SourceSelector
-          selectedSources={sources}
-          setSelectedSources={setSources}
-          existingSources={connectors.map((connector) => connector.source)}
-        />
+        {connectors.length > 0 && (
+          <SourceSelector
+            selectedSources={sources}
+            setSelectedSources={setSources}
+            existingSources={connectors.map((connector) => connector.source)}
+          />
+        )}
       </div>
       <div className="w-[800px] mx-auto">
+        <SearchTypeSelector
+          selectedSearchType={selectedSearchType}
+          setSelectedSearchType={(searchType) => {
+            Cookies.set("searchType", searchType);
+            setSelectedSearchType(searchType);
+          }}
+        />
+
         <SearchBar
-          onSearch={(query) => {
+          onSearch={async (query) => {
             setIsFetching(true);
             setSearchResponse({
               answer: null,
               quotes: null,
               documents: null,
+              searchType: selectedSearchType,
             });
-            searchRequestStreamed({
+
+            await aiSearchRequestStreamed({
               query,
               sources,
-              updateCurrentAnswer: (answer) =>
-                setSearchResponse((prevState) => ({
-                  ...(prevState || initialSearchResponse),
-                  answer,
-                })),
-              updateQuotes: (quotes) =>
-                setSearchResponse((prevState) => ({
-                  ...(prevState || initialSearchResponse),
-                  quotes,
-                })),
-              updateDocs: (documents) =>
-                setSearchResponse((prevState) => ({
-                  ...(prevState || initialSearchResponse),
-                  documents,
-                })),
-            }).then(() => {
-              setIsFetching(false);
+              updateCurrentAnswer,
+              updateQuotes,
+              updateDocs,
+              searchType: selectedSearchType,
             });
+
+            setIsFetching(false);
           }}
         />

         <div className="mt-2">
           <SearchResultsDisplay
             searchResponse={searchResponse}
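The updateCurrentAnswer/updateQuotes/updateDocs helpers above all follow one merge-updater idea: a functional setState that spreads the previous response (or a fresh initial one) and overwrites a single field, so fields that streamed in earlier are never clobbered. A generalized standalone sketch of that pattern; the useMergedResponse hook and the simplified Resp shape here are invented for illustration and are not the commit's code:

```typescript
import { useState } from "react";

// Simplified stand-in for the commit's SearchResponse shape.
interface Resp {
  answer: string | null;
  quotes: Record<string, unknown> | null;
  documents: unknown[] | null;
}

const useMergedResponse = (initial: Resp) => {
  const [resp, setResp] = useState<Resp | null>(null);
  // Merge one patch into whatever has streamed in so far; on the very
  // first update, fall back to the initial response.
  const update = (patch: Partial<Resp>) =>
    setResp((prev) => ({ ...(prev ?? initial), ...patch }));
  return [resp, update] as const;
};
```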
web/src/components/search/SearchTypeSelector.tsx (new file, 46 lines)
@@ -0,0 +1,46 @@
const defaultStyle =
  "py-1 px-2 border rounded border-gray-700 cursor-pointer font-bold ";

export enum SearchType {
  SEMANTIC = "SEMANTIC",
  KEYWORD = "KEYWORD",
}

interface Props {
  selectedSearchType: SearchType;
  setSelectedSearchType: (searchType: SearchType) => void;
}

export const SearchTypeSelector: React.FC<Props> = ({
  selectedSearchType,
  setSelectedSearchType,
}) => {
  return (
    <div className="flex text-xs">
      <div
        className={
          defaultStyle +
          (selectedSearchType === SearchType.SEMANTIC
            ? "bg-blue-500"
            : "bg-gray-800 hover:bg-gray-600")
        }
        onClick={() => setSelectedSearchType(SearchType.SEMANTIC)}
      >
        AI Search
      </div>

      <div
        className={
          defaultStyle +
          "ml-2 " +
          (selectedSearchType === SearchType.KEYWORD
            ? "bg-blue-500"
            : "bg-gray-800 hover:bg-gray-600")
        }
        onClick={() => setSelectedSearchType(SearchType.KEYWORD)}
      >
        Keyword Search
      </div>
    </div>
  );
};
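A hypothetical usage sketch for the new selector, wiring it to plain component state; the commit itself wires it to a cookie inside SearchSection.tsx. The SearchTypeDemo component is invented for illustration:

```tsx
import { useState } from "react";
import {
  SearchType,
  SearchTypeSelector,
} from "@/components/search/SearchTypeSelector";

// The parent owns the selection; the selector just reports clicks back.
const SearchTypeDemo = () => {
  const [searchType, setSearchType] = useState<SearchType>(
    SearchType.SEMANTIC
  );
  return (
    <SearchTypeSelector
      selectedSearchType={searchType}
      setSelectedSearchType={setSearchType}
    />
  );
};
```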
@@ -1,6 +0,0 @@ (file deleted)
import { ValidSources } from "@/lib/types";

export interface Source {
  displayName: string;
  internalName: ValidSources;
}
@@ -1,23 +0,0 @@ (file deleted)
import { ValidSources } from "@/lib/types";

export interface Quote {
  document_id: string;
  link: string;
  source_type: ValidSources;
  blurb: string;
  semantic_identifier: string | null;
}

export interface Document {
  document_id: string;
  link: string;
  source_type: ValidSources;
  blurb: string;
  semantic_identifier: string | null;
}

export interface SearchResponse {
  answer: string | null;
  quotes: Record<string, Quote> | null;
  documents: Document[] | null;
}
@@ -3,3 +3,5 @@ export const INTERNAL_URL = process.env.INTERNAL_URL || "http://127.0.0.1:8080";

 export const GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME =
   "google_drive_auth_is_admin";
+
+export const SEARCH_TYPE_COOKIE_NAME = "search_type";
web/src/lib/search/ai.ts (new file, 143 lines)
@@ -0,0 +1,143 @@
import { SearchType } from "@/components/search/SearchTypeSelector";
import { DanswerDocument, Quote, SearchRequestArgs } from "./interfaces";

const processSingleChunk = (
  chunk: string,
  currPartialChunk: string | null
): [{ [key: string]: any } | null, string | null] => {
  const completeChunk = chunk + (currPartialChunk || "");
  try {
    // every complete chunk should be valid JSON
    const chunkJson = JSON.parse(chunk);
    return [chunkJson, null];
  } catch (err) {
    // if it's not valid JSON, then it's probably an incomplete chunk
    return [null, completeChunk];
  }
};

const processRawChunkString = (
  rawChunkString: string,
  previousPartialChunk: string | null
): [any[], string | null] => {
  /* This is required because, in practice, we see that nginx does not send over
    each chunk one at a time even with buffering turned off. Instead,
    chunks are sometimes in batches or are sometimes incomplete */
  if (!rawChunkString) {
    return [[], null];
  }
  const chunkSections = rawChunkString
    .split("\n")
    .filter((chunk) => chunk.length > 0);
  let parsedChunkSections: any[] = [];
  let currPartialChunk = previousPartialChunk;
  chunkSections.forEach((chunk) => {
    const [processedChunk, partialChunk] = processSingleChunk(
      chunk,
      currPartialChunk
    );
    if (processedChunk) {
      parsedChunkSections.push(processedChunk);
    } else {
      currPartialChunk = partialChunk;
    }
  });
  return [parsedChunkSections, currPartialChunk];
};

export const aiSearchRequestStreamed = async ({
  query,
  sources,
  updateCurrentAnswer,
  updateQuotes,
  updateDocs,
  searchType,
}: SearchRequestArgs) => {
  let answer = "";
  let quotes: Record<string, Quote> | null = null;
  let relevantDocuments: DanswerDocument[] | null = null;
  try {
    const response = await fetch("/api/stream-direct-qa", {
      method: "POST",
      body: JSON.stringify({
        query,
        collection: "danswer_index",
        use_keyword: searchType === SearchType.KEYWORD,
        ...(sources.length > 0
          ? {
              filters: [
                {
                  source_type: sources.map((source) => source.internalName),
                },
              ],
            }
          : {}),
      }),
      headers: {
        "Content-Type": "application/json",
      },
    });
    const reader = response.body?.getReader();
    const decoder = new TextDecoder("utf-8");

    let previousPartialChunk = null;
    while (true) {
      const rawChunk = await reader?.read();
      if (!rawChunk) {
        throw new Error("Unable to process chunk");
      }
      const { done, value } = rawChunk;
      if (done) {
        break;
      }

      // Process each chunk as it arrives
      const [completedChunks, partialChunk] = processRawChunkString(
        decoder.decode(value, { stream: true }),
        previousPartialChunk
      );
      if (!completedChunks.length && !partialChunk) {
        break;
      }
      if (partialChunk) {
        previousPartialChunk = partialChunk;
      }
      completedChunks.forEach((chunk) => {
        // TODO: clean up response / this logic
        const answerChunk = chunk.answer_data;
        if (answerChunk) {
          answer += answerChunk;
          updateCurrentAnswer(answer);
        } else if (chunk.answer_finished) {
          // set quotes as non-null to signify that the answer is finished and
          // we're now looking for quotes
          updateQuotes({});
          if (
            !answer.endsWith(".") &&
            !answer.endsWith("?") &&
            !answer.endsWith("!")
          ) {
            answer += ".";
            updateCurrentAnswer(answer);
          }
        } else {
          if (Object.hasOwn(chunk, "top_documents")) {
            const docs = chunk.top_documents as any[] | null;
            if (docs) {
              relevantDocuments = docs.map(
                (doc) => JSON.parse(doc) as DanswerDocument
              );
              updateDocs(relevantDocuments);
            }
          } else {
            quotes = chunk as Record<string, Quote>;
            updateQuotes(quotes);
          }
        }
      });
    }
  } catch (err) {
    console.error("Fetch error:", err);
  }
  return { answer, quotes, relevantDocuments };
};
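As the comment in processRawChunkString notes, nginx may batch several newline-delimited JSON messages into one read, or split one across reads. A sketch of the batched happy path, assuming the module-private helper were exported for testing (it is not exported in the commit):

```typescript
// Two complete JSON messages arriving in a single network read:
const [parsed, partial] = processRawChunkString(
  '{"answer_data":"Hel"}\n{"answer_data":"lo"}',
  null
);
// parsed  => [{ answer_data: "Hel" }, { answer_data: "lo" }]
// partial => null (nothing left over to carry into the next read)
```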
web/src/lib/search/interfaces.ts (new file, 39 lines)
@@ -0,0 +1,39 @@
import { SearchType } from "@/components/search/SearchTypeSelector";
import { ValidSources } from "../types";

export interface Quote {
  document_id: string;
  link: string;
  source_type: ValidSources;
  blurb: string;
  semantic_identifier: string | null;
}

export interface DanswerDocument {
  document_id: string;
  link: string;
  source_type: ValidSources;
  blurb: string;
  semantic_identifier: string | null;
}

export interface SearchResponse {
  searchType: SearchType;
  answer: string | null;
  quotes: Record<string, Quote> | null;
  documents: DanswerDocument[] | null;
}

export interface Source {
  displayName: string;
  internalName: ValidSources;
}

export interface SearchRequestArgs {
  query: string;
  sources: Source[];
  updateCurrentAnswer: (val: string) => void;
  updateQuotes: (quotes: Record<string, Quote>) => void;
  updateDocs: (documents: DanswerDocument[]) => void;
  searchType: SearchType;
}
web/src/lib/search/keyword.ts (new file, 45 lines)
@@ -0,0 +1,45 @@
import { DanswerDocument, SearchRequestArgs } from "./interfaces";

interface KeywordResponse {
  top_ranked_docs: DanswerDocument[];
  semi_ranked_docs: DanswerDocument[];
}

export const keywordSearch = async ({
  query,
  sources,
  updateDocs,
}: SearchRequestArgs): Promise<void> => {
  const response = await fetch("/api/keyword-search", {
    method: "POST",
    body: JSON.stringify({
      query,
      collection: "danswer_index",
      ...(sources.length > 0
        ? {
            filters: [
              {
                source_type: sources.map((source) => source.internalName),
              },
            ],
          }
        : {}),
    }),
    headers: {
      "Content-Type": "application/json",
    },
  });

  if (!response.ok) {
    return;
  }

  const keywordResults = (await response.json()) as KeywordResponse;

  let matchingDocs = keywordResults.top_ranked_docs;
  if (keywordResults.semi_ranked_docs) {
    matchingDocs = matchingDocs.concat(keywordResults.semi_ranked_docs);
  }

  updateDocs(matchingDocs);
};
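A hypothetical call sketch for keywordSearch: because it shares SearchRequestArgs with the streaming AI path, the unused answer and quote callbacks still have to be supplied. The query string and wrapper function below are invented for illustration:

```typescript
import { SearchType } from "@/components/search/SearchTypeSelector";
import { keywordSearch } from "@/lib/search/keyword";

const runKeywordSearch = async () => {
  await keywordSearch({
    query: "vacation policy",
    sources: [{ displayName: "Google Drive", internalName: "google_drive" }],
    updateCurrentAnswer: () => {}, // unused: keyword search streams no answer
    updateQuotes: () => {}, // unused: keyword search returns no quotes
    updateDocs: (docs) => console.log(`retrieved ${docs.length} documents`),
    searchType: SearchType.KEYWORD,
  });
};
```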