From 3554e29b8d98f4216c59abbbe64d8f5013777766 Mon Sep 17 00:00:00 2001 From: Weves Date: Mon, 23 Oct 2023 23:19:45 -0700 Subject: [PATCH] Add updated_at to UI + add time range selector --- backend/danswer/chunking/models.py | 43 +------ backend/danswer/datastores/vespa/store.py | 71 +++++++---- backend/danswer/search/semantic_search.py | 1 + backend/danswer/server/models.py | 16 +++ .../unit/danswer/direct_qa/test_qa_utils.py | 2 + web/src/components/Dropdown.tsx | 42 +++++++ .../components/search/DateRangeSelector.tsx | 119 ++++++++++++++++++ web/src/components/search/DocumentDisplay.tsx | 22 ++++ web/src/components/search/Filters.tsx | 27 +++- web/src/components/search/SearchSection.tsx | 10 +- web/src/lib/dateUtils.ts | 6 + web/src/lib/hooks.ts | 5 + web/src/lib/search/interfaces.ts | 3 + web/src/lib/search/qa.ts | 3 +- web/src/lib/search/streamingQa.ts | 3 +- web/src/lib/search/utils.ts | 10 +- web/src/lib/time.ts | 30 ++++- 17 files changed, 333 insertions(+), 80 deletions(-) create mode 100644 web/src/components/search/DateRangeSelector.tsx create mode 100644 web/src/lib/dateUtils.ts diff --git a/backend/danswer/chunking/models.py b/backend/danswer/chunking/models.py index ccb0bb40db6a..139ff2da6ee3 100644 --- a/backend/danswer/chunking/models.py +++ b/backend/danswer/chunking/models.py @@ -1,18 +1,9 @@ -import inspect -import json from dataclasses import dataclass from dataclasses import fields +from datetime import datetime from typing import Any -from typing import cast from danswer.access.models import DocumentAccess -from danswer.configs.constants import BLURB -from danswer.configs.constants import BOOST -from danswer.configs.constants import MATCH_HIGHLIGHTS -from danswer.configs.constants import METADATA -from danswer.configs.constants import SCORE -from danswer.configs.constants import SEMANTIC_IDENTIFIER -from danswer.configs.constants import SOURCE_LINKS from danswer.connectors.models import Document from danswer.utils.logger import setup_logger @@ 
-100,6 +91,8 @@ class InferenceChunk(BaseChunk): # to specify that a set of words should be highlighted. For example: # ["the answer is 42", "he couldn't find an answer"] match_highlights: list[str] + # when the doc was last updated + updated_at: datetime | None def __repr__(self) -> str: blurb_words = self.blurb.split() @@ -112,33 +105,3 @@ class InferenceChunk(BaseChunk): break short_blurb += " " + word return f"Inference Chunk: {self.document_id} - {short_blurb}..." - - @classmethod - def from_dict(cls, init_dict: dict[str, Any]) -> "InferenceChunk": - init_kwargs = { - k: v for k, v in init_dict.items() if k in inspect.signature(cls).parameters - } - if SOURCE_LINKS in init_kwargs: - source_links = init_kwargs[SOURCE_LINKS] - source_links_dict = ( - json.loads(source_links) - if isinstance(source_links, str) - else source_links - ) - init_kwargs[SOURCE_LINKS] = { - int(k): v for k, v in cast(dict[str, str], source_links_dict).items() - } - if METADATA in init_kwargs: - init_kwargs[METADATA] = json.loads(init_kwargs[METADATA]) - else: - init_kwargs[METADATA] = {} - init_kwargs[BOOST] = init_kwargs.get(BOOST, 1) - if SCORE not in init_kwargs: - init_kwargs[SCORE] = None - if MATCH_HIGHLIGHTS not in init_kwargs: - init_kwargs[MATCH_HIGHLIGHTS] = [] - if init_kwargs.get(SEMANTIC_IDENTIFIER) is None: - logger.error( - f"Chunk with blurb: {init_kwargs.get(BLURB, 'Unknown')[:50]}... 
has no Semantic Identifier" - ) - return cls(**init_kwargs) diff --git a/backend/danswer/datastores/vespa/store.py b/backend/danswer/datastores/vespa/store.py index be0c219e1f57..78fa5f3b6df5 100644 --- a/backend/danswer/datastores/vespa/store.py +++ b/backend/danswer/datastores/vespa/store.py @@ -36,11 +36,9 @@ from danswer.configs.constants import DOCUMENT_ID from danswer.configs.constants import DOCUMENT_SETS from danswer.configs.constants import EMBEDDINGS from danswer.configs.constants import HIDDEN -from danswer.configs.constants import MATCH_HIGHLIGHTS from danswer.configs.constants import METADATA from danswer.configs.constants import PRIMARY_OWNERS from danswer.configs.constants import RECENCY_BIAS -from danswer.configs.constants import SCORE from danswer.configs.constants import SECONDARY_OWNERS from danswer.configs.constants import SECTION_CONTINUATION from danswer.configs.constants import SEMANTIC_IDENTIFIER @@ -373,6 +371,54 @@ def _process_dynamic_summary( return processed_summary +def _vespa_hit_to_inference_chunk(hit: dict[str, Any]) -> InferenceChunk: + fields = cast(dict[str, Any], hit["fields"]) + + # parse fields that are stored as strings, but are really json / datetime + metadata = json.loads(fields[METADATA]) if METADATA in fields else {} + updated_at = ( + datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc) + if DOC_UPDATED_AT in fields + else None + ) + match_highlights = _process_dynamic_summary( + # fallback to regular `content` if the `content_summary` field + # isn't present + dynamic_summary=hit["fields"].get(CONTENT_SUMMARY, hit["fields"][CONTENT]), + ) + semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, "") + if not semantic_identifier: + logger.error( + f"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... 
has no Semantic Identifier" + ) + source_links = fields.get(SOURCE_LINKS, {}) + source_links_dict_unprocessed = ( + json.loads(source_links) if isinstance(source_links, str) else source_links + ) + source_links_dict = { + int(k): v + for k, v in cast(dict[str, str], source_links_dict_unprocessed).items() + } + + return InferenceChunk( + chunk_id=fields[CHUNK_ID], + blurb=fields[BLURB], + content=fields[CONTENT], + source_links=source_links_dict, + section_continuation=fields[SECTION_CONTINUATION], + document_id=fields[DOCUMENT_ID], + source_type=fields[SOURCE_TYPE], + semantic_identifier=semantic_identifier, + boost=fields.get(BOOST, 1), + recency_bias=fields["matchfeatures"][RECENCY_BIAS], + score=hit["relevance"], + hidden=fields.get(HIDDEN, False), + metadata=metadata, + match_highlights=match_highlights, + updated_at=updated_at, + ) + + def _query_vespa(query_params: Mapping[str, str | int]) -> list[InferenceChunk]: if "query" in query_params and not cast(str, query_params["query"]).strip(): raise ValueError("No/empty query received") @@ -391,26 +437,7 @@ def _query_vespa(query_params: Mapping[str, str | int]) -> list[InferenceChunk]: filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None] - inference_chunks = [ - InferenceChunk.from_dict( - dict( - hit["fields"], - **{RECENCY_BIAS: hit["fields"]["matchfeatures"][RECENCY_BIAS]}, - **{SCORE: hit["relevance"]}, - **{ - MATCH_HIGHLIGHTS: _process_dynamic_summary( - # fallback to regular `content` if the `content_summary` field - # isn't present - dynamic_summary=hit["fields"].get( - CONTENT_SUMMARY, hit["fields"][CONTENT] - ), - ) - }, - ) - ) - for hit in filtered_hits - ] - + inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits] return inference_chunks diff --git a/backend/danswer/search/semantic_search.py b/backend/danswer/search/semantic_search.py index 59fb90d18be9..5249d02c9646 100644 --- a/backend/danswer/search/semantic_search.py +++ 
b/backend/danswer/search/semantic_search.py @@ -50,6 +50,7 @@ def chunks_to_search_docs(chunks: list[InferenceChunk] | None) -> list[SearchDoc hidden=chunk.hidden, score=chunk.score, match_highlights=chunk.match_highlights, + updated_at=chunk.updated_at, ) # semantic identifier should always exist but for really old indices, it was not enforced for chunk in chunks diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index 35194d9a567f..df217670db4e 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -155,6 +155,15 @@ class SearchDoc(BaseModel): # to specify that a set of words should be highlighted. For example: # ["the answer is 42", "the answer is 42""] match_highlights: list[str] + # when the doc was last updated + updated_at: datetime | None + + def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore + initial_dict = super().dict(*args, **kwargs) # type: ignore + initial_dict["updated_at"] = ( + self.updated_at.isoformat() if self.updated_at else None + ) + return initial_dict class RetrievalDocs(BaseModel): @@ -168,6 +177,13 @@ class RerankedRetrievalDocs(RetrievalDocs): time_cutoff: datetime | None favor_recent: bool + def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore + initial_dict = super().dict(*args, **kwargs) # type: ignore + initial_dict["time_cutoff"] = ( + self.time_cutoff.isoformat() if self.time_cutoff else None + ) + return initial_dict + class CreateChatSessionID(BaseModel): chat_session_id: int diff --git a/backend/tests/unit/danswer/direct_qa/test_qa_utils.py b/backend/tests/unit/danswer/direct_qa/test_qa_utils.py index 0200de5e1e08..d6b56bb4653b 100644 --- a/backend/tests/unit/danswer/direct_qa/test_qa_utils.py +++ b/backend/tests/unit/danswer/direct_qa/test_qa_utils.py @@ -121,6 +121,7 @@ class TestQAPostprocessing(unittest.TestCase): score=1, metadata={}, match_highlights=[], + updated_at=None, ) test_chunk_1 = 
InferenceChunk( document_id="test doc 1", @@ -137,6 +138,7 @@ class TestQAPostprocessing(unittest.TestCase): score=1, metadata={}, match_highlights=[], + updated_at=None, ) test_quotes = [ diff --git a/web/src/components/Dropdown.tsx b/web/src/components/Dropdown.tsx index 9903bcbf653c..1391bf205813 100644 --- a/web/src/components/Dropdown.tsx +++ b/web/src/components/Dropdown.tsx @@ -278,3 +278,45 @@ export const SearchMultiSelectDropdown: FC = ({ ); }; + +export const CustomDropdown = ({ + children, + dropdown, +}: { + children: JSX.Element | string; + dropdown: JSX.Element | string; +}) => { + const [isOpen, setIsOpen] = useState(false); + const dropdownRef = useRef(null); + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + dropdownRef.current && + !dropdownRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + } + }; + + document.addEventListener("mousedown", handleClickOutside); + return () => { + document.removeEventListener("mousedown", handleClickOutside); + }; + }, []); + + return ( +
+
setIsOpen(!isOpen)}>{children}
+ + {isOpen && ( +
setIsOpen(!isOpen)} + className="pt-2 absolute bottom w-full z-30 bg-gray-900" + > + {dropdown} +
+ )} +
+ ); +}; diff --git a/web/src/components/search/DateRangeSelector.tsx b/web/src/components/search/DateRangeSelector.tsx new file mode 100644 index 000000000000..99d67a9f323a --- /dev/null +++ b/web/src/components/search/DateRangeSelector.tsx @@ -0,0 +1,119 @@ +import { getXDaysAgo } from "@/lib/dateUtils"; +import { DateRangePickerValue } from "@tremor/react"; +import { FiCalendar, FiChevronDown, FiXCircle } from "react-icons/fi"; +import { CustomDropdown } from "../Dropdown"; + +function DateSelectorItem({ + children, + onClick, + skipBottomBorder, +}: { + children: string | JSX.Element; + onClick?: () => void; + skipBottomBorder?: boolean; +}) { + return ( +
+ {children} +
+ ); +} + +export function DateRangeSelector({ + value, + onValueChange, +}: { + value: DateRangePickerValue | null; + onValueChange: (value: DateRangePickerValue | null) => void; +}) { + return ( +
+ + + onValueChange({ + to: new Date(), + from: getXDaysAgo(30), + selectValue: "Last 30 days", + }) + } + > + Last 30 days + + + onValueChange({ + to: new Date(), + from: getXDaysAgo(7), + selectValue: "Last 7 days", + }) + } + > + Last 7 days + + + onValueChange({ + to: new Date(), + from: getXDaysAgo(1), + selectValue: "Today", + }) + } + skipBottomBorder={true} + > + Today + +
+ } + > +
+ {" "} + {value?.selectValue ? ( +
{value.selectValue}
+ ) : ( + "Any time..." + )} + {value?.selectValue ? ( +
{ + onValueChange(null); + e.stopPropagation(); + }} + > + +
+ ) : ( + + )} +
+ + + ); +} diff --git a/web/src/components/search/DocumentDisplay.tsx b/web/src/components/search/DocumentDisplay.tsx index 4675515d8720..e3b9203a2d6b 100644 --- a/web/src/components/search/DocumentDisplay.tsx +++ b/web/src/components/search/DocumentDisplay.tsx @@ -3,6 +3,7 @@ import { DocumentFeedbackBlock } from "./DocumentFeedbackBlock"; import { getSourceIcon } from "../source"; import { useState } from "react"; import { PopupSpec } from "../admin/connectors/Popup"; +import { timeAgo } from "@/lib/time"; export const buildDocumentSummaryDisplay = ( matchHighlights: string[], @@ -165,6 +166,27 @@ export const DocumentDisplay = ({ )} + {document.updated_at && ( +
+
+
+ {"Updated " + timeAgo(document.updated_at)} +
+
+
+ )}

{buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}

diff --git a/web/src/components/search/Filters.tsx b/web/src/components/search/Filters.tsx index f6e3821cdbbd..fa9596cffd42 100644 --- a/web/src/components/search/Filters.tsx +++ b/web/src/components/search/Filters.tsx @@ -5,6 +5,8 @@ import { Source } from "@/lib/search/interfaces"; import { InfoIcon, defaultTailwindCSS } from "../icons/icons"; import { HoverPopup } from "../HoverPopup"; import { FiFilter } from "react-icons/fi"; +import { DateRangeSelector } from "./DateRangeSelector"; +import { DateRangePickerValue } from "@tremor/react"; const sources: Source[] = [ { displayName: "Google Drive", internalName: "google_drive" }, @@ -27,7 +29,15 @@ const sources: Source[] = [ { displayName: "Google Sites", internalName: "google_sites" }, ]; +const SectionTitle = ({ children }: { children: string }) => ( +
{children}
+); + interface SourceSelectorProps { + timeRange: DateRangePickerValue | null; + setTimeRange: React.Dispatch< + React.SetStateAction + >; selectedSources: Source[]; setSelectedSources: React.Dispatch>; selectedDocumentSets: string[]; @@ -37,6 +47,8 @@ interface SourceSelectorProps { } export function SourceSelector({ + timeRange, + setTimeRange, selectedSources, setSelectedSources, selectedDocumentSets, @@ -71,9 +83,16 @@ export function SourceSelector({ + <> + Time Range +
+ +
+ + {existingSources.length > 0 && ( - <> -
Sources
+
+ Sources
{sources .filter((source) => existingSources.includes(source.internalName)) @@ -96,13 +115,13 @@ export function SourceSelector({
))}
- + )} {availableDocumentSets.length > 0 && ( <>
-
Knowledge Sets
+ Knowledge Sets
{availableDocumentSets.map((documentSet) => ( diff --git a/web/src/components/search/SearchSection.tsx b/web/src/components/search/SearchSection.tsx index 400f1c7e5dd2..32545d081a4c 100644 --- a/web/src/components/search/SearchSection.tsx +++ b/web/src/components/search/SearchSection.tsx @@ -23,7 +23,7 @@ import { SearchHelper } from "./SearchHelper"; import { CancellationToken, cancellable } from "@/lib/search/cancellable"; import { NEXT_PUBLIC_DISABLE_STREAMING } from "@/lib/constants"; import { searchRequest } from "@/lib/search/qa"; -import { useObjectState } from "@/lib/hooks"; +import { useObjectState, useTimeRange } from "@/lib/hooks"; import { questionValidationStreamed } from "@/lib/search/streamingQuestionValidation"; const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = { @@ -60,6 +60,7 @@ export const SearchSection: React.FC = ({ useObjectState(VALID_QUESTION_RESPONSE_DEFAULT); // Filters + const [timeRange, setTimeRange] = useTimeRange(); const [sources, setSources] = useState([]); const [selectedDocumentSets, setSelectedDocumentSets] = useState( [] @@ -141,6 +142,7 @@ export const SearchSection: React.FC = ({ query, sources, documentSets: selectedDocumentSets, + timeRange, updateCurrentAnswer: cancellable({ cancellationToken: lastSearchCancellationToken.current, fn: updateCurrentAnswer, @@ -188,9 +190,11 @@ export const SearchSection: React.FC = ({ return (
-
+
{(connectors.length > 0 || documentSets.length > 0) && ( = ({ /> )} -
+
mutate(INDEXING_STATUS_URL), }; }; + +export const useTimeRange = (initialValue?: DateRangePickerValue) => { + return useState(null); +}; diff --git a/web/src/lib/search/interfaces.ts b/web/src/lib/search/interfaces.ts index 8a4cd78b314b..3e3c6875fd6d 100644 --- a/web/src/lib/search/interfaces.ts +++ b/web/src/lib/search/interfaces.ts @@ -1,3 +1,4 @@ +import { DateRangePickerValue } from "@tremor/react"; import { ValidSources } from "../types"; export const FlowType = { @@ -35,6 +36,7 @@ export interface DanswerDocument { hidden: boolean; score: number; match_highlights: string[]; + updated_at: string | null; } export interface SearchResponse { @@ -61,6 +63,7 @@ export interface SearchRequestArgs { query: string; sources: Source[]; documentSets: string[]; + timeRange: DateRangePickerValue | null; updateCurrentAnswer: (val: string) => void; updateQuotes: (quotes: Quote[]) => void; updateDocs: (documents: DanswerDocument[]) => void; diff --git a/web/src/lib/search/qa.ts b/web/src/lib/search/qa.ts index d594bccbdfe1..ff2bf50ef070 100644 --- a/web/src/lib/search/qa.ts +++ b/web/src/lib/search/qa.ts @@ -11,6 +11,7 @@ export const searchRequest = async ({ query, sources, documentSets, + timeRange, updateCurrentAnswer, updateQuotes, updateDocs, @@ -29,7 +30,7 @@ export const searchRequest = async ({ let quotes: Quote[] | null = null; let relevantDocuments: DanswerDocument[] | null = null; try { - const filters = buildFilters(sources, documentSets); + const filters = buildFilters(sources, documentSets, timeRange); const response = await fetch("/api/direct-qa", { method: "POST", body: JSON.stringify({ diff --git a/web/src/lib/search/streamingQa.ts b/web/src/lib/search/streamingQa.ts index 92815db1e2df..da83ecd74e8e 100644 --- a/web/src/lib/search/streamingQa.ts +++ b/web/src/lib/search/streamingQa.ts @@ -56,6 +56,7 @@ export const searchRequestStreamed = async ({ query, sources, documentSets, + timeRange, updateCurrentAnswer, updateQuotes, updateDocs, @@ -75,7 +76,7 @@ 
export const searchRequestStreamed = async ({ let quotes: Quote[] | null = null; let relevantDocuments: DanswerDocument[] | null = null; try { - const filters = buildFilters(sources, documentSets); + const filters = buildFilters(sources, documentSets, timeRange); const response = await fetch("/api/stream-direct-qa", { method: "POST", body: JSON.stringify({ diff --git a/web/src/lib/search/utils.ts b/web/src/lib/search/utils.ts index a179025a576d..f1be0fcb6270 100644 --- a/web/src/lib/search/utils.ts +++ b/web/src/lib/search/utils.ts @@ -1,12 +1,16 @@ import { Source } from "./interfaces"; +import { DateRangePickerValue } from "@tremor/react"; -export const buildFilters = (sources: Source[], documentSets: string[]) => { +export const buildFilters = ( + sources: Source[], + documentSets: string[], + timeRange: DateRangePickerValue | null +) => { const filters = { source_type: sources.length > 0 ? sources.map((source) => source.internalName) : null, document_set: documentSets.length > 0 ? documentSets : null, - // TODO make this a date selector - time_cutoff: null, + time_cutoff: timeRange?.from ? 
timeRange.from : null, }; return filters; diff --git a/web/src/lib/time.ts b/web/src/lib/time.ts index 26e5204fa8ab..8c53d1b609e8 100644 --- a/web/src/lib/time.ts +++ b/web/src/lib/time.ts @@ -1,3 +1,10 @@ +const conditionallyAddPlural = (noun: string, cnt: number) => { + if (cnt !== 1) { + return `${noun}s`; + } + return noun; +}; + export const timeAgo = ( dateString: string | undefined | null ): string | null => { @@ -10,29 +17,40 @@ export const timeAgo = ( const secondsDiff = Math.floor((now.getTime() - date.getTime()) / 1000); if (secondsDiff < 60) { - return `${secondsDiff} second(s) ago`; + return `${secondsDiff} ${conditionallyAddPlural( + "second", + secondsDiff + )} ago`; } const minutesDiff = Math.floor(secondsDiff / 60); if (minutesDiff < 60) { - return `${minutesDiff} minute(s) ago`; + return `${minutesDiff} ${conditionallyAddPlural( + "minute", + minutesDiff + )} ago`; } const hoursDiff = Math.floor(minutesDiff / 60); if (hoursDiff < 24) { - return `${hoursDiff} hour(s) ago`; + return `${hoursDiff} ${conditionallyAddPlural("hour", hoursDiff)} ago`; } const daysDiff = Math.floor(hoursDiff / 24); if (daysDiff < 30) { - return `${daysDiff} day(s) ago`; + return `${daysDiff} ${conditionallyAddPlural("day", daysDiff)} ago`; + } + + const weeksDiff = Math.floor(daysDiff / 7); + if (weeksDiff < 4) { + return `${weeksDiff} ${conditionallyAddPlural("week", weeksDiff)} ago`; } const monthsDiff = Math.floor(daysDiff / 30); if (monthsDiff < 12) { - return `${monthsDiff} month(s) ago`; + return `${monthsDiff} ${conditionallyAddPlural("month", monthsDiff)} ago`; } const yearsDiff = Math.floor(monthsDiff / 12); - return `${yearsDiff} year(s) ago`; + return `${yearsDiff} ${conditionallyAddPlural("year", yearsDiff)} ago`; };