Add updated_at to UI + add time range selector

This commit is contained in:
Weves
2023-10-23 23:19:45 -07:00
committed by Chris Weaver
parent 88eaae62d9
commit 3554e29b8d
17 changed files with 333 additions and 80 deletions

View File

@@ -1,18 +1,9 @@
import inspect
import json
from dataclasses import dataclass
from dataclasses import fields
from datetime import datetime
from typing import Any
from typing import cast
from danswer.access.models import DocumentAccess
from danswer.configs.constants import BLURB
from danswer.configs.constants import BOOST
from danswer.configs.constants import MATCH_HIGHLIGHTS
from danswer.configs.constants import METADATA
from danswer.configs.constants import SCORE
from danswer.configs.constants import SEMANTIC_IDENTIFIER
from danswer.configs.constants import SOURCE_LINKS
from danswer.connectors.models import Document
from danswer.utils.logger import setup_logger
@@ -100,6 +91,8 @@ class InferenceChunk(BaseChunk):
# to specify that a set of words should be highlighted. For example:
# ["<hi>the</hi> <hi>answer</hi> is 42", "he couldn't find an <hi>answer</hi>"]
match_highlights: list[str]
# when the doc was last updated
updated_at: datetime | None
def __repr__(self) -> str:
blurb_words = self.blurb.split()
@@ -112,33 +105,3 @@ class InferenceChunk(BaseChunk):
break
short_blurb += " " + word
return f"Inference Chunk: {self.document_id} - {short_blurb}..."
@classmethod
def from_dict(cls, init_dict: dict[str, Any]) -> "InferenceChunk":
    """Build an InferenceChunk from a raw mapping of field names to values.

    Keys that are not constructor parameters are dropped. Source links and
    metadata are decoded from JSON where needed, and boost, score and match
    highlights receive defaults when absent. A missing semantic identifier is
    logged as an error but does not prevent construction.
    """
    accepted_params = inspect.signature(cls).parameters
    init_kwargs = {
        name: value for name, value in init_dict.items() if name in accepted_params
    }

    if SOURCE_LINKS in init_kwargs:
        raw_links = init_kwargs[SOURCE_LINKS]
        # links may arrive either as a JSON string or as an already-decoded mapping
        if isinstance(raw_links, str):
            raw_links = json.loads(raw_links)
        # the mapping's keys are converted to ints
        init_kwargs[SOURCE_LINKS] = {
            int(link_key): link
            for link_key, link in cast(dict[str, str], raw_links).items()
        }

    init_kwargs[METADATA] = (
        json.loads(init_kwargs[METADATA]) if METADATA in init_kwargs else {}
    )
    init_kwargs.setdefault(BOOST, 1)
    init_kwargs.setdefault(SCORE, None)
    init_kwargs.setdefault(MATCH_HIGHLIGHTS, [])

    if init_kwargs.get(SEMANTIC_IDENTIFIER) is None:
        logger.error(
            f"Chunk with blurb: {init_kwargs.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier"
        )

    return cls(**init_kwargs)

View File

@@ -36,11 +36,9 @@ from danswer.configs.constants import DOCUMENT_ID
from danswer.configs.constants import DOCUMENT_SETS
from danswer.configs.constants import EMBEDDINGS
from danswer.configs.constants import HIDDEN
from danswer.configs.constants import MATCH_HIGHLIGHTS
from danswer.configs.constants import METADATA
from danswer.configs.constants import PRIMARY_OWNERS
from danswer.configs.constants import RECENCY_BIAS
from danswer.configs.constants import SCORE
from danswer.configs.constants import SECONDARY_OWNERS
from danswer.configs.constants import SECTION_CONTINUATION
from danswer.configs.constants import SEMANTIC_IDENTIFIER
@@ -373,6 +371,54 @@ def _process_dynamic_summary(
return processed_summary
def _vespa_hit_to_inference_chunk(hit: dict[str, Any]) -> InferenceChunk:
    """Convert a single Vespa search hit into an InferenceChunk.

    Args:
        hit: One hit from a Vespa query response. It must contain a "fields"
            mapping (holding the stored chunk fields and a "matchfeatures"
            entry) and a top-level "relevance" value.

    Returns:
        The InferenceChunk built from the hit.

    Raises:
        KeyError: if a required field (e.g. chunk id, blurb, content, document
            id, source type) is missing from the hit.
    """
    fields = cast(dict[str, Any], hit["fields"])

    # parse fields that are stored as strings, but are really json / datetime
    metadata = json.loads(fields[METADATA]) if METADATA in fields else {}
    updated_at = (
        datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc)
        if DOC_UPDATED_AT in fields
        else None
    )
    match_highlights = _process_dynamic_summary(
        # fallback to regular `content` if the `content_summary` field
        # isn't present
        dynamic_summary=fields.get(CONTENT_SUMMARY, fields[CONTENT]),
    )

    # A missing semantic identifier is reported but must not abort the
    # conversion, so the fallback value computed here is what gets passed to
    # the constructor below (indexing `fields` again would raise KeyError).
    semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, "")
    if not semantic_identifier:
        logger.error(
            f"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier"
        )

    # source links may be stored as a JSON string; keys are normalized to ints
    source_links = fields.get(SOURCE_LINKS, {})
    source_links_dict_unprocessed = (
        json.loads(source_links) if isinstance(source_links, str) else source_links
    )
    source_links_dict = {
        int(k): v
        for k, v in cast(dict[str, str], source_links_dict_unprocessed).items()
    }

    return InferenceChunk(
        chunk_id=fields[CHUNK_ID],
        blurb=fields[BLURB],
        content=fields[CONTENT],
        source_links=source_links_dict,
        section_continuation=fields[SECTION_CONTINUATION],
        document_id=fields[DOCUMENT_ID],
        source_type=fields[SOURCE_TYPE],
        semantic_identifier=semantic_identifier,
        boost=fields.get(BOOST, 1),
        recency_bias=fields["matchfeatures"][RECENCY_BIAS],
        score=hit["relevance"],
        hidden=fields.get(HIDDEN, False),
        metadata=metadata,
        match_highlights=match_highlights,
        updated_at=updated_at,
    )
def _query_vespa(query_params: Mapping[str, str | int]) -> list[InferenceChunk]:
if "query" in query_params and not cast(str, query_params["query"]).strip():
raise ValueError("No/empty query received")
@@ -391,26 +437,7 @@ def _query_vespa(query_params: Mapping[str, str | int]) -> list[InferenceChunk]:
filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None]
inference_chunks = [
InferenceChunk.from_dict(
dict(
hit["fields"],
**{RECENCY_BIAS: hit["fields"]["matchfeatures"][RECENCY_BIAS]},
**{SCORE: hit["relevance"]},
**{
MATCH_HIGHLIGHTS: _process_dynamic_summary(
# fallback to regular `content` if the `content_summary` field
# isn't present
dynamic_summary=hit["fields"].get(
CONTENT_SUMMARY, hit["fields"][CONTENT]
),
)
},
)
)
for hit in filtered_hits
]
inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]
return inference_chunks

View File

@@ -50,6 +50,7 @@ def chunks_to_search_docs(chunks: list[InferenceChunk] | None) -> list[SearchDoc
hidden=chunk.hidden,
score=chunk.score,
match_highlights=chunk.match_highlights,
updated_at=chunk.updated_at,
)
# semantic identifier should always exist but for really old indices, it was not enforced
for chunk in chunks

View File

@@ -155,6 +155,15 @@ class SearchDoc(BaseModel):
# to specify that a set of words should be highlighted. For example:
# ["<hi>the</hi> <hi>answer</hi> is 42", "the answer is <hi>42</hi>"]
match_highlights: list[str]
# when the doc was last updated
updated_at: datetime | None
def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
    """Return the parent class's dict with `updated_at` as an ISO-8601 string (or None)."""
    serialized = super().dict(*args, **kwargs)  # type: ignore
    last_updated = self.updated_at
    serialized["updated_at"] = last_updated.isoformat() if last_updated else None
    return serialized
class RetrievalDocs(BaseModel):
@@ -168,6 +177,13 @@ class RerankedRetrievalDocs(RetrievalDocs):
time_cutoff: datetime | None
favor_recent: bool
def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]:  # type: ignore
    """Return the parent class's dict with `time_cutoff` as an ISO-8601 string (or None)."""
    serialized = super().dict(*args, **kwargs)  # type: ignore
    cutoff = self.time_cutoff
    serialized["time_cutoff"] = cutoff.isoformat() if cutoff else None
    return serialized
class CreateChatSessionID(BaseModel):
chat_session_id: int

View File

@@ -121,6 +121,7 @@ class TestQAPostprocessing(unittest.TestCase):
score=1,
metadata={},
match_highlights=[],
updated_at=None,
)
test_chunk_1 = InferenceChunk(
document_id="test doc 1",
@@ -137,6 +138,7 @@ class TestQAPostprocessing(unittest.TestCase):
score=1,
metadata={},
match_highlights=[],
updated_at=None,
)
test_quotes = [

View File

@@ -278,3 +278,45 @@ export const SearchMultiSelectDropdown: FC<MultiSelectDropdownProps> = ({
</div>
);
};
/**
 * Renders `children` as a clickable trigger that shows or hides `dropdown`.
 * Clicking the trigger or the open dropdown toggles it; a mousedown anywhere
 * outside the component closes it.
 */
export const CustomDropdown = ({
  children,
  dropdown,
}: {
  children: JSX.Element | string;
  dropdown: JSX.Element | string;
}) => {
  const [isOpen, setIsOpen] = useState(false);
  const dropdownRef = useRef<HTMLDivElement>(null);
  const toggle = () => setIsOpen(!isOpen);

  useEffect(() => {
    const closeOnOutsideClick = (event: MouseEvent) => {
      const container = dropdownRef.current;
      if (!container || container.contains(event.target as Node)) {
        return;
      }
      setIsOpen(false);
    };

    document.addEventListener("mousedown", closeOnOutsideClick);
    return () => {
      document.removeEventListener("mousedown", closeOnOutsideClick);
    };
  }, []);

  return (
    <div className="relative inline-block text-left w-full" ref={dropdownRef}>
      <div onClick={toggle}>{children}</div>
      {isOpen && (
        <div
          onClick={toggle}
          className="pt-2 absolute bottom w-full z-30 bg-gray-900"
        >
          {dropdown}
        </div>
      )}
    </div>
  );
};

View File

@@ -0,0 +1,119 @@
import { getXDaysAgo } from "@/lib/dateUtils";
import { DateRangePickerValue } from "@tremor/react";
import { FiCalendar, FiChevronDown, FiXCircle } from "react-icons/fi";
import { CustomDropdown } from "../Dropdown";
/**
 * A single clickable row used by DateRangeSelector's dropdown.
 *
 * Renders `children` as the row content and invokes `onClick` (when provided)
 * on click. A bottom border is drawn unless `skipBottomBorder` is truthy.
 */
function DateSelectorItem({
children,
onClick,
skipBottomBorder,
}: {
children: string | JSX.Element;
onClick?: () => void;
skipBottomBorder?: boolean;
}) {
return (
<div
className={`
px-3
text-sm
text-gray-200
hover:bg-dark-tremor-background-muted
py-2.5
select-none
cursor-pointer
${skipBottomBorder ? "" : "border-b border-gray-800"}
`}
onClick={onClick}
>
{children}
</div>
);
}
/**
 * Dropdown for choosing a preset time range ("Last 30 days", "Last 7 days",
 * "Today").
 *
 * The trigger shows `value.selectValue` when set, otherwise "Any time...".
 * Picking a preset calls `onValueChange` with `to` set to the current time,
 * `from` set via `getXDaysAgo`, and `selectValue` set to the preset's label.
 * When a value is selected, the chevron is replaced by a clear icon that calls
 * `onValueChange(null)`.
 */
export function DateRangeSelector({
value,
onValueChange,
}: {
value: DateRangePickerValue | null;
onValueChange: (value: DateRangePickerValue | null) => void;
}) {
return (
<div>
<CustomDropdown
dropdown={
<div className="border border-gray-800 rounded-lg flex flex-col">
<DateSelectorItem
onClick={() =>
onValueChange({
to: new Date(),
from: getXDaysAgo(30),
selectValue: "Last 30 days",
})
}
>
Last 30 days
</DateSelectorItem>
<DateSelectorItem
onClick={() =>
onValueChange({
to: new Date(),
from: getXDaysAgo(7),
selectValue: "Last 7 days",
})
}
>
Last 7 days
</DateSelectorItem>
<DateSelectorItem
onClick={() =>
onValueChange({
to: new Date(),
// NOTE(review): "Today" starts at the same clock time one day earlier
// (getXDaysAgo(1)), not at local midnight -- confirm this is intended
from: getXDaysAgo(1),
selectValue: "Today",
})
}
skipBottomBorder={true}
>
Today
</DateSelectorItem>
</div>
}
>
<div
className={`
flex
text-sm
text-gray-400
px-3
py-1.5
rounded-lg
border
border-gray-800
cursor-pointer
hover:bg-dark-tremor-background-muted`}
>
<FiCalendar className="my-auto mr-2 text-gray-500" />{" "}
{value?.selectValue ? (
<div className="text-gray-200">{value.selectValue}</div>
) : (
"Any time..."
)}
{value?.selectValue ? (
<div
className="my-auto ml-auto hover:text-gray-300 hover:bg-gray-700 p-0.5 rounded-full w-fit"
onClick={(e) => {
onValueChange(null);
// keep the click from bubbling to the CustomDropdown trigger, whose
// onClick would otherwise toggle the menu
e.stopPropagation();
}}
>
<FiXCircle />
</div>
) : (
<FiChevronDown className="my-auto ml-auto" />
)}
</div>
</CustomDropdown>
</div>
);
}

View File

@@ -3,6 +3,7 @@ import { DocumentFeedbackBlock } from "./DocumentFeedbackBlock";
import { getSourceIcon } from "../source";
import { useState } from "react";
import { PopupSpec } from "../admin/connectors/Popup";
import { timeAgo } from "@/lib/time";
export const buildDocumentSummaryDisplay = (
matchHighlights: string[],
@@ -165,6 +166,27 @@ export const DocumentDisplay = ({
)}
</div>
</div>
{document.updated_at && (
<div className="flex flex-wrap gap-x-2 mt-1">
<div
className={`
text-xs
text-gray-200
bg-gray-800
rounded-full
px-1
py-0.5
w-fit
my-auto
select-none
mr-2`}
>
<div className="mr-1 my-auto flex">
{"Updated " + timeAgo(document.updated_at)}
</div>
</div>
</div>
)}
<p className="pl-1 pt-2 pb-3 text-gray-200 break-words">
{buildDocumentSummaryDisplay(document.match_highlights, document.blurb)}
</p>

View File

@@ -5,6 +5,8 @@ import { Source } from "@/lib/search/interfaces";
import { InfoIcon, defaultTailwindCSS } from "../icons/icons";
import { HoverPopup } from "../HoverPopup";
import { FiFilter } from "react-icons/fi";
import { DateRangeSelector } from "./DateRangeSelector";
import { DateRangePickerValue } from "@tremor/react";
const sources: Source[] = [
{ displayName: "Google Drive", internalName: "google_drive" },
@@ -27,7 +29,15 @@ const sources: Source[] = [
{ displayName: "Google Sites", internalName: "google_sites" },
];
// Small heading that labels one section of the filter sidebar.
const SectionTitle = ({ children }: { children: string }) => {
  return <div className="font-medium text-sm flex">{children}</div>;
};
interface SourceSelectorProps {
timeRange: DateRangePickerValue | null;
setTimeRange: React.Dispatch<
React.SetStateAction<DateRangePickerValue | null>
>;
selectedSources: Source[];
setSelectedSources: React.Dispatch<React.SetStateAction<Source[]>>;
selectedDocumentSets: string[];
@@ -37,6 +47,8 @@ interface SourceSelectorProps {
}
export function SourceSelector({
timeRange,
setTimeRange,
selectedSources,
setSelectedSources,
selectedDocumentSets,
@@ -71,9 +83,16 @@ export function SourceSelector({
<FiFilter className="my-auto ml-2" size="18" />
</div>
<>
<SectionTitle>Time Range</SectionTitle>
<div className="mt-2">
<DateRangeSelector value={timeRange} onValueChange={setTimeRange} />
</div>
</>
{existingSources.length > 0 && (
<>
<div className="font-medium text-sm flex">Sources</div>
<div className="mt-4">
<SectionTitle>Sources</SectionTitle>
<div className="px-1">
{sources
.filter((source) => existingSources.includes(source.internalName))
@@ -96,13 +115,13 @@ export function SourceSelector({
</div>
))}
</div>
</>
</div>
)}
{availableDocumentSets.length > 0 && (
<>
<div className="mt-4">
<div className="font-medium text-sm flex">Knowledge Sets</div>
<SectionTitle>Knowledge Sets</SectionTitle>
</div>
<div className="px-1">
{availableDocumentSets.map((documentSet) => (

View File

@@ -23,7 +23,7 @@ import { SearchHelper } from "./SearchHelper";
import { CancellationToken, cancellable } from "@/lib/search/cancellable";
import { NEXT_PUBLIC_DISABLE_STREAMING } from "@/lib/constants";
import { searchRequest } from "@/lib/search/qa";
import { useObjectState } from "@/lib/hooks";
import { useObjectState, useTimeRange } from "@/lib/hooks";
import { questionValidationStreamed } from "@/lib/search/streamingQuestionValidation";
const SEARCH_DEFAULT_OVERRIDES_START: SearchDefaultOverrides = {
@@ -60,6 +60,7 @@ export const SearchSection: React.FC<SearchSectionProps> = ({
useObjectState<ValidQuestionResponse>(VALID_QUESTION_RESPONSE_DEFAULT);
// Filters
const [timeRange, setTimeRange] = useTimeRange();
const [sources, setSources] = useState<Source[]>([]);
const [selectedDocumentSets, setSelectedDocumentSets] = useState<string[]>(
[]
@@ -141,6 +142,7 @@ export const SearchSection: React.FC<SearchSectionProps> = ({
query,
sources,
documentSets: selectedDocumentSets,
timeRange,
updateCurrentAnswer: cancellable({
cancellationToken: lastSearchCancellationToken.current,
fn: updateCurrentAnswer,
@@ -188,9 +190,11 @@ export const SearchSection: React.FC<SearchSectionProps> = ({
return (
<div className="relative max-w-[2000px] xl:max-w-[1400px] mx-auto">
<div className="absolute left-0 hidden 2xl:block w-64">
<div className="absolute left-0 2xl:block w-64">
{(connectors.length > 0 || documentSets.length > 0) && (
<SourceSelector
timeRange={timeRange}
setTimeRange={setTimeRange}
selectedSources={sources}
setSelectedSources={setSources}
selectedDocumentSets={selectedDocumentSets}
@@ -200,7 +204,7 @@ export const SearchSection: React.FC<SearchSectionProps> = ({
/>
)}
<div className="mt-10">
<div className="mt-10 pr-2">
<SearchHelper
isFetching={isFetching}
searchResponse={searchResponse}

6
web/src/lib/dateUtils.ts Normal file
View File

@@ -0,0 +1,6 @@
/**
 * Returns a new Date that is `daysAgo` calendar days before the current
 * moment, keeping the current time of day.
 */
export function getXDaysAgo(daysAgo: number) {
  const now = new Date();
  const shifted = new Date(now.getTime());
  // setDate handles values <= 0 by rolling back into the previous month(s)
  shifted.setDate(now.getDate() - daysAgo);
  return shifted;
}

View File

@@ -6,6 +6,7 @@ import {
import useSWR, { mutate, useSWRConfig } from "swr";
import { fetcher } from "./fetcher";
import { useState } from "react";
import { DateRangePickerValue } from "@tremor/react";
const CREDENTIAL_URL = "/api/manage/admin/credential";
@@ -68,3 +69,7 @@ export const useConnectorCredentialIndexingStatus = (
refreshIndexingStatus: () => mutate(INDEXING_STATUS_URL),
};
};
/**
 * State hook holding the currently selected time range.
 *
 * @param initialValue optional starting range; when omitted the state starts
 *   as `null` (no range selected)
 * @returns the `[value, setValue]` pair produced by `useState`
 */
export const useTimeRange = (initialValue?: DateRangePickerValue) => {
  // Seed the state from `initialValue` instead of silently discarding it;
  // callers that pass nothing still start from `null`.
  return useState<DateRangePickerValue | null>(initialValue ?? null);
};

View File

@@ -1,3 +1,4 @@
import { DateRangePickerValue } from "@tremor/react";
import { ValidSources } from "../types";
export const FlowType = {
@@ -35,6 +36,7 @@ export interface DanswerDocument {
hidden: boolean;
score: number;
match_highlights: string[];
updated_at: string | null;
}
export interface SearchResponse {
@@ -61,6 +63,7 @@ export interface SearchRequestArgs {
query: string;
sources: Source[];
documentSets: string[];
timeRange: DateRangePickerValue | null;
updateCurrentAnswer: (val: string) => void;
updateQuotes: (quotes: Quote[]) => void;
updateDocs: (documents: DanswerDocument[]) => void;

View File

@@ -11,6 +11,7 @@ export const searchRequest = async ({
query,
sources,
documentSets,
timeRange,
updateCurrentAnswer,
updateQuotes,
updateDocs,
@@ -29,7 +30,7 @@ export const searchRequest = async ({
let quotes: Quote[] | null = null;
let relevantDocuments: DanswerDocument[] | null = null;
try {
const filters = buildFilters(sources, documentSets);
const filters = buildFilters(sources, documentSets, timeRange);
const response = await fetch("/api/direct-qa", {
method: "POST",
body: JSON.stringify({

View File

@@ -56,6 +56,7 @@ export const searchRequestStreamed = async ({
query,
sources,
documentSets,
timeRange,
updateCurrentAnswer,
updateQuotes,
updateDocs,
@@ -75,7 +76,7 @@ export const searchRequestStreamed = async ({
let quotes: Quote[] | null = null;
let relevantDocuments: DanswerDocument[] | null = null;
try {
const filters = buildFilters(sources, documentSets);
const filters = buildFilters(sources, documentSets, timeRange);
const response = await fetch("/api/stream-direct-qa", {
method: "POST",
body: JSON.stringify({

View File

@@ -1,12 +1,16 @@
import { Source } from "./interfaces";
import { DateRangePickerValue } from "@tremor/react";
export const buildFilters = (sources: Source[], documentSets: string[]) => {
export const buildFilters = (
sources: Source[],
documentSets: string[],
timeRange: DateRangePickerValue | null
) => {
const filters = {
source_type:
sources.length > 0 ? sources.map((source) => source.internalName) : null,
document_set: documentSets.length > 0 ? documentSets : null,
// TODO make this a date selector
time_cutoff: null,
time_cutoff: timeRange?.from ? timeRange.from : null,
};
return filters;

View File

@@ -1,3 +1,10 @@
/**
 * Returns `noun` with an "s" appended unless `cnt` is exactly 1.
 *
 * Zero takes the plural form as well ("0 seconds"), so the check is
 * "not equal to 1" rather than "greater than 1".
 */
const conditionallyAddPlural = (noun: string, cnt: number) => {
  if (cnt !== 1) {
    return `${noun}s`;
  }
  return noun;
};
export const timeAgo = (
dateString: string | undefined | null
): string | null => {
@@ -10,29 +17,40 @@ export const timeAgo = (
const secondsDiff = Math.floor((now.getTime() - date.getTime()) / 1000);
if (secondsDiff < 60) {
return `${secondsDiff} second(s) ago`;
return `${secondsDiff} ${conditionallyAddPlural(
"second",
secondsDiff
)} ago`;
}
const minutesDiff = Math.floor(secondsDiff / 60);
if (minutesDiff < 60) {
return `${minutesDiff} minute(s) ago`;
return `${minutesDiff} ${conditionallyAddPlural(
"minute",
secondsDiff
)} ago`;
}
const hoursDiff = Math.floor(minutesDiff / 60);
if (hoursDiff < 24) {
return `${hoursDiff} hour(s) ago`;
return `${hoursDiff} ${conditionallyAddPlural("hour", hoursDiff)} ago`;
}
const daysDiff = Math.floor(hoursDiff / 24);
if (daysDiff < 30) {
return `${daysDiff} day(s) ago`;
return `${daysDiff} ${conditionallyAddPlural("day", daysDiff)} ago`;
}
const weeksDiff = Math.floor(daysDiff / 7);
if (weeksDiff < 4) {
return `${weeksDiff} ${conditionallyAddPlural("week", weeksDiff)} ago`;
}
const monthsDiff = Math.floor(daysDiff / 30);
if (monthsDiff < 12) {
return `${monthsDiff} month(s) ago`;
return `${monthsDiff} ${conditionallyAddPlural("month", monthsDiff)} ago`;
}
const yearsDiff = Math.floor(monthsDiff / 12);
return `${yearsDiff} year(s) ago`;
return `${yearsDiff} ${conditionallyAddPlural("year", yearsDiff)} ago`;
};