Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-04-07 11:28:09 +02:00)

Commit 7fd59894c9 ("fast but broken"), parent 48b07462e8.
@@ -24,10 +24,10 @@ from onyx.chat.chat_utils import prepare_chat_message_request
 from onyx.chat.models import PersonaOverrideConfig
 from onyx.chat.process_message import ChatPacketStream
 from onyx.chat.process_message import stream_chat_message_objects
+from onyx.configs.app_configs import FAST_SEARCH_MAX_HITS
 from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
-from onyx.context.search.fast_search import FAST_SEARCH_MAX_HITS
-from onyx.context.search.fast_search import run_fast_search
-from onyx.context.search.models import RetrievalOptions
+from onyx.context.search.enums import LLMEvaluationType
+from onyx.context.search.models import BaseFilters
 from onyx.context.search.models import SavedSearchDocWithContent
 from onyx.context.search.models import SearchRequest
 from onyx.context.search.pipeline import SearchPipeline
@@ -35,19 +35,16 @@ from onyx.context.search.utils import dedupe_documents
 from onyx.context.search.utils import drop_llm_indices
 from onyx.context.search.utils import relevant_sections_to_indices
 from onyx.db.chat import get_prompt_by_id
-from onyx.db.dependencies import get_session
+from onyx.db.engine import get_session
 from onyx.db.models import Persona
 from onyx.db.models import User
 from onyx.db.persona import get_persona_by_id
-from onyx.llm.factory import AllLLMs
-from onyx.llm.factory import AllModelProviders
 from onyx.llm.factory import get_default_llms
 from onyx.llm.factory import get_llms_for_persona
 from onyx.llm.factory import get_main_llm_from_tuple
 from onyx.llm.utils import get_max_input_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.server.utils import get_json_line
-from onyx.utils.license import check_user_license_if_ee_feature
 from onyx.utils.logger import setup_logger
@@ -297,7 +294,9 @@ class FastSearchRequest(BaseModel):
     """Request for fast search endpoint that returns raw search results without section merging."""

     query: str
-    retrieval_options: Optional[RetrievalOptions] = None
+    filters: BaseFilters | None = (
+        None  # Direct filter options instead of retrieval_options
+    )
     max_results: Optional[
         int
     ] = None  # If not provided, defaults to FAST_SEARCH_MAX_HITS
@@ -309,7 +308,7 @@ class FastSearchResult(BaseModel):
     document_id: str
     chunk_id: int
     content: str
-    source_links: list[str] = []
+    source_links: dict[int, str] | None = None
     score: Optional[float] = None
     metadata: Optional[dict] = None
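For reference, a request/response pair for these two models could look like the sketch below. This is illustrative, not part of the diff: the values are invented, and filters is left as None to avoid guessing at BaseFilters internals. Note that under the new model, source_links is keyed by integer offsets.

    # Illustrative request/response shapes for FastSearchRequest / FastSearchResponse.
    # All values are made up for the example.
    example_request = {
        "query": "onboarding checklist",
        "filters": None,  # a BaseFilters payload could narrow the search
        "max_results": 50,  # omitted -> defaults to FAST_SEARCH_MAX_HITS (300)
    }

    example_response = {
        "results": [
            {
                "document_id": "doc-123",
                "chunk_id": 0,
                "content": "New hires should complete the following steps...",
                "source_links": {0: "https://example.com/handbook"},  # dict[int, str] | None
                "score": 0.87,
                "metadata": {"source_type": "web"},
            }
        ],
        "total_found": 1,
    }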
@@ -333,54 +332,57 @@ def get_fast_search_response(
     of section expansion, reranking, relevance evaluation, and merging.
     """
     try:
-        # Set up the search request
+        # Set up the search request with optimized settings
+        max_results = request.max_results or FAST_SEARCH_MAX_HITS
+
+        # Create a search request with optimized settings
         search_request = SearchRequest(
             query=request.query,
-            retrieval_options=request.retrieval_options,
+            human_selected_filters=request.filters,
+            # Skip section expansion
+            chunks_above=0,
+            chunks_below=0,
+            # Skip LLM evaluation
+            evaluation_type=LLMEvaluationType.SKIP,
+            # Limit the number of results
+            limit=max_results,
         )

-        # Set up the LLM instances
-        with AllModelProviders() as all_model_providers:
-            with AllLLMs(
-                model_providers=all_model_providers,
-                persona=Persona(
-                    id="default",
-                    name="Default",
-                    llm_relevance_filter=False,
-                ),
-                db_session=db_session,
-            ) as llm_instances:
-                # Get user's license status
-                check_user_license_if_ee_feature(user, db_session, "fast_search")
-
-                # Run the fast search
-                max_results = request.max_results or FAST_SEARCH_MAX_HITS
-                chunks = run_fast_search(
-                    search_request=search_request,
-                    user=user,
-                    llm=llm_instances.llm,
-                    fast_llm=llm_instances.fast_llm,
-                    db_session=db_session,
-                    max_results=max_results,
-                )
-
-                # Convert chunks to response format
-                results = [
-                    FastSearchResult(
-                        document_id=chunk.document_id,
-                        chunk_id=chunk.chunk_id,
-                        content=chunk.content,
-                        source_links=chunk.source_links,
-                        score=chunk.score,
-                        metadata=chunk.metadata,
-                    )
-                    for chunk in chunks
-                ]
-
-                return FastSearchResponse(
-                    results=results,
-                    total_found=len(results),
-                )
+        llm, fast_llm = get_default_llms()
+
+        # Create the search pipeline with optimized settings
+        search_pipeline = SearchPipeline(
+            search_request=search_request,
+            user=user,
+            llm=llm,
+            fast_llm=fast_llm,
+            skip_query_analysis=True,  # Skip expensive query analysis
+            db_session=db_session,
+            bypass_acl=False,
+        )
+
+        # Only retrieve chunks without further processing
+        chunks = search_pipeline._get_chunks()
+
+        # Convert chunks to response format
+        results = [
+            FastSearchResult(
+                document_id=chunk.document_id,
+                chunk_id=chunk.chunk_id,
+                content=chunk.content,
+                source_links=chunk.source_links,
+                score=chunk.score,
+                metadata=chunk.metadata,
+            )
+            for chunk in chunks
+        ]
+
+        return FastSearchResponse(
+            results=results,
+            total_found=len(results),
+        )
     except Exception as e:
         logger.exception("Error in fast search")
         raise HTTPException(status_code=500, detail=str(e))
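A minimal client sketch for exercising this endpoint follows. The /api/query/fast-search path is taken from the frontend change further down; the base URL, port, and absence of auth are assumptions for a local deployment.

    import requests

    BASE_URL = "http://localhost:8080"  # assumed local deployment; adjust host/port/auth

    resp = requests.post(
        f"{BASE_URL}/api/query/fast-search",
        json={"query": "quarterly report", "max_results": 100},
        timeout=30,
    )
    resp.raise_for_status()
    payload = resp.json()

    print(payload["total_found"])  # number of chunks returned
    for result in payload["results"][:5]:
        print(result["document_id"], result["score"])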
@@ -667,3 +667,5 @@ IMAGE_ANALYSIS_SYSTEM_PROMPT = os.environ.get(
     "IMAGE_ANALYSIS_SYSTEM_PROMPT",
     DEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT,
 )
+
+FAST_SEARCH_MAX_HITS = 300
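The new constant is hard-coded. If it ever needs to be tunable per deployment, it could follow the same os.environ.get pattern used by the surrounding settings; a sketch, assuming an env var of the same name (not in the diff):

    import os

    # Hypothetical: allow overriding the fast-search cap via the environment.
    FAST_SEARCH_MAX_HITS = int(os.environ.get("FAST_SEARCH_MAX_HITS") or 300)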
@@ -1,182 +0,0 @@
from collections.abc import Callable
from typing import cast
from typing import Optional

from sqlalchemy.orm import Session

from onyx.context.search.enums import QueryFlow
from onyx.context.search.enums import SearchType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import RetrievalMetricsContainer
from onyx.context.search.models import SearchQuery
from onyx.context.search.models import SearchRequest
from onyx.context.search.retrieval.search_runner import retrieve_chunks
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger

logger = setup_logger()

# Constant for the maximum number of search results to return in fast search
FAST_SEARCH_MAX_HITS = 300


class FastSearchPipeline:
    """A streamlined version of SearchPipeline that only retrieves chunks without section expansion or merging.

    This is optimized for quickly returning a large number of search results without the overhead
    of section expansion, reranking, and relevance evaluation.
    """

    def __init__(
        self,
        search_request: SearchRequest,
        user: User | None,
        llm: LLM,
        fast_llm: LLM,
        skip_query_analysis: bool,
        db_session: Session,
        bypass_acl: bool = False,
        retrieval_metrics_callback: Optional[
            Callable[[RetrievalMetricsContainer], None]
        ] = None,
        max_results: int = FAST_SEARCH_MAX_HITS,
    ):
        self.search_request = search_request
        self.user = user
        self.llm = llm
        self.fast_llm = fast_llm
        self.skip_query_analysis = skip_query_analysis
        self.db_session = db_session
        self.bypass_acl = bypass_acl
        self.retrieval_metrics_callback = retrieval_metrics_callback
        self.max_results = max_results

        self.search_settings = get_current_search_settings(db_session)
        self.document_index = get_default_document_index(self.search_settings, None)

        # Preprocessing steps generate this
        self._search_query: Optional[SearchQuery] = None
        self._predicted_search_type: Optional[SearchType] = None

        # Initial document index retrieval chunks
        self._retrieved_chunks: Optional[list[InferenceChunk]] = None

        # Default flow type
        self._predicted_flow: Optional[QueryFlow] = QueryFlow.QUESTION_ANSWER

    def _run_preprocessing(self) -> None:
        """Run a simplified version of preprocessing that only prepares the search query.

        This skips complex query analysis and just focuses on preparing the basic search parameters.
        """
        # Create a simplified search query with the necessary parameters
        self._search_query = SearchQuery(
            query=self.search_request.query,
            search_type=self.search_request.search_type,
            filters=self.search_request.human_selected_filters
            or IndexFilters(access_control_list=None),
            hybrid_alpha=0.5,  # Default hybrid search balance
            recency_bias_multiplier=self.search_request.recency_bias_multiplier or 1.0,
            num_hits=self.max_results,  # Use the higher limit here
            offset=self.search_request.offset or 0,
            chunks_above=0,  # Skip section expansion
            chunks_below=0,  # Skip section expansion
            precomputed_query_embedding=self.search_request.precomputed_query_embedding,
            precomputed_is_keyword=self.search_request.precomputed_is_keyword,
            processed_keywords=self.search_request.precomputed_keywords,
        )
        self._predicted_search_type = self._search_query.search_type

    @property
    def search_query(self) -> SearchQuery:
        """Get the search query, running preprocessing if necessary."""
        if self._search_query is not None:
            return self._search_query

        self._run_preprocessing()
        return cast(SearchQuery, self._search_query)

    @property
    def predicted_search_type(self) -> SearchType:
        """Get the predicted search type."""
        if self._predicted_search_type is not None:
            return self._predicted_search_type

        self._run_preprocessing()
        return cast(SearchType, self._predicted_search_type)

    @property
    def predicted_flow(self) -> QueryFlow:
        """Get the predicted query flow."""
        if self._predicted_flow is not None:
            return self._predicted_flow

        self._run_preprocessing()
        return cast(QueryFlow, self._predicted_flow)

    @property
    def retrieved_chunks(self) -> list[InferenceChunk]:
        """Get the retrieved chunks from the document index."""
        if self._retrieved_chunks is not None:
            return self._retrieved_chunks

        # Use the existing retrieve_chunks function with our search query
        self._retrieved_chunks = retrieve_chunks(
            query=self.search_query,
            document_index=self.document_index,
            db_session=self.db_session,
            retrieval_metrics_callback=self.retrieval_metrics_callback,
        )

        return self._retrieved_chunks


def run_fast_search(
    search_request: SearchRequest,
    user: User | None,
    llm: LLM,
    fast_llm: LLM,
    db_session: Session,
    max_results: int = FAST_SEARCH_MAX_HITS,
) -> list[InferenceChunk]:
    """Run a fast search that returns up to 300 results without section expansion or merging.

    Args:
        search_request: The search request containing the query and filters
        user: The current user
        llm: The main LLM instance
        fast_llm: The faster LLM instance for some operations
        db_session: The database session
        max_results: Maximum number of results to return (default: 300)

    Returns:
        A list of InferenceChunk objects representing the search results
    """
    # Create a modified search request with optimized parameters
    # Skip unnecessary processing by setting these properties
    modified_request = search_request.model_copy(
        update={
            "chunks_above": 0,  # Skip section expansion
            "chunks_below": 0,  # Skip section expansion
            "evaluation_type": None,  # Skip LLM evaluation
            "limit": max_results,  # Use higher limit
        }
    )

    # Create and run the fast search pipeline
    pipeline = FastSearchPipeline(
        search_request=modified_request,
        user=user,
        llm=llm,
        fast_llm=fast_llm,
        skip_query_analysis=True,  # Skip complex query analysis
        db_session=db_session,
        max_results=max_results,
    )

    # Just get the retrieved chunks without further processing
    return pipeline.retrieved_chunks
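For context, the entry point of this now-deleted module was invoked roughly as below; this mirrors what the pre-refactor endpoint code above did. The wrapper function and its argument sourcing are schematic, not from the diff.

    # Schematic usage of the removed run_fast_search API.
    from onyx.context.search.models import SearchRequest


    def fast_search_example(db_session, llm, fast_llm, user=None):
        search_request = SearchRequest(query="security policy")
        chunks = run_fast_search(
            search_request=search_request,
            user=user,
            llm=llm,
            fast_llm=fast_llm,
            db_session=db_session,
            max_results=100,
        )
        # InferenceChunk fields used by the endpoint's response conversion
        return [(chunk.document_id, chunk.score) for chunk in chunks]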
@@ -317,14 +317,10 @@ export function ChatPage({
           (assistant) => assistant.id === existingChatSessionAssistantId
         )
       : defaultAssistantId !== undefined
         ? availableAssistants.find(
             (assistant) => assistant.id === defaultAssistantId
           )
         : undefined
   );
-  // Gather default temperature settings
-  const search_param_temperature = searchParams.get(
-    SEARCH_PARAM_NAMES.TEMPERATURE
-  );

   const setSelectedAssistantFromId = (assistantId: number) => {
File diff suppressed because it is too large.
@@ -43,12 +43,8 @@ export function SearchResults({

   return (
     <div className="flex flex-col w-full">
-      {documents.map((doc) => (
-        <SearchResultItem
-          key={doc.document_id}
-          document={doc}
-          onClick={onDocumentClick}
-        />
+      {documents.map((doc, ind) => (
+        <SearchResultItem key={ind} document={doc} onClick={onDocumentClick} />
       ))}
     </div>
   );
@@ -17,6 +17,25 @@ export interface SearchStreamResponse {
   error: string | null;
 }

+// Define interface matching FastSearchResult
+interface FastSearchResult {
+  document_id: string;
+  chunk_id: number;
+  content: string;
+  source_links: string[];
+  score?: number;
+  metadata?: {
+    source_type?: string;
+    semantic_identifier?: string;
+    boost?: number;
+    hidden?: boolean;
+    updated_at?: string;
+    primary_owners?: string[];
+    secondary_owners?: string[];
+    [key: string]: any;
+  };
+}
+
 export async function* streamSearchWithCitation({
   query,
   persona,
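Note that this interface types source_links as string[], while the backend model above now declares dict[int, str] | None. A defensive sketch that tolerates both shapes when pulling the first link (the helper is hypothetical, written in Python for consistency with the backend examples):

    # Hypothetical helper: accept both list-shaped and dict-shaped source_links.
    def first_source_link(source_links) -> str | None:
        if not source_links:
            return None
        if isinstance(source_links, dict):
            # dict[int, str]: keys are offsets; take the link at the lowest offset
            return source_links[min(source_links)]
        return source_links[0]  # list[str]


    assert first_source_link(None) is None
    assert first_source_link({4: "https://a", 0: "https://b"}) == "https://b"
    assert first_source_link(["https://c"]) == "https://c"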
@@ -34,24 +53,16 @@ export async function* streamSearchWithCitation({
 }): AsyncGenerator<SearchStreamResponse> {
   const filters = buildFilters(sources, documentSets, timeRange, tags);

-  const response = await fetch("/api/query/search", {
+  // Use the fast-search endpoint instead
+  const response = await fetch("/api/query/fast-search", {
     method: "POST",
     headers: {
       "Content-Type": "application/json",
     },
     body: JSON.stringify({
-      persona_id: persona.id,
-      messages: [
-        {
-          role: "user",
-          message: query,
-        },
-      ],
-      retrieval_options: {
-        filters: filters,
-        favor_recent: true,
-      },
-      skip_gen_ai_answer_generation: false,
+      query: query,
+      filters: filters,
+      max_results: 300, // Use the default max results for fast search
     }),
   });
@@ -65,43 +76,59 @@
     return;
   }

-  let currentAnswer = "";
-  let documents: OnyxDocument[] = [];
-  let error: string | null = null;
-
-  for await (const packet of handleSSEStream(response)) {
-    if ("error" in packet && packet.error) {
-      error = (packet as StreamingError).error;
-      yield {
-        answer: currentAnswer,
-        documents,
-        error,
-      };
-      continue;
-    }
-
-    if ("answer_piece" in packet && packet.answer_piece) {
-      currentAnswer += (packet as AnswerPiecePacket).answer_piece;
-      yield {
-        answer: currentAnswer,
-        documents,
-        error,
-      };
-    }
-
-    if ("top_documents" in packet && packet.top_documents) {
-      documents = (packet as DocumentInfoPacket).top_documents;
-      yield {
-        answer: currentAnswer,
-        documents,
-        error,
-      };
-    }
-  }
-
-  yield {
-    answer: currentAnswer,
-    documents,
-    error,
-  };
+  // Since fast-search is not streaming, we need to process the complete response
+  const searchResults = await response.json();
+
+  // Convert results to OnyxDocument format
+  const documents: OnyxDocument[] = searchResults.results.map(
+    (result: FastSearchResult) => {
+      // Create a blurb from the content (first 200 chars)
+      const blurb =
+        result.content.substring(0, 200) +
+        (result.content.length > 200 ? "..." : "");
+
+      // Get the source link if available
+      const link =
+        result.source_links && result.source_links.length > 0
+          ? result.source_links[0]
+          : null;
+
+      // Convert to OnyxDocument format
+      return {
+        document_id: result.document_id,
+        chunk_ind: result.chunk_id,
+        content: result.content,
+        source_type: result.metadata?.source_type || "unknown",
+        semantic_identifier: result.metadata?.semantic_identifier || "Unknown",
+        score: result.score || 0,
+        metadata: result.metadata || {},
+        match_highlights: [],
+        is_internet: false,
+        link: link,
+        updated_at: result.metadata?.updated_at
+          ? new Date(result.metadata.updated_at).toISOString()
+          : null,
+        blurb: blurb,
+        primary_owners: result.metadata?.primary_owners || [],
+        secondary_owners: result.metadata?.secondary_owners || [],
+        boost: result.metadata?.boost || 0,
+        hidden: result.metadata?.hidden || false,
+        validationState: null,
+      };
+    }
+  );
+
+  // First yield just the documents to maintain similar streaming behavior
+  yield {
+    answer: null,
+    documents,
+    error: null,
+  };
+
+  // Final yield with completed results
+  yield {
+    answer: null,
+    documents,
+    error: null,
+  };
 }
@@ -10,7 +10,12 @@ export function SourceIcon({
   sourceType: ValidSources;
   iconSize: number;
 }) {
-  return getSourceMetadata(sourceType).icon({
-    size: iconSize,
-  });
+  try {
+    return getSourceMetadata(sourceType).icon({
+      size: iconSize,
+    });
+  } catch (error) {
+    console.error("Error getting source icon:", error);
+    return null;
+  }
 }