fast but broken

pablonyx 2025-03-14 12:28:11 -07:00
parent 48b07462e8
commit 7fd59894c9
8 changed files with 147 additions and 1330 deletions

View File

@@ -24,10 +24,10 @@ from onyx.chat.chat_utils import prepare_chat_message_request
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.process_message import ChatPacketStream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.app_configs import FAST_SEARCH_MAX_HITS
from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
from onyx.context.search.fast_search import FAST_SEARCH_MAX_HITS
from onyx.context.search.fast_search import run_fast_search
from onyx.context.search.models import RetrievalOptions
from onyx.context.search.enums import LLMEvaluationType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import SavedSearchDocWithContent
from onyx.context.search.models import SearchRequest
from onyx.context.search.pipeline import SearchPipeline
@@ -35,19 +35,16 @@ from onyx.context.search.utils import dedupe_documents
from onyx.context.search.utils import drop_llm_indices
from onyx.context.search.utils import relevant_sections_to_indices
from onyx.db.chat import get_prompt_by_id
from onyx.db.dependencies import get_session
from onyx.db.engine import get_session
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.llm.factory import AllLLMs
from onyx.llm.factory import AllModelProviders
from onyx.llm.factory import get_default_llms
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.factory import get_main_llm_from_tuple
from onyx.llm.utils import get_max_input_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.utils import get_json_line
from onyx.utils.license import check_user_license_if_ee_feature
from onyx.utils.logger import setup_logger
@@ -297,7 +294,9 @@ class FastSearchRequest(BaseModel):
"""Request for fast search endpoint that returns raw search results without section merging."""
query: str
retrieval_options: Optional[RetrievalOptions] = None
filters: BaseFilters | None = (
None # Direct filter options instead of retrieval_options
)
max_results: Optional[
int
] = None # If not provided, defaults to FAST_SEARCH_MAX_HITS
@@ -309,7 +308,7 @@ class FastSearchResult(BaseModel):
document_id: str
chunk_id: int
content: str
source_links: list[str] = []
source_links: dict[int, str] | None = None
score: Optional[float] = None
metadata: Optional[dict] = None
@@ -333,54 +332,57 @@ def get_fast_search_response(
of section expansion, reranking, relevance evaluation, and merging.
"""
try:
# Set up the search request
# Set up the search request with optimized settings
max_results = request.max_results or FAST_SEARCH_MAX_HITS
# Create a search request with optimized settings
search_request = SearchRequest(
query=request.query,
retrieval_options=request.retrieval_options,
human_selected_filters=request.filters,
# Skip section expansion
chunks_above=0,
chunks_below=0,
# Skip LLM evaluation
evaluation_type=LLMEvaluationType.SKIP,
# Limit the number of results
limit=max_results,
)
# Set up the LLM instances
with AllModelProviders() as all_model_providers:
with AllLLMs(
model_providers=all_model_providers,
persona=Persona(
id="default",
name="Default",
llm_relevance_filter=False,
),
db_session=db_session,
) as llm_instances:
# Get user's license status
check_user_license_if_ee_feature(user, db_session, "fast_search")
# Run the fast search
max_results = request.max_results or FAST_SEARCH_MAX_HITS
chunks = run_fast_search(
search_request=search_request,
user=user,
llm=llm_instances.llm,
fast_llm=llm_instances.fast_llm,
db_session=db_session,
max_results=max_results,
)
llm, fast_llm = get_default_llms()
# Convert chunks to response format
results = [
FastSearchResult(
document_id=chunk.document_id,
chunk_id=chunk.chunk_id,
content=chunk.content,
source_links=chunk.source_links,
score=chunk.score,
metadata=chunk.metadata,
)
for chunk in chunks
]
# Create the search pipeline with optimized settings
search_pipeline = SearchPipeline(
search_request=search_request,
user=user,
llm=llm,
fast_llm=fast_llm,
skip_query_analysis=True, # Skip expensive query analysis
db_session=db_session,
bypass_acl=False,
)
return FastSearchResponse(
results=results,
total_found=len(results),
)
# Only retrieve chunks without further processing
chunks = search_pipeline._get_chunks()
# Convert chunks to response format
results = [
FastSearchResult(
document_id=chunk.document_id,
chunk_id=chunk.chunk_id,
content=chunk.content,
source_links=chunk.source_links,
score=chunk.score,
metadata=chunk.metadata,
)
for chunk in chunks
]
return FastSearchResponse(
results=results,
total_found=len(results),
)
except Exception as e:
logger.exception("Error in fast search")
raise HTTPException(status_code=500, detail=str(e))
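For reference, a minimal client sketch of the new endpoint. It assumes the route is mounted at /api/query/fast-search (the path the frontend change below calls) and a locally running API server; authentication is omitted and the query string and max_results value are placeholders.

```python
import requests

# Hypothetical base URL for a local Onyx API server; adjust for your deployment.
BASE_URL = "http://localhost:8080"

payload = {
    "query": "quarterly revenue report",  # placeholder query
    # "filters" accepts a BaseFilters-shaped object; omitted here, so no filters apply.
    "max_results": 300,  # optional; the server falls back to FAST_SEARCH_MAX_HITS
}

# Authentication headers/cookies are omitted for brevity.
resp = requests.post(f"{BASE_URL}/api/query/fast-search", json=payload, timeout=30)
resp.raise_for_status()

body = resp.json()  # FastSearchResponse: {"results": [...], "total_found": N}
print("total_found:", body["total_found"])
for result in body["results"][:5]:
    # Each entry mirrors FastSearchResult: document_id, chunk_id, content,
    # source_links, score, metadata.
    print(result["document_id"], result["chunk_id"], result.get("score"))
```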

View File

@@ -667,3 +667,5 @@ IMAGE_ANALYSIS_SYSTEM_PROMPT = os.environ.get(
"IMAGE_ANALYSIS_SYSTEM_PROMPT",
DEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT,
)
FAST_SEARCH_MAX_HITS = 300

View File

@@ -1,182 +0,0 @@
from collections.abc import Callable
from typing import cast
from typing import Optional
from sqlalchemy.orm import Session
from onyx.context.search.enums import QueryFlow
from onyx.context.search.enums import SearchType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import RetrievalMetricsContainer
from onyx.context.search.models import SearchQuery
from onyx.context.search.models import SearchRequest
from onyx.context.search.retrieval.search_runner import retrieve_chunks
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Constant for the maximum number of search results to return in fast search
FAST_SEARCH_MAX_HITS = 300
class FastSearchPipeline:
"""A streamlined version of SearchPipeline that only retrieves chunks without section expansion or merging.
This is optimized for quickly returning a large number of search results without the overhead
of section expansion, reranking, and relevance evaluation.
"""
def __init__(
self,
search_request: SearchRequest,
user: User | None,
llm: LLM,
fast_llm: LLM,
skip_query_analysis: bool,
db_session: Session,
bypass_acl: bool = False,
retrieval_metrics_callback: Optional[
Callable[[RetrievalMetricsContainer], None]
] = None,
max_results: int = FAST_SEARCH_MAX_HITS,
):
self.search_request = search_request
self.user = user
self.llm = llm
self.fast_llm = fast_llm
self.skip_query_analysis = skip_query_analysis
self.db_session = db_session
self.bypass_acl = bypass_acl
self.retrieval_metrics_callback = retrieval_metrics_callback
self.max_results = max_results
self.search_settings = get_current_search_settings(db_session)
self.document_index = get_default_document_index(self.search_settings, None)
# Preprocessing steps generate this
self._search_query: Optional[SearchQuery] = None
self._predicted_search_type: Optional[SearchType] = None
# Initial document index retrieval chunks
self._retrieved_chunks: Optional[list[InferenceChunk]] = None
# Default flow type
self._predicted_flow: Optional[QueryFlow] = QueryFlow.QUESTION_ANSWER
def _run_preprocessing(self) -> None:
"""Run a simplified version of preprocessing that only prepares the search query.
This skips complex query analysis and just focuses on preparing the basic search parameters.
"""
# Create a simplified search query with the necessary parameters
self._search_query = SearchQuery(
query=self.search_request.query,
search_type=self.search_request.search_type,
filters=self.search_request.human_selected_filters
or IndexFilters(access_control_list=None),
hybrid_alpha=0.5, # Default hybrid search balance
recency_bias_multiplier=self.search_request.recency_bias_multiplier or 1.0,
num_hits=self.max_results, # Use the higher limit here
offset=self.search_request.offset or 0,
chunks_above=0, # Skip section expansion
chunks_below=0, # Skip section expansion
precomputed_query_embedding=self.search_request.precomputed_query_embedding,
precomputed_is_keyword=self.search_request.precomputed_is_keyword,
processed_keywords=self.search_request.precomputed_keywords,
)
self._predicted_search_type = self._search_query.search_type
@property
def search_query(self) -> SearchQuery:
"""Get the search query, running preprocessing if necessary."""
if self._search_query is not None:
return self._search_query
self._run_preprocessing()
return cast(SearchQuery, self._search_query)
@property
def predicted_search_type(self) -> SearchType:
"""Get the predicted search type."""
if self._predicted_search_type is not None:
return self._predicted_search_type
self._run_preprocessing()
return cast(SearchType, self._predicted_search_type)
@property
def predicted_flow(self) -> QueryFlow:
"""Get the predicted query flow."""
if self._predicted_flow is not None:
return self._predicted_flow
self._run_preprocessing()
return cast(QueryFlow, self._predicted_flow)
@property
def retrieved_chunks(self) -> list[InferenceChunk]:
"""Get the retrieved chunks from the document index."""
if self._retrieved_chunks is not None:
return self._retrieved_chunks
# Use the existing retrieve_chunks function with our search query
self._retrieved_chunks = retrieve_chunks(
query=self.search_query,
document_index=self.document_index,
db_session=self.db_session,
retrieval_metrics_callback=self.retrieval_metrics_callback,
)
return self._retrieved_chunks
def run_fast_search(
search_request: SearchRequest,
user: User | None,
llm: LLM,
fast_llm: LLM,
db_session: Session,
max_results: int = FAST_SEARCH_MAX_HITS,
) -> list[InferenceChunk]:
"""Run a fast search that returns up to 300 results without section expansion or merging.
Args:
search_request: The search request containing the query and filters
user: The current user
llm: The main LLM instance
fast_llm: The faster LLM instance for some operations
db_session: The database session
max_results: Maximum number of results to return (default: 300)
Returns:
A list of InferenceChunk objects representing the search results
"""
# Create a modified search request with optimized parameters
# Skip unnecessary processing by setting these properties
modified_request = search_request.model_copy(
update={
"chunks_above": 0, # Skip section expansion
"chunks_below": 0, # Skip section expansion
"evaluation_type": None, # Skip LLM evaluation
"limit": max_results, # Use higher limit
}
)
# Create and run the fast search pipeline
pipeline = FastSearchPipeline(
search_request=modified_request,
user=user,
llm=llm,
fast_llm=fast_llm,
skip_query_analysis=True, # Skip complex query analysis
db_session=db_session,
max_results=max_results,
)
# Just get the retrieved chunks without further processing
return pipeline.retrieved_chunks
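For context, this is roughly how the removed helper was driven by the old endpoint code above. It is a sketch reconstructed from that code and valid only against the parent commit (the module no longer exists after this change); the user and db_session are assumed to come from the usual FastAPI dependencies, and the LLM pair is taken from get_default_llms() for simplicity.

```python
from sqlalchemy.orm import Session

from onyx.context.search.fast_search import FAST_SEARCH_MAX_HITS, run_fast_search
from onyx.context.search.models import SearchRequest
from onyx.db.models import User
from onyx.llm.factory import get_default_llms


def fast_search_example(user: User | None, db_session: Session):
    # Same shape of request the old endpoint built before handing off.
    search_request = SearchRequest(query="quarterly revenue report")  # placeholder

    # The (llm, fast_llm) pair; the old endpoint wired these up via its own helpers.
    llm, fast_llm = get_default_llms()

    # Returns raw InferenceChunks: no section expansion, reranking, or LLM evaluation.
    return run_fast_search(
        search_request=search_request,
        user=user,
        llm=llm,
        fast_llm=fast_llm,
        db_session=db_session,
        max_results=FAST_SEARCH_MAX_HITS,
    )
```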

View File

@@ -317,14 +317,10 @@ export function ChatPage({
(assistant) => assistant.id === existingChatSessionAssistantId
)
: defaultAssistantId !== undefined
? availableAssistants.find(
(assistant) => assistant.id === defaultAssistantId
)
: undefined
);
// Gather default temperature settings
const search_param_temperature = searchParams.get(
SEARCH_PARAM_NAMES.TEMPERATURE
? availableAssistants.find(
(assistant) => assistant.id === defaultAssistantId
)
: undefined
);
const setSelectedAssistantFromId = (assistantId: number) => {

File diff suppressed because it is too large.

View File

@@ -43,12 +43,8 @@ export function SearchResults({
return (
<div className="flex flex-col w-full">
{documents.map((doc) => (
<SearchResultItem
key={doc.document_id}
document={doc}
onClick={onDocumentClick}
/>
{documents.map((doc, ind) => (
<SearchResultItem key={ind} document={doc} onClick={onDocumentClick} />
))}
</div>
);

View File

@@ -17,6 +17,25 @@ export interface SearchStreamResponse {
error: string | null;
}
// Define interface matching FastSearchResult
interface FastSearchResult {
document_id: string;
chunk_id: number;
content: string;
source_links: string[];
score?: number;
metadata?: {
source_type?: string;
semantic_identifier?: string;
boost?: number;
hidden?: boolean;
updated_at?: string;
primary_owners?: string[];
secondary_owners?: string[];
[key: string]: any;
};
}
export async function* streamSearchWithCitation({
query,
persona,
@@ -34,24 +53,16 @@ export async function* streamSearchWithCitation({
}): AsyncGenerator<SearchStreamResponse> {
const filters = buildFilters(sources, documentSets, timeRange, tags);
const response = await fetch("/api/query/search", {
// Use the fast-search endpoint instead
const response = await fetch("/api/query/fast-search", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
persona_id: persona.id,
messages: [
{
role: "user",
message: query,
},
],
retrieval_options: {
filters: filters,
favor_recent: true,
},
skip_gen_ai_answer_generation: false,
query: query,
filters: filters,
max_results: 300, // Use the default max results for fast search
}),
});
@@ -65,43 +76,59 @@
return;
}
let currentAnswer = "";
let documents: OnyxDocument[] = [];
let error: string | null = null;
// Since fast-search is not streaming, we need to process the complete response
const searchResults = await response.json();
for await (const packet of handleSSEStream(response)) {
if ("error" in packet && packet.error) {
error = (packet as StreamingError).error;
yield {
answer: currentAnswer,
documents,
error,
};
continue;
}
// Convert results to OnyxDocument format
const documents: OnyxDocument[] = searchResults.results.map(
(result: FastSearchResult) => {
// Create a blurb from the content (first 200 chars)
const blurb =
result.content.substring(0, 200) +
(result.content.length > 200 ? "..." : "");
if ("answer_piece" in packet && packet.answer_piece) {
currentAnswer += (packet as AnswerPiecePacket).answer_piece;
yield {
answer: currentAnswer,
documents,
error,
// Get the source link if available
const link =
result.source_links && result.source_links.length > 0
? result.source_links[0]
: null;
// Convert to OnyxDocument format
return {
document_id: result.document_id,
chunk_ind: result.chunk_id,
content: result.content,
source_type: result.metadata?.source_type || "unknown",
semantic_identifier: result.metadata?.semantic_identifier || "Unknown",
score: result.score || 0,
metadata: result.metadata || {},
match_highlights: [],
is_internet: false,
link: link,
updated_at: result.metadata?.updated_at
? new Date(result.metadata.updated_at).toISOString()
: null,
blurb: blurb,
primary_owners: result.metadata?.primary_owners || [],
secondary_owners: result.metadata?.secondary_owners || [],
boost: result.metadata?.boost || 0,
hidden: result.metadata?.hidden || false,
validationState: null,
};
}
);
if ("top_documents" in packet && packet.top_documents) {
documents = (packet as DocumentInfoPacket).top_documents;
yield {
answer: currentAnswer,
documents,
error,
};
}
}
// First yield just the documents to maintain similar streaming behavior
yield {
answer: currentAnswer,
answer: null,
documents,
error,
error: null,
};
// Final yield with completed results
yield {
answer: null,
documents,
error: null,
};
}

View File

@@ -10,7 +10,12 @@ export function SourceIcon({
sourceType: ValidSources;
iconSize: number;
}) {
return getSourceMetadata(sourceType).icon({
size: iconSize,
});
try {
return getSourceMetadata(sourceType).icon({
size: iconSize,
});
} catch (error) {
console.error("Error getting source icon:", error);
return null;
}
}