fast but broken

pablonyx 2025-03-14 12:28:11 -07:00
parent 48b07462e8
commit 7fd59894c9
8 changed files with 147 additions and 1330 deletions

View File

@@ -24,10 +24,10 @@ from onyx.chat.chat_utils import prepare_chat_message_request
from onyx.chat.models import PersonaOverrideConfig
from onyx.chat.process_message import ChatPacketStream
from onyx.chat.process_message import stream_chat_message_objects
from onyx.configs.app_configs import FAST_SEARCH_MAX_HITS
from onyx.configs.onyxbot_configs import MAX_THREAD_CONTEXT_PERCENTAGE
from onyx.context.search.fast_search import FAST_SEARCH_MAX_HITS
from onyx.context.search.fast_search import run_fast_search
from onyx.context.search.models import RetrievalOptions
from onyx.context.search.enums import LLMEvaluationType
from onyx.context.search.models import BaseFilters
from onyx.context.search.models import SavedSearchDocWithContent
from onyx.context.search.models import SearchRequest
from onyx.context.search.pipeline import SearchPipeline
@@ -35,19 +35,16 @@ from onyx.context.search.utils import dedupe_documents
from onyx.context.search.utils import drop_llm_indices
from onyx.context.search.utils import relevant_sections_to_indices
from onyx.db.chat import get_prompt_by_id
from onyx.db.dependencies import get_session
from onyx.db.engine import get_session
from onyx.db.models import Persona
from onyx.db.models import User
from onyx.db.persona import get_persona_by_id
from onyx.llm.factory import AllLLMs
from onyx.llm.factory import AllModelProviders
from onyx.llm.factory import get_default_llms
from onyx.llm.factory import get_llms_for_persona
from onyx.llm.factory import get_main_llm_from_tuple
from onyx.llm.utils import get_max_input_tokens
from onyx.natural_language_processing.utils import get_tokenizer
from onyx.server.utils import get_json_line
from onyx.utils.license import check_user_license_if_ee_feature
from onyx.utils.logger import setup_logger
@@ -297,7 +294,9 @@ class FastSearchRequest(BaseModel):
"""Request for fast search endpoint that returns raw search results without section merging."""
query: str
retrieval_options: Optional[RetrievalOptions] = None
filters: BaseFilters | None = (
None # Direct filter options instead of retrieval_options
)
max_results: Optional[
int
] = None # If not provided, defaults to FAST_SEARCH_MAX_HITS
@@ -309,7 +308,7 @@ class FastSearchResult(BaseModel):
document_id: str
chunk_id: int
content: str
source_links: list[str] = []
source_links: dict[int, str] | None = None
score: Optional[float] = None
metadata: Optional[dict] = None
@@ -333,54 +332,57 @@ def get_fast_search_response(
of section expansion, reranking, relevance evaluation, and merging.
"""
try:
# Set up the search request
# Set up the search request with optimized settings
max_results = request.max_results or FAST_SEARCH_MAX_HITS
# Create a search request with optimized settings
search_request = SearchRequest(
query=request.query,
retrieval_options=request.retrieval_options,
human_selected_filters=request.filters,
# Skip section expansion
chunks_above=0,
chunks_below=0,
# Skip LLM evaluation
evaluation_type=LLMEvaluationType.SKIP,
# Limit the number of results
limit=max_results,
)
# Set up the LLM instances
with AllModelProviders() as all_model_providers:
with AllLLMs(
model_providers=all_model_providers,
persona=Persona(
id="default",
name="Default",
llm_relevance_filter=False,
),
db_session=db_session,
) as llm_instances:
# Get user's license status
check_user_license_if_ee_feature(user, db_session, "fast_search")
# Run the fast search
max_results = request.max_results or FAST_SEARCH_MAX_HITS
chunks = run_fast_search(
search_request=search_request,
user=user,
llm=llm_instances.llm,
fast_llm=llm_instances.fast_llm,
db_session=db_session,
max_results=max_results,
)
llm, fast_llm = get_default_llms()
# Convert chunks to response format
results = [
FastSearchResult(
document_id=chunk.document_id,
chunk_id=chunk.chunk_id,
content=chunk.content,
source_links=chunk.source_links,
score=chunk.score,
metadata=chunk.metadata,
)
for chunk in chunks
]
# Create the search pipeline with optimized settings
search_pipeline = SearchPipeline(
search_request=search_request,
user=user,
llm=llm,
fast_llm=fast_llm,
skip_query_analysis=True, # Skip expensive query analysis
db_session=db_session,
bypass_acl=False,
)
return FastSearchResponse(
results=results,
total_found=len(results),
)
# Only retrieve chunks without further processing
chunks = search_pipeline._get_chunks()
# Convert chunks to response format
results = [
FastSearchResult(
document_id=chunk.document_id,
chunk_id=chunk.chunk_id,
content=chunk.content,
source_links=chunk.source_links,
score=chunk.score,
metadata=chunk.metadata,
)
for chunk in chunks
]
return FastSearchResponse(
results=results,
total_found=len(results),
)
except Exception as e:
logger.exception("Error in fast search")
raise HTTPException(status_code=500, detail=str(e))
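For reference, a minimal client sketch of the new endpoint. It assumes the route is mounted at /api/query/fast-search (the path the frontend change below calls) and a locally running API server; authentication is omitted and the query string and max_results value are placeholders.

```python
import requests

# Hypothetical base URL for a local Onyx API server; adjust for your deployment.
BASE_URL = "http://localhost:8080"

payload = {
    "query": "quarterly revenue report",  # placeholder query
    # "filters" accepts a BaseFilters-shaped object; omitted here, so no filters apply.
    "max_results": 300,  # optional; the server falls back to FAST_SEARCH_MAX_HITS
}

# Authentication headers/cookies are omitted for brevity.
resp = requests.post(f"{BASE_URL}/api/query/fast-search", json=payload, timeout=30)
resp.raise_for_status()

body = resp.json()  # FastSearchResponse: {"results": [...], "total_found": N}
print("total_found:", body["total_found"])
for result in body["results"][:5]:
    # Each entry mirrors FastSearchResult: document_id, chunk_id, content,
    # source_links, score, metadata.
    print(result["document_id"], result["chunk_id"], result.get("score"))
```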

View File

@@ -667,3 +667,5 @@ IMAGE_ANALYSIS_SYSTEM_PROMPT = os.environ.get(
"IMAGE_ANALYSIS_SYSTEM_PROMPT",
DEFAULT_IMAGE_ANALYSIS_SYSTEM_PROMPT,
)
FAST_SEARCH_MAX_HITS = 300

View File

@@ -1,182 +0,0 @@
from collections.abc import Callable
from typing import cast
from typing import Optional
from sqlalchemy.orm import Session
from onyx.context.search.enums import QueryFlow
from onyx.context.search.enums import SearchType
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunk
from onyx.context.search.models import RetrievalMetricsContainer
from onyx.context.search.models import SearchQuery
from onyx.context.search.models import SearchRequest
from onyx.context.search.retrieval.search_runner import retrieve_chunks
from onyx.db.models import User
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.factory import get_default_document_index
from onyx.llm.interfaces import LLM
from onyx.utils.logger import setup_logger
logger = setup_logger()
# Constant for the maximum number of search results to return in fast search
FAST_SEARCH_MAX_HITS = 300
class FastSearchPipeline:
"""A streamlined version of SearchPipeline that only retrieves chunks without section expansion or merging.
This is optimized for quickly returning a large number of search results without the overhead
of section expansion, reranking, and relevance evaluation.
"""
def __init__(
self,
search_request: SearchRequest,
user: User | None,
llm: LLM,
fast_llm: LLM,
skip_query_analysis: bool,
db_session: Session,
bypass_acl: bool = False,
retrieval_metrics_callback: Optional[
Callable[[RetrievalMetricsContainer], None]
] = None,
max_results: int = FAST_SEARCH_MAX_HITS,
):
self.search_request = search_request
self.user = user
self.llm = llm
self.fast_llm = fast_llm
self.skip_query_analysis = skip_query_analysis
self.db_session = db_session
self.bypass_acl = bypass_acl
self.retrieval_metrics_callback = retrieval_metrics_callback
self.max_results = max_results
self.search_settings = get_current_search_settings(db_session)
self.document_index = get_default_document_index(self.search_settings, None)
# Preprocessing steps generate this
self._search_query: Optional[SearchQuery] = None
self._predicted_search_type: Optional[SearchType] = None
# Initial document index retrieval chunks
self._retrieved_chunks: Optional[list[InferenceChunk]] = None
# Default flow type
self._predicted_flow: Optional[QueryFlow] = QueryFlow.QUESTION_ANSWER
def _run_preprocessing(self) -> None:
"""Run a simplified version of preprocessing that only prepares the search query.
This skips complex query analysis and just focuses on preparing the basic search parameters.
"""
# Create a simplified search query with the necessary parameters
self._search_query = SearchQuery(
query=self.search_request.query,
search_type=self.search_request.search_type,
filters=self.search_request.human_selected_filters
or IndexFilters(access_control_list=None),
hybrid_alpha=0.5, # Default hybrid search balance
recency_bias_multiplier=self.search_request.recency_bias_multiplier or 1.0,
num_hits=self.max_results, # Use the higher limit here
offset=self.search_request.offset or 0,
chunks_above=0, # Skip section expansion
chunks_below=0, # Skip section expansion
precomputed_query_embedding=self.search_request.precomputed_query_embedding,
precomputed_is_keyword=self.search_request.precomputed_is_keyword,
processed_keywords=self.search_request.precomputed_keywords,
)
self._predicted_search_type = self._search_query.search_type
@property
def search_query(self) -> SearchQuery:
"""Get the search query, running preprocessing if necessary."""
if self._search_query is not None:
return self._search_query
self._run_preprocessing()
return cast(SearchQuery, self._search_query)
@property
def predicted_search_type(self) -> SearchType:
"""Get the predicted search type."""
if self._predicted_search_type is not None:
return self._predicted_search_type
self._run_preprocessing()
return cast(SearchType, self._predicted_search_type)
@property
def predicted_flow(self) -> QueryFlow:
"""Get the predicted query flow."""
if self._predicted_flow is not None:
return self._predicted_flow
self._run_preprocessing()
return cast(QueryFlow, self._predicted_flow)
@property
def retrieved_chunks(self) -> list[InferenceChunk]:
"""Get the retrieved chunks from the document index."""
if self._retrieved_chunks is not None:
return self._retrieved_chunks
# Use the existing retrieve_chunks function with our search query
self._retrieved_chunks = retrieve_chunks(
query=self.search_query,
document_index=self.document_index,
db_session=self.db_session,
retrieval_metrics_callback=self.retrieval_metrics_callback,
)
return self._retrieved_chunks
def run_fast_search(
search_request: SearchRequest,
user: User | None,
llm: LLM,
fast_llm: LLM,
db_session: Session,
max_results: int = FAST_SEARCH_MAX_HITS,
) -> list[InferenceChunk]:
"""Run a fast search that returns up to 300 results without section expansion or merging.
Args:
search_request: The search request containing the query and filters
user: The current user
llm: The main LLM instance
fast_llm: The faster LLM instance for some operations
db_session: The database session
max_results: Maximum number of results to return (default: 300)
Returns:
A list of InferenceChunk objects representing the search results
"""
# Create a modified search request with optimized parameters
# Skip unnecessary processing by setting these properties
modified_request = search_request.model_copy(
update={
"chunks_above": 0, # Skip section expansion
"chunks_below": 0, # Skip section expansion
"evaluation_type": None, # Skip LLM evaluation
"limit": max_results, # Use higher limit
}
)
# Create and run the fast search pipeline
pipeline = FastSearchPipeline(
search_request=modified_request,
user=user,
llm=llm,
fast_llm=fast_llm,
skip_query_analysis=True, # Skip complex query analysis
db_session=db_session,
max_results=max_results,
)
# Just get the retrieved chunks without further processing
return pipeline.retrieved_chunks
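For context, this is roughly how the removed helper was driven by the old endpoint code above. It is a sketch reconstructed from that code and valid only against the parent commit (the module no longer exists after this change); the user and db_session are assumed to come from the usual FastAPI dependencies, and the LLM pair is taken from get_default_llms() for simplicity.

```python
from sqlalchemy.orm import Session

from onyx.context.search.fast_search import FAST_SEARCH_MAX_HITS, run_fast_search
from onyx.context.search.models import SearchRequest
from onyx.db.models import User
from onyx.llm.factory import get_default_llms


def fast_search_example(user: User | None, db_session: Session):
    # Same shape of request the old endpoint built before handing off.
    search_request = SearchRequest(query="quarterly revenue report")  # placeholder

    # The (llm, fast_llm) pair; the old endpoint wired these up via its own helpers.
    llm, fast_llm = get_default_llms()

    # Returns raw InferenceChunks: no section expansion, reranking, or LLM evaluation.
    return run_fast_search(
        search_request=search_request,
        user=user,
        llm=llm,
        fast_llm=fast_llm,
        db_session=db_session,
        max_results=FAST_SEARCH_MAX_HITS,
    )
```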

View File

@@ -317,14 +317,10 @@ export function ChatPage({
(assistant) => assistant.id === existingChatSessionAssistantId
)
: defaultAssistantId !== undefined
? availableAssistants.find(
(assistant) => assistant.id === defaultAssistantId
)
: undefined
);
// Gather default temperature settings
const search_param_temperature = searchParams.get(
SEARCH_PARAM_NAMES.TEMPERATURE
? availableAssistants.find(
(assistant) => assistant.id === defaultAssistantId
)
: undefined
);
const setSelectedAssistantFromId = (assistantId: number) => {

File diff suppressed because it is too large.

View File

@@ -43,12 +43,8 @@ export function SearchResults({
return (
<div className="flex flex-col w-full">
{documents.map((doc) => (
<SearchResultItem
key={doc.document_id}
document={doc}
onClick={onDocumentClick}
/>
{documents.map((doc, ind) => (
<SearchResultItem key={ind} document={doc} onClick={onDocumentClick} />
))}
</div>
);

View File

@@ -17,6 +17,25 @@ export interface SearchStreamResponse {
error: string | null;
}
// Define interface matching FastSearchResult
interface FastSearchResult {
document_id: string;
chunk_id: number;
content: string;
source_links: string[];
score?: number;
metadata?: {
source_type?: string;
semantic_identifier?: string;
boost?: number;
hidden?: boolean;
updated_at?: string;
primary_owners?: string[];
secondary_owners?: string[];
[key: string]: any;
};
}
export async function* streamSearchWithCitation({
query,
persona,
@@ -34,24 +53,16 @@ export async function* streamSearchWithCitation({
}): AsyncGenerator<SearchStreamResponse> {
const filters = buildFilters(sources, documentSets, timeRange, tags);
const response = await fetch("/api/query/search", {
// Use the fast-search endpoint instead
const response = await fetch("/api/query/fast-search", {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({
persona_id: persona.id,
messages: [
{
role: "user",
message: query,
},
],
retrieval_options: {
filters: filters,
favor_recent: true,
},
skip_gen_ai_answer_generation: false,
query: query,
filters: filters,
max_results: 300, // Use the default max results for fast search
}),
});
@@ -65,43 +76,59 @@
return;
}
let currentAnswer = "";
let documents: OnyxDocument[] = [];
let error: string | null = null;
// Since fast-search is not streaming, we need to process the complete response
const searchResults = await response.json();
for await (const packet of handleSSEStream(response)) {
if ("error" in packet && packet.error) {
error = (packet as StreamingError).error;
yield {
answer: currentAnswer,
documents,
error,
};
continue;
}
// Convert results to OnyxDocument format
const documents: OnyxDocument[] = searchResults.results.map(
(result: FastSearchResult) => {
// Create a blurb from the content (first 200 chars)
const blurb =
result.content.substring(0, 200) +
(result.content.length > 200 ? "..." : "");
if ("answer_piece" in packet && packet.answer_piece) {
currentAnswer += (packet as AnswerPiecePacket).answer_piece;
yield {
answer: currentAnswer,
documents,
error,
// Get the source link if available
const link =
result.source_links && result.source_links.length > 0
? result.source_links[0]
: null;
// Convert to OnyxDocument format
return {
document_id: result.document_id,
chunk_ind: result.chunk_id,
content: result.content,
source_type: result.metadata?.source_type || "unknown",
semantic_identifier: result.metadata?.semantic_identifier || "Unknown",
score: result.score || 0,
metadata: result.metadata || {},
match_highlights: [],
is_internet: false,
link: link,
updated_at: result.metadata?.updated_at
? new Date(result.metadata.updated_at).toISOString()
: null,
blurb: blurb,
primary_owners: result.metadata?.primary_owners || [],
secondary_owners: result.metadata?.secondary_owners || [],
boost: result.metadata?.boost || 0,
hidden: result.metadata?.hidden || false,
validationState: null,
};
}
);
if ("top_documents" in packet && packet.top_documents) {
documents = (packet as DocumentInfoPacket).top_documents;
yield {
answer: currentAnswer,
documents,
error,
};
}
}
// First yield just the documents to maintain similar streaming behavior
yield {
answer: currentAnswer,
answer: null,
documents,
error,
error: null,
};
// Final yield with completed results
yield {
answer: null,
documents,
error: null,
};
}

View File

@@ -10,7 +10,12 @@ export function SourceIcon({
sourceType: ValidSources;
iconSize: number;
}) {
return getSourceMetadata(sourceType).icon({
size: iconSize,
});
try {
return getSourceMetadata(sourceType).icon({
size: iconSize,
});
} catch (error) {
console.error("Error getting source icon:", error);
return null;
}
}