Split chunks up by newline

Weves 2023-05-13 16:09:56 -07:00 committed by Chris Weaver
parent b825b39763
commit c68220103d


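For orientation (an assumption drawn from the handler code below, not confirmed against the backend): the response body streams newline-delimited JSON, one object per line, e.g.

  {"answer_data": "Hello"}
  {"top_documents": ["..."]}

A single network read, however, may carry several such lines at once, or cut one off mid-object. The two helpers added in this commit reassemble those fragments before parsing.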
@@ -5,6 +5,50 @@ import { SearchBar } from "./SearchBar";
 import { SearchResultsDisplay } from "./SearchResultsDisplay";
 import { Quote, Document } from "./types";
+const processSingleChunk = (
+  chunk: string,
+  currPartialChunk: string | null
+): [{ [key: string]: any } | null, string | null] => {
+  // the fragment left over from the previous read comes first in the stream,
+  // so it is prepended (not appended) to the new chunk
+  const completeChunk = (currPartialChunk || "") + chunk;
+  try {
+    // every complete chunk should be valid JSON
+    const chunkJson = JSON.parse(completeChunk);
+    return [chunkJson, null];
+  } catch (err) {
+    // if it's not valid JSON, then it's probably an incomplete chunk
+    return [null, completeChunk];
+  }
+};
+
+const processRawChunkString = (
+  rawChunkString: string,
+  previousPartialChunk: string | null
+): [any[], string | null] => {
+  /* This is required because, in practice, we see that nginx does not send
+  over each chunk one at a time even with buffering turned off. Instead,
+  chunks sometimes arrive in batches or are cut off partway through */
+  if (!rawChunkString) {
+    return [[], null];
+  }
+  const chunkSections = rawChunkString
+    .split("\n")
+    .filter((chunk) => chunk.length > 0);
+  const parsedChunkSections: any[] = [];
+  let currPartialChunk = previousPartialChunk;
+  chunkSections.forEach((chunk) => {
+    const [processedChunk, partialChunk] = processSingleChunk(
+      chunk,
+      currPartialChunk
+    );
+    if (processedChunk) {
+      parsedChunkSections.push(processedChunk);
+    }
+    // a successful parse returns a null partial, which also clears any
+    // fragment that the parse just consumed
+    currPartialChunk = partialChunk;
+  });
+  return [parsedChunkSections, currPartialChunk];
+};
+
 const searchRequestStreamed = async (
   query: string,
   updateCurrentAnswer: (val: string) => void,
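A sketch of the processSingleChunk contract (hypothetical inputs, shown for illustration only): a fragment that fails to parse is handed back to the caller so it can be retried once the rest of the object arrives.

  processSingleChunk('{"answer_data": "Hel', null);
  // -> [null, '{"answer_data": "Hel']
  processSingleChunk('lo"}', '{"answer_data": "Hel');
  // -> [{ answer_data: "Hello" }, null]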
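And processRawChunkString on a batched read (again hypothetical inputs): complete lines parse immediately, while a trailing fragment is carried over for the next call.

  processRawChunkString('{"answer_data": "a"}\n{"answer_data": "b"}\n{"top_doc', null);
  // -> [[{ answer_data: "a" }, { answer_data: "b" }], '{"top_doc']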
@@ -24,6 +68,7 @@ const searchRequestStreamed = (
     const reader = response.body?.getReader();
     const decoder = new TextDecoder("utf-8");
+    let previousPartialChunk: string | null = null;
     while (true) {
       const rawChunk = await reader?.read();
       if (!rawChunk) {
@@ -35,23 +80,31 @@ const searchRequestStreamed = (
       }
       // Process each chunk as it arrives
-      const chunk = decoder.decode(value, { stream: true });
-      if (!chunk) {
+      const [completedChunks, partialChunk] = processRawChunkString(
+        decoder.decode(value, { stream: true }),
+        previousPartialChunk
+      );
+      if (!completedChunks.length && !partialChunk) {
         break;
       }
-      const chunkJson = JSON.parse(chunk);
-      const answerChunk = chunkJson.answer_data;
+      // always overwrite the previous partial chunk, so a fragment consumed
+      // by this read does not linger and corrupt the next one
+      previousPartialChunk = partialChunk;
+      completedChunks.forEach((chunk) => {
+        // TODO: clean up response / this logic
+        const answerChunk = chunk.answer_data;
         if (answerChunk) {
           answer += answerChunk;
           updateCurrentAnswer(answer);
         } else {
-          const docs = chunkJson.top_documents as any[];
+          const docs = chunk.top_documents as any[];
           if (docs) {
-            updateDocs(docs.map((doc) => JSON.parse(doc)));
+            updateDocs(docs.map((doc) => JSON.parse(doc) as Document));
           } else {
-            updateQuotes(chunkJson);
+            updateQuotes(chunk as Record<string, Quote>);
           }
         }
+      });
     }
   } catch (err) {
     console.error("Fetch error:", err);
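Putting it together, a hypothetical two-read sequence through the loop above: previousPartialChunk threads the leftover bytes across iterations, so an object split by the network still parses once its tail arrives.

  // read 1 delivers: '{"answer_data": "Hel'
  //   -> completedChunks: [], previousPartialChunk: '{"answer_data": "Hel'
  // read 2 delivers: 'lo"}\n{"top_documents": []}'
  //   -> completedChunks: [{ answer_data: "Hello" }, { top_documents: [] }]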
@@ -76,6 +129,11 @@ export const SearchSection: React.FC<{}> = () => {
       searchRequestStreamed(query, setAnswer, setQuotes, setDocuments).then(
         () => {
           setIsFetching(false);
+          // if no quotes were given, set to empty object so that the
+          // SearchResultsDisplay component knows that the search was
+          // successful but no quotes were found; a functional update is used
+          // because the `quotes` captured by this closure may be stale
+          setQuotes((quotes) => quotes || {});
         }
       );
     }}