From 05970157f634193b073c8e141bdf19f5fc854a8b Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Thu, 3 Oct 2024 23:06:47 -0700
Subject: [PATCH] refac: knowledge duplicate check, error reporting, collection cleanup

---
 backend/open_webui/apps/retrieval/main.py         | 11 +++++++----
 backend/open_webui/apps/retrieval/utils.py        | 15 ++++++++++-----
 .../apps/retrieval/vector/dbs/chroma.py           | 11 +++++++----
 .../open_webui/apps/webui/routers/knowledge.py    |  9 ++++++++-
 backend/open_webui/constants.py                   |  4 +++-
 .../workspace/Knowledge/Collection.svelte         |  5 +++--
 6 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py
index c9ba33211..cd27b5530 100644
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -643,13 +643,16 @@ def save_docs_to_vector_db(
 
     # Check if entries with the same hash (metadata.hash) already exist
     if metadata and "hash" in metadata:
-        existing_docs = VECTOR_DB_CLIENT.query(
+        result = VECTOR_DB_CLIENT.query(
             collection_name=collection_name,
             filter={"hash": metadata["hash"]},
         )
-        if existing_docs:
-            log.info(f"Document with hash {metadata['hash']} already exists")
-            raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
+
+        if result:
+            existing_doc_ids = result.ids[0]
+            if existing_doc_ids:
+                log.info(f"Document with hash {metadata['hash']} already exists")
+                raise ValueError(ERROR_MESSAGES.DUPLICATE_CONTENT)
 
     if split:
         text_splitter = RecursiveCharacterTextSplitter(
diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py
index c671b03b4..f8c9ded5b 100644
--- a/backend/open_webui/apps/retrieval/utils.py
+++ b/backend/open_webui/apps/retrieval/utils.py
@@ -325,11 +325,16 @@ def get_rag_context(
         else:
             context = None
 
-            collection_names = (
-                file["collection_names"]
-                if file["type"] == "collection"
-                else [file["collection_name"]] if file["collection_name"] else []
-            )
+            collection_names = []
+            if file.get("type") == "collection":
+                if file.get("legacy"):
+                    collection_names = file.get("collection_names", [])
+                else:
+                    collection_names.append(file["id"])
+            elif file.get("collection_name"):
+                collection_names.append(file["collection_name"])
+            elif file.get("id"):
+                collection_names.append(f"file-{file['id']}")
 
             collection_names = set(collection_names).difference(extracted_collections)
             if not collection_names:
diff --git a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
index 4a85c1251..00b4af441 100644
--- a/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
+++ b/backend/open_webui/apps/retrieval/vector/dbs/chroma.py
@@ -70,7 +70,7 @@ class ChromaClient:
             return None
 
     def query(
-        self, collection_name: str, filter: dict, limit: int = 1
+        self, collection_name: str, filter: dict, limit: int = 2
     ) -> Optional[GetResult]:
         # Query the items from the collection based on the filter.
 
@@ -82,15 +82,18 @@ class ChromaClient:
                     limit=limit,
                 )
 
+                print(result)
+
                 return GetResult(
                     **{
-                        "ids": result["ids"],
-                        "documents": result["documents"],
-                        "metadatas": result["metadatas"],
+                        "ids": [result["ids"]],
+                        "documents": [result["documents"]],
+                        "metadatas": [result["metadatas"]],
                     }
                 )
             return None
         except Exception as e:
+            print(e)
             return None
 
     def get(self, collection_name: str) -> Optional[GetResult]:
diff --git a/backend/open_webui/apps/webui/routers/knowledge.py b/backend/open_webui/apps/webui/routers/knowledge.py
index c55d84d22..1fb23b40b 100644
--- a/backend/open_webui/apps/webui/routers/knowledge.py
+++ b/backend/open_webui/apps/webui/routers/knowledge.py
@@ -152,7 +152,13 @@ def add_file_to_knowledge_by_id(
         )
 
     # Add content to the vector database
-    process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
+    try:
+        process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
+    except Exception as e:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=str(e),
+        )
 
     if knowledge:
         data = knowledge.data or {}
@@ -263,5 +269,6 @@ def remove_file_from_knowledge_by_id(
 
 @router.delete("/{id}/delete", response_model=bool)
 async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)):
+    VECTOR_DB_CLIENT.delete_collection(collection_name=id)
     result = Knowledges.delete_knowledge_by_id(id=id)
     return result
diff --git a/backend/open_webui/constants.py b/backend/open_webui/constants.py
index 0326ae96e..37461402b 100644
--- a/backend/open_webui/constants.py
+++ b/backend/open_webui/constants.py
@@ -94,7 +94,9 @@ class ERROR_MESSAGES(str, Enum):
         lambda size="": f"Oops! The file you're trying to upload is too large. Please upload a file that is less than {size}."
     )
 
-    DUPLICATE_CONTENT = "The content provided is a duplicate. Please ensure that the content is unique before proceeding."
+    DUPLICATE_CONTENT = (
+        "Duplicate content detected. Please provide unique content to proceed."
+    )
     FILE_NOT_PROCESSED = "Extracted content is not available for this file. Please ensure that the file is processed before proceeding."
 
 
diff --git a/src/lib/components/workspace/Knowledge/Collection.svelte b/src/lib/components/workspace/Knowledge/Collection.svelte
index 46bfafc36..e343c0099 100644
--- a/src/lib/components/workspace/Knowledge/Collection.svelte
+++ b/src/lib/components/workspace/Knowledge/Collection.svelte
@@ -94,7 +94,7 @@
 	const addFileHandler = async (fileId) => {
 		const updatedKnowledge = await addFileToKnowledgeById(localStorage.token, id, fileId).catch(
 			(e) => {
-				console.error(e);
+				toast.error(e);
 			}
 		);
 
@@ -110,7 +110,7 @@
 			id,
 			fileId
 		).catch((e) => {
-			console.error(e);
+			toast.error(e);
 		});
 
 		if (updatedKnowledge) {
@@ -341,6 +341,7 @@
 										on:delete={(e) => {
 											console.log(e.detail);
 
+											selectedFileId = null;
 											deleteFileHandler(e.detail);
 										}}
 									/>