瀏覽代碼

Recreating the RAG collection is now optional and is only done for web requests

Firat Birlik 1 年之前
父節點
當前提交
6782e95c75
共有 1 個文件被更改,包括 7 次插入和 5 次刪除
  1. 7 5
      backend/apps/rag/main.py

+ 7 - 5
backend/apps/rag/main.py

@@ -108,7 +108,7 @@ class StoreWebForm(CollectionNameForm):
     url: str
 
 
-def store_data_in_vector_db(data, collection_name) -> bool:
+def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP
     )
@@ -118,9 +118,11 @@ def store_data_in_vector_db(data, collection_name) -> bool:
     metadatas = [doc.metadata for doc in docs]
 
     try:
-        for collection in CHROMA_CLIENT.list_collections():
-            if collection_name == collection.name:
-                CHROMA_CLIENT.delete_collection(name=collection_name)
+        if overwrite:
+            for collection in CHROMA_CLIENT.list_collections():
+                if collection_name == collection.name:
+                    print(f"deleting existing collection {collection_name}")
+                    CHROMA_CLIENT.delete_collection(name=collection_name)
 
         collection = CHROMA_CLIENT.create_collection(
             name=collection_name,
@@ -359,7 +361,7 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
         if collection_name == "":
             collection_name = calculate_sha256_string(form_data.url)[:63]
 
-        store_data_in_vector_db(data, collection_name)
+        store_data_in_vector_db(data, collection_name, overwrite=True)
         return {
             "status": True,
             "collection_name": collection_name,