Преглед на файлове

Avoid logging file contents at level INFO

I had problems with document handling in rootless containers. Long documents caused the container to hang. Reducing the verbosity of logging from retrieval.main seemed to fix the issues I was experiencing.
execgit преди 6 месеца
родител
ревизия
bc7622c0fe
променен е 1 файл, в който са добавени 20 реда и е изтрит 1 ред
  1. 20 1
      backend/open_webui/apps/retrieval/main.py

+ 20 - 1
backend/open_webui/apps/retrieval/main.py

@@ -636,6 +636,25 @@ async def update_query_settings(
 ####################################
 
 
def _get_docs_info(
    docs: list[Document]
) -> str:
    """Return a short, comma-separated label for *docs* for use in log lines.

    For each document the first non-empty metadata value among ``name``,
    ``title`` and ``source`` (checked in that order) is used as its label.
    Documents that yield no label are skipped, duplicate labels are reported
    once, and labels appear in first-seen order so the same input always
    produces the same text.

    Args:
        docs: Objects that may carry a ``metadata`` mapping. A missing or
            ``None`` ``metadata`` attribute is treated as empty.

    Returns:
        The labels joined with ``", "``, or an empty string when no document
        provides one.
    """
    # A dict keeps insertion order, so it de-duplicates like a set while
    # keeping the output deterministic.
    labels: dict[str, None] = {}

    for doc in docs or ():
        # `or {}` also covers a metadata attribute that exists but is None.
        metadata = getattr(doc, "metadata", None) or {}
        label = (
            metadata.get("name")
            or metadata.get("title")
            or metadata.get("source")
        )
        if label:
            # Coerce to str so a non-string value cannot make join() raise.
            labels[str(label)] = None

    return ", ".join(labels)
+
+
 def save_docs_to_vector_db(
     docs,
     collection_name,
@@ -644,7 +663,7 @@ def save_docs_to_vector_db(
     split: bool = True,
     add: bool = False,
 ) -> bool:
-    log.info(f"save_docs_to_vector_db {docs} {collection_name}")
+    log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}")
 
     # Check if entries with the same hash (metadata.hash) already exist
     if metadata and "hash" in metadata: