main.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. from fastapi import (
  2. FastAPI,
  3. Depends,
  4. HTTPException,
  5. status,
  6. UploadFile,
  7. File,
  8. Form,
  9. )
  10. from fastapi.middleware.cors import CORSMiddleware
  11. import os, shutil, logging, re
  12. from pathlib import Path
  13. from typing import List
  14. from chromadb.utils.batch_utils import create_batches
  15. from langchain_community.document_loaders import (
  16. WebBaseLoader,
  17. TextLoader,
  18. PyPDFLoader,
  19. CSVLoader,
  20. BSHTMLLoader,
  21. Docx2txtLoader,
  22. UnstructuredEPubLoader,
  23. UnstructuredWordDocumentLoader,
  24. UnstructuredMarkdownLoader,
  25. UnstructuredXMLLoader,
  26. UnstructuredRSTLoader,
  27. UnstructuredExcelLoader,
  28. )
  29. from langchain.text_splitter import RecursiveCharacterTextSplitter
  30. from pydantic import BaseModel
  31. from typing import Optional
  32. import mimetypes
  33. import uuid
  34. import json
  35. import sentence_transformers
  36. from apps.ollama.main import generate_ollama_embeddings, GenerateEmbeddingsForm
  37. from apps.web.models.documents import (
  38. Documents,
  39. DocumentForm,
  40. DocumentResponse,
  41. )
  42. from apps.rag.utils import (
  43. query_embeddings_doc,
  44. query_embeddings_collection,
  45. generate_openai_embeddings,
  46. )
  47. from utils.misc import (
  48. calculate_sha256,
  49. calculate_sha256_string,
  50. sanitize_filename,
  51. extract_folders_after_data_docs,
  52. )
  53. from utils.utils import get_current_user, get_admin_user
  54. from config import (
  55. SRC_LOG_LEVELS,
  56. UPLOAD_DIR,
  57. DOCS_DIR,
  58. RAG_EMBEDDING_ENGINE,
  59. RAG_EMBEDDING_MODEL,
  60. RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
  61. RAG_OPENAI_API_BASE_URL,
  62. RAG_OPENAI_API_KEY,
  63. DEVICE_TYPE,
  64. CHROMA_CLIENT,
  65. CHUNK_SIZE,
  66. CHUNK_OVERLAP,
  67. RAG_TEMPLATE,
  68. )
  69. from constants import ERROR_MESSAGES
  70. log = logging.getLogger(__name__)
  71. log.setLevel(SRC_LOG_LEVELS["RAG"])
  72. app = FastAPI()
  73. app.state.TOP_K = 4
  74. app.state.CHUNK_SIZE = CHUNK_SIZE
  75. app.state.CHUNK_OVERLAP = CHUNK_OVERLAP
  76. app.state.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
  77. app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
  78. app.state.RAG_TEMPLATE = RAG_TEMPLATE
  79. app.state.OPENAI_API_BASE_URL = RAG_OPENAI_API_BASE_URL
  80. app.state.OPENAI_API_KEY = RAG_OPENAI_API_KEY
  81. app.state.PDF_EXTRACT_IMAGES = False
  82. if app.state.RAG_EMBEDDING_ENGINE == "":
  83. app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer(
  84. app.state.RAG_EMBEDDING_MODEL,
  85. device=DEVICE_TYPE,
  86. trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
  87. )
  88. origins = ["*"]
  89. app.add_middleware(
  90. CORSMiddleware,
  91. allow_origins=origins,
  92. allow_credentials=True,
  93. allow_methods=["*"],
  94. allow_headers=["*"],
  95. )
  96. class CollectionNameForm(BaseModel):
  97. collection_name: Optional[str] = "test"
  98. class StoreWebForm(CollectionNameForm):
  99. url: str
  100. @app.get("/")
  101. async def get_status():
  102. return {
  103. "status": True,
  104. "chunk_size": app.state.CHUNK_SIZE,
  105. "chunk_overlap": app.state.CHUNK_OVERLAP,
  106. "template": app.state.RAG_TEMPLATE,
  107. "embedding_engine": app.state.RAG_EMBEDDING_ENGINE,
  108. "embedding_model": app.state.RAG_EMBEDDING_MODEL,
  109. }
  110. @app.get("/embedding")
  111. async def get_embedding_config(user=Depends(get_admin_user)):
  112. return {
  113. "status": True,
  114. "embedding_engine": app.state.RAG_EMBEDDING_ENGINE,
  115. "embedding_model": app.state.RAG_EMBEDDING_MODEL,
  116. "openai_config": {
  117. "url": app.state.OPENAI_API_BASE_URL,
  118. "key": app.state.OPENAI_API_KEY,
  119. },
  120. }
  121. class OpenAIConfigForm(BaseModel):
  122. url: str
  123. key: str
  124. class EmbeddingModelUpdateForm(BaseModel):
  125. openai_config: Optional[OpenAIConfigForm] = None
  126. embedding_engine: str
  127. embedding_model: str
  128. @app.post("/embedding/update")
  129. async def update_embedding_config(
  130. form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
  131. ):
  132. log.info(
  133. f"Updating embedding model: {app.state.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}"
  134. )
  135. try:
  136. app.state.RAG_EMBEDDING_ENGINE = form_data.embedding_engine
  137. if app.state.RAG_EMBEDDING_ENGINE in ["ollama", "openai"]:
  138. app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
  139. app.state.sentence_transformer_ef = None
  140. if form_data.openai_config != None:
  141. app.state.OPENAI_API_BASE_URL = form_data.openai_config.url
  142. app.state.OPENAI_API_KEY = form_data.openai_config.key
  143. else:
  144. sentence_transformer_ef = sentence_transformers.SentenceTransformer(
  145. app.state.RAG_EMBEDDING_MODEL,
  146. device=DEVICE_TYPE,
  147. trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
  148. )
  149. app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
  150. app.state.sentence_transformer_ef = sentence_transformer_ef
  151. return {
  152. "status": True,
  153. "embedding_engine": app.state.RAG_EMBEDDING_ENGINE,
  154. "embedding_model": app.state.RAG_EMBEDDING_MODEL,
  155. "openai_config": {
  156. "url": app.state.OPENAI_API_BASE_URL,
  157. "key": app.state.OPENAI_API_KEY,
  158. },
  159. }
  160. except Exception as e:
  161. log.exception(f"Problem updating embedding model: {e}")
  162. raise HTTPException(
  163. status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
  164. detail=ERROR_MESSAGES.DEFAULT(e),
  165. )
  166. @app.get("/config")
  167. async def get_rag_config(user=Depends(get_admin_user)):
  168. return {
  169. "status": True,
  170. "pdf_extract_images": app.state.PDF_EXTRACT_IMAGES,
  171. "chunk": {
  172. "chunk_size": app.state.CHUNK_SIZE,
  173. "chunk_overlap": app.state.CHUNK_OVERLAP,
  174. },
  175. }
  176. class ChunkParamUpdateForm(BaseModel):
  177. chunk_size: int
  178. chunk_overlap: int
  179. class ConfigUpdateForm(BaseModel):
  180. pdf_extract_images: bool
  181. chunk: ChunkParamUpdateForm
  182. @app.post("/config/update")
  183. async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_user)):
  184. app.state.PDF_EXTRACT_IMAGES = form_data.pdf_extract_images
  185. app.state.CHUNK_SIZE = form_data.chunk.chunk_size
  186. app.state.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
  187. return {
  188. "status": True,
  189. "pdf_extract_images": app.state.PDF_EXTRACT_IMAGES,
  190. "chunk": {
  191. "chunk_size": app.state.CHUNK_SIZE,
  192. "chunk_overlap": app.state.CHUNK_OVERLAP,
  193. },
  194. }
  195. @app.get("/template")
  196. async def get_rag_template(user=Depends(get_current_user)):
  197. return {
  198. "status": True,
  199. "template": app.state.RAG_TEMPLATE,
  200. }
  201. @app.get("/query/settings")
  202. async def get_query_settings(user=Depends(get_admin_user)):
  203. return {
  204. "status": True,
  205. "template": app.state.RAG_TEMPLATE,
  206. "k": app.state.TOP_K,
  207. }
  208. class QuerySettingsForm(BaseModel):
  209. k: Optional[int] = None
  210. template: Optional[str] = None
  211. @app.post("/query/settings/update")
  212. async def update_query_settings(
  213. form_data: QuerySettingsForm, user=Depends(get_admin_user)
  214. ):
  215. app.state.RAG_TEMPLATE = form_data.template if form_data.template else RAG_TEMPLATE
  216. app.state.TOP_K = form_data.k if form_data.k else 4
  217. return {"status": True, "template": app.state.RAG_TEMPLATE}
  218. class QueryDocForm(BaseModel):
  219. collection_name: str
  220. query: str
  221. k: Optional[int] = None
  222. @app.post("/query/doc")
  223. def query_doc_handler(
  224. form_data: QueryDocForm,
  225. user=Depends(get_current_user),
  226. ):
  227. try:
  228. if app.state.RAG_EMBEDDING_ENGINE == "":
  229. query_embeddings = app.state.sentence_transformer_ef.encode(
  230. form_data.query
  231. ).tolist()
  232. elif app.state.RAG_EMBEDDING_ENGINE == "ollama":
  233. query_embeddings = generate_ollama_embeddings(
  234. GenerateEmbeddingsForm(
  235. **{
  236. "model": app.state.RAG_EMBEDDING_MODEL,
  237. "prompt": form_data.query,
  238. }
  239. )
  240. )
  241. elif app.state.RAG_EMBEDDING_ENGINE == "openai":
  242. query_embeddings = generate_openai_embeddings(
  243. model=app.state.RAG_EMBEDDING_MODEL,
  244. text=form_data.query,
  245. key=app.state.OPENAI_API_KEY,
  246. url=app.state.OPENAI_API_BASE_URL,
  247. )
  248. return query_embeddings_doc(
  249. collection_name=form_data.collection_name,
  250. query=form_data.query,
  251. query_embeddings=query_embeddings,
  252. k=form_data.k if form_data.k else app.state.TOP_K,
  253. )
  254. except Exception as e:
  255. log.exception(e)
  256. raise HTTPException(
  257. status_code=status.HTTP_400_BAD_REQUEST,
  258. detail=ERROR_MESSAGES.DEFAULT(e),
  259. )
  260. class QueryCollectionsForm(BaseModel):
  261. collection_names: List[str]
  262. query: str
  263. k: Optional[int] = None
  264. @app.post("/query/collection")
  265. def query_collection_handler(
  266. form_data: QueryCollectionsForm,
  267. user=Depends(get_current_user),
  268. ):
  269. try:
  270. if app.state.RAG_EMBEDDING_ENGINE == "":
  271. query_embeddings = app.state.sentence_transformer_ef.encode(
  272. form_data.query
  273. ).tolist()
  274. elif app.state.RAG_EMBEDDING_ENGINE == "ollama":
  275. query_embeddings = generate_ollama_embeddings(
  276. GenerateEmbeddingsForm(
  277. **{
  278. "model": app.state.RAG_EMBEDDING_MODEL,
  279. "prompt": form_data.query,
  280. }
  281. )
  282. )
  283. elif app.state.RAG_EMBEDDING_ENGINE == "openai":
  284. query_embeddings = generate_openai_embeddings(
  285. model=app.state.RAG_EMBEDDING_MODEL,
  286. text=form_data.query,
  287. key=app.state.OPENAI_API_KEY,
  288. url=app.state.OPENAI_API_BASE_URL,
  289. )
  290. return query_embeddings_collection(
  291. collection_names=form_data.collection_names,
  292. query_embeddings=query_embeddings,
  293. k=form_data.k if form_data.k else app.state.TOP_K,
  294. )
  295. except Exception as e:
  296. log.exception(e)
  297. raise HTTPException(
  298. status_code=status.HTTP_400_BAD_REQUEST,
  299. detail=ERROR_MESSAGES.DEFAULT(e),
  300. )
  301. @app.post("/web")
  302. def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
  303. # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
  304. try:
  305. loader = WebBaseLoader(form_data.url)
  306. data = loader.load()
  307. collection_name = form_data.collection_name
  308. if collection_name == "":
  309. collection_name = calculate_sha256_string(form_data.url)[:63]
  310. store_data_in_vector_db(data, collection_name, overwrite=True)
  311. return {
  312. "status": True,
  313. "collection_name": collection_name,
  314. "filename": form_data.url,
  315. }
  316. except Exception as e:
  317. log.exception(e)
  318. raise HTTPException(
  319. status_code=status.HTTP_400_BAD_REQUEST,
  320. detail=ERROR_MESSAGES.DEFAULT(e),
  321. )
  322. def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
  323. text_splitter = RecursiveCharacterTextSplitter(
  324. chunk_size=app.state.CHUNK_SIZE,
  325. chunk_overlap=app.state.CHUNK_OVERLAP,
  326. add_start_index=True,
  327. )
  328. docs = text_splitter.split_documents(data)
  329. if len(docs) > 0:
  330. log.info(f"store_data_in_vector_db {docs}")
  331. return store_docs_in_vector_db(docs, collection_name, overwrite), None
  332. else:
  333. raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)
  334. def store_text_in_vector_db(
  335. text, metadata, collection_name, overwrite: bool = False
  336. ) -> bool:
  337. text_splitter = RecursiveCharacterTextSplitter(
  338. chunk_size=app.state.CHUNK_SIZE,
  339. chunk_overlap=app.state.CHUNK_OVERLAP,
  340. add_start_index=True,
  341. )
  342. docs = text_splitter.create_documents([text], metadatas=[metadata])
  343. return store_docs_in_vector_db(docs, collection_name, overwrite)
  344. def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool:
  345. log.info(f"store_docs_in_vector_db {docs} {collection_name}")
  346. texts = [doc.page_content for doc in docs]
  347. texts = list(map(lambda x: x.replace("\n", " "), texts))
  348. metadatas = [doc.metadata for doc in docs]
  349. try:
  350. if overwrite:
  351. for collection in CHROMA_CLIENT.list_collections():
  352. if collection_name == collection.name:
  353. log.info(f"deleting existing collection {collection_name}")
  354. CHROMA_CLIENT.delete_collection(name=collection_name)
  355. collection = CHROMA_CLIENT.create_collection(name=collection_name)
  356. if app.state.RAG_EMBEDDING_ENGINE == "":
  357. embeddings = app.state.sentence_transformer_ef.encode(texts).tolist()
  358. elif app.state.RAG_EMBEDDING_ENGINE == "ollama":
  359. embeddings = [
  360. generate_ollama_embeddings(
  361. GenerateEmbeddingsForm(
  362. **{"model": app.state.RAG_EMBEDDING_MODEL, "prompt": text}
  363. )
  364. )
  365. for text in texts
  366. ]
  367. elif app.state.RAG_EMBEDDING_ENGINE == "openai":
  368. embeddings = [
  369. generate_openai_embeddings(
  370. model=app.state.RAG_EMBEDDING_MODEL,
  371. text=text,
  372. key=app.state.OPENAI_API_KEY,
  373. url=app.state.OPENAI_API_BASE_URL,
  374. )
  375. for text in texts
  376. ]
  377. for batch in create_batches(
  378. api=CHROMA_CLIENT,
  379. ids=[str(uuid.uuid1()) for _ in texts],
  380. metadatas=metadatas,
  381. embeddings=embeddings,
  382. documents=texts,
  383. ):
  384. collection.add(*batch)
  385. return True
  386. except Exception as e:
  387. log.exception(e)
  388. if e.__class__.__name__ == "UniqueConstraintError":
  389. return True
  390. return False
  391. def get_loader(filename: str, file_content_type: str, file_path: str):
  392. file_ext = filename.split(".")[-1].lower()
  393. known_type = True
  394. known_source_ext = [
  395. "go",
  396. "py",
  397. "java",
  398. "sh",
  399. "bat",
  400. "ps1",
  401. "cmd",
  402. "js",
  403. "ts",
  404. "css",
  405. "cpp",
  406. "hpp",
  407. "h",
  408. "c",
  409. "cs",
  410. "sql",
  411. "log",
  412. "ini",
  413. "pl",
  414. "pm",
  415. "r",
  416. "dart",
  417. "dockerfile",
  418. "env",
  419. "php",
  420. "hs",
  421. "hsc",
  422. "lua",
  423. "nginxconf",
  424. "conf",
  425. "m",
  426. "mm",
  427. "plsql",
  428. "perl",
  429. "rb",
  430. "rs",
  431. "db2",
  432. "scala",
  433. "bash",
  434. "swift",
  435. "vue",
  436. "svelte",
  437. ]
  438. if file_ext == "pdf":
  439. loader = PyPDFLoader(file_path, extract_images=app.state.PDF_EXTRACT_IMAGES)
  440. elif file_ext == "csv":
  441. loader = CSVLoader(file_path)
  442. elif file_ext == "rst":
  443. loader = UnstructuredRSTLoader(file_path, mode="elements")
  444. elif file_ext == "xml":
  445. loader = UnstructuredXMLLoader(file_path)
  446. elif file_ext in ["htm", "html"]:
  447. loader = BSHTMLLoader(file_path, open_encoding="unicode_escape")
  448. elif file_ext == "md":
  449. loader = UnstructuredMarkdownLoader(file_path)
  450. elif file_content_type == "application/epub+zip":
  451. loader = UnstructuredEPubLoader(file_path)
  452. elif (
  453. file_content_type
  454. == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  455. or file_ext in ["doc", "docx"]
  456. ):
  457. loader = Docx2txtLoader(file_path)
  458. elif file_content_type in [
  459. "application/vnd.ms-excel",
  460. "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  461. ] or file_ext in ["xls", "xlsx"]:
  462. loader = UnstructuredExcelLoader(file_path)
  463. elif file_ext in known_source_ext or (
  464. file_content_type and file_content_type.find("text/") >= 0
  465. ):
  466. loader = TextLoader(file_path, autodetect_encoding=True)
  467. else:
  468. loader = TextLoader(file_path, autodetect_encoding=True)
  469. known_type = False
  470. return loader, known_type
  471. @app.post("/doc")
  472. def store_doc(
  473. collection_name: Optional[str] = Form(None),
  474. file: UploadFile = File(...),
  475. user=Depends(get_current_user),
  476. ):
  477. # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
  478. log.info(f"file.content_type: {file.content_type}")
  479. try:
  480. unsanitized_filename = file.filename
  481. filename = os.path.basename(unsanitized_filename)
  482. file_path = f"{UPLOAD_DIR}/{filename}"
  483. contents = file.file.read()
  484. with open(file_path, "wb") as f:
  485. f.write(contents)
  486. f.close()
  487. f = open(file_path, "rb")
  488. if collection_name == None:
  489. collection_name = calculate_sha256(f)[:63]
  490. f.close()
  491. loader, known_type = get_loader(filename, file.content_type, file_path)
  492. data = loader.load()
  493. try:
  494. result = store_data_in_vector_db(data, collection_name)
  495. if result:
  496. return {
  497. "status": True,
  498. "collection_name": collection_name,
  499. "filename": filename,
  500. "known_type": known_type,
  501. }
  502. except Exception as e:
  503. raise HTTPException(
  504. status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
  505. detail=e,
  506. )
  507. except Exception as e:
  508. log.exception(e)
  509. if "No pandoc was found" in str(e):
  510. raise HTTPException(
  511. status_code=status.HTTP_400_BAD_REQUEST,
  512. detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
  513. )
  514. else:
  515. raise HTTPException(
  516. status_code=status.HTTP_400_BAD_REQUEST,
  517. detail=ERROR_MESSAGES.DEFAULT(e),
  518. )
  519. class TextRAGForm(BaseModel):
  520. name: str
  521. content: str
  522. collection_name: Optional[str] = None
  523. @app.post("/text")
  524. def store_text(
  525. form_data: TextRAGForm,
  526. user=Depends(get_current_user),
  527. ):
  528. collection_name = form_data.collection_name
  529. if collection_name == None:
  530. collection_name = calculate_sha256_string(form_data.content)
  531. result = store_text_in_vector_db(
  532. form_data.content,
  533. metadata={"name": form_data.name, "created_by": user.id},
  534. collection_name=collection_name,
  535. )
  536. if result:
  537. return {"status": True, "collection_name": collection_name}
  538. else:
  539. raise HTTPException(
  540. status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
  541. detail=ERROR_MESSAGES.DEFAULT(),
  542. )
  543. @app.get("/scan")
  544. def scan_docs_dir(user=Depends(get_admin_user)):
  545. for path in Path(DOCS_DIR).rglob("./**/*"):
  546. try:
  547. if path.is_file() and not path.name.startswith("."):
  548. tags = extract_folders_after_data_docs(path)
  549. filename = path.name
  550. file_content_type = mimetypes.guess_type(path)
  551. f = open(path, "rb")
  552. collection_name = calculate_sha256(f)[:63]
  553. f.close()
  554. loader, known_type = get_loader(
  555. filename, file_content_type[0], str(path)
  556. )
  557. data = loader.load()
  558. try:
  559. result = store_data_in_vector_db(data, collection_name)
  560. if result:
  561. sanitized_filename = sanitize_filename(filename)
  562. doc = Documents.get_doc_by_name(sanitized_filename)
  563. if doc == None:
  564. doc = Documents.insert_new_doc(
  565. user.id,
  566. DocumentForm(
  567. **{
  568. "name": sanitized_filename,
  569. "title": filename,
  570. "collection_name": collection_name,
  571. "filename": filename,
  572. "content": (
  573. json.dumps(
  574. {
  575. "tags": list(
  576. map(
  577. lambda name: {"name": name},
  578. tags,
  579. )
  580. )
  581. }
  582. )
  583. if len(tags)
  584. else "{}"
  585. ),
  586. }
  587. ),
  588. )
  589. except Exception as e:
  590. log.exception(e)
  591. pass
  592. except Exception as e:
  593. log.exception(e)
  594. return True
  595. @app.get("/reset/db")
  596. def reset_vector_db(user=Depends(get_admin_user)):
  597. CHROMA_CLIENT.reset()
  598. @app.get("/reset")
  599. def reset(user=Depends(get_admin_user)) -> bool:
  600. folder = f"{UPLOAD_DIR}"
  601. for filename in os.listdir(folder):
  602. file_path = os.path.join(folder, filename)
  603. try:
  604. if os.path.isfile(file_path) or os.path.islink(file_path):
  605. os.unlink(file_path)
  606. elif os.path.isdir(file_path):
  607. shutil.rmtree(file_path)
  608. except Exception as e:
  609. log.error("Failed to delete %s. Reason: %s" % (file_path, e))
  610. try:
  611. CHROMA_CLIENT.reset()
  612. except Exception as e:
  613. log.exception(e)
  614. return True