knowledge.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. import json
  2. from typing import Optional, Union
  3. from pydantic import BaseModel
  4. from fastapi import APIRouter, Depends, HTTPException, status
  5. import logging
  6. from open_webui.apps.webui.models.knowledge import (
  7. Knowledges,
  8. KnowledgeUpdateForm,
  9. KnowledgeForm,
  10. KnowledgeResponse,
  11. )
  12. from open_webui.apps.webui.models.files import Files, FileModel
  13. from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
  14. from open_webui.apps.retrieval.main import process_file, ProcessFileForm
  15. from open_webui.constants import ERROR_MESSAGES
  16. from open_webui.utils.utils import get_admin_user, get_verified_user
  17. from open_webui.env import SRC_LOG_LEVELS
  # Module logger; its level is taken from the "MODELS" entry of SRC_LOG_LEVELS.
  log = logging.getLogger(__name__)
  log.setLevel(SRC_LOG_LEVELS["MODELS"])

  # Router that every endpoint in this module registers itself on.
  router = APIRouter()
  21. ############################
  22. # GetKnowledgeItems
  23. ############################
  @router.get(
      "/", response_model=Optional[Union[list[KnowledgeResponse], KnowledgeResponse]]
  )
  async def get_knowledge_items(
      id: Optional[str] = None, user=Depends(get_verified_user)
  ):
      # Return a single knowledge base when ``id`` is supplied, otherwise the
      # list of all knowledge bases together with the metadata of their files.
      #
      # While listing, file ids that no longer resolve to a file are pruned from
      # the knowledge base and the pruned list is persisted.
      #
      # Raises HTTPException when ``id`` is given but no knowledge base matches.
      if id:
          knowledge = Knowledges.get_knowledge_by_id(id=id)
          if not knowledge:
              # NOTE(review): 401 is paired with a NOT_FOUND message; the status
              # is left unchanged so existing clients keep seeing the same code.
              raise HTTPException(
                  status_code=status.HTTP_401_UNAUTHORIZED,
                  detail=ERROR_MESSAGES.NOT_FOUND,
              )
          return knowledge

      knowledge_bases = []
      for knowledge in Knowledges.get_knowledge_items():
          # ``data`` may be None; normalising it once keeps every lookup below
          # from raising AttributeError on such a knowledge base.
          data = knowledge.data or {}
          file_ids = data.get("file_ids", [])
          files = Files.get_file_metadatas_by_ids(file_ids)

          # Keep only the ids that still resolve to a file. Filtering (rather
          # than list.remove) also drops repeated occurrences of a stale id.
          existing_ids = {file.id for file in files}
          remaining_ids = [fid for fid in file_ids if fid in existing_ids]

          if len(remaining_ids) != len(file_ids):
              # ``data`` is the model's own dict here, so the response built
              # below reflects the pruned list as well.
              data["file_ids"] = remaining_ids
              Knowledges.update_knowledge_by_id(
                  id=knowledge.id, form_data=KnowledgeUpdateForm(data=data)
              )
              files = Files.get_file_metadatas_by_ids(remaining_ids)

          knowledge_bases.append(
              KnowledgeResponse(
                  **knowledge.model_dump(),
                  files=files,
              )
          )
      return knowledge_bases
  68. ############################
  69. # CreateNewKnowledge
  70. ############################
  @router.post("/create", response_model=Optional[KnowledgeResponse])
  async def create_new_knowledge(form_data: KnowledgeForm, user=Depends(get_admin_user)):
      # Insert a new knowledge base owned by the calling user and return it.
      # A falsy result from the insert is reported as a 400 response.
      created = Knowledges.insert_new_knowledge(user.id, form_data)
      if not created:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.FILE_EXISTS,
          )
      return created
  81. ############################
  82. # GetKnowledgeById
  83. ############################
  # Response schema: all KnowledgeResponse fields plus the attached file records.
  class KnowledgeFilesResponse(KnowledgeResponse):
      # Populated by the endpoints from Files.get_files_by_ids(...).
      files: list[FileModel]
  @router.get("/{id}", response_model=Optional[KnowledgeFilesResponse])
  async def get_knowledge_by_id(id: str, user=Depends(get_verified_user)):
      # Look up one knowledge base and return it together with its file records.
      # An unknown id is answered with the NOT_FOUND message.
      knowledge = Knowledges.get_knowledge_by_id(id=id)
      if not knowledge:
          raise HTTPException(
              status_code=status.HTTP_401_UNAUTHORIZED,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      # Missing or empty ``data`` means there are no attached files.
      stored = knowledge.data or {}
      attached = Files.get_files_by_ids(stored.get("file_ids", []))
      return KnowledgeFilesResponse(
          **knowledge.model_dump(),
          files=attached,
      )
  101. ############################
  102. # UpdateKnowledgeById
  103. ############################
  @router.post("/{id}/update", response_model=Optional[KnowledgeFilesResponse])
  async def update_knowledge_by_id(
      id: str,
      form_data: KnowledgeUpdateForm,
      user=Depends(get_admin_user),
  ):
      # Apply ``form_data`` to the knowledge base ``id`` and return the updated
      # record with its file records. A falsy update result becomes a 400.
      updated = Knowledges.update_knowledge_by_id(id=id, form_data=form_data)
      if not updated:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.ID_TAKEN,
          )

      # Missing or empty ``data`` means there are no attached files.
      stored = updated.data or {}
      attached = Files.get_files_by_ids(stored.get("file_ids", []))
      return KnowledgeFilesResponse(
          **updated.model_dump(),
          files=attached,
      )
  123. ############################
  124. # AddFileToKnowledge
  125. ############################
  # Request body shared by the file add / update / remove endpoints.
  class KnowledgeFileIdForm(BaseModel):
      # Id of the file the request refers to.
      file_id: str
  @router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse])
  def add_file_to_knowledge_by_id(
      id: str,
      form_data: KnowledgeFileIdForm,
      user=Depends(get_admin_user),
  ):
      # Attach an already-uploaded file to the knowledge base ``id``: index it
      # into the collection named ``id`` and record its id in ``file_ids``.
      #
      # Every precondition is checked BEFORE the vector database is touched, so
      # a rejected request no longer leaves content behind in the collection.
      #
      # Raises HTTPException (400) when the file or knowledge base is unknown,
      # the file has no ``data``, the file is already attached, processing
      # fails, or the knowledge base update returns nothing.
      knowledge = Knowledges.get_knowledge_by_id(id=id)
      file = Files.get_file_by_id(form_data.file_id)

      if not file:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )
      if not file.data:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.FILE_NOT_PROCESSED,
          )
      if not knowledge:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      data = knowledge.data or {}
      file_ids = data.get("file_ids", [])
      if form_data.file_id in file_ids:
          # Already attached: reject before indexing the content a second time.
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("file_id"),
          )

      # Add content to the vector database
      try:
          process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
      except Exception as e:
          log.debug(e)
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=str(e),
          ) from e

      file_ids.append(form_data.file_id)
      data["file_ids"] = file_ids
      knowledge = Knowledges.update_knowledge_by_id(
          id=id, form_data=KnowledgeUpdateForm(data=data)
      )
      if not knowledge:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("knowledge"),
          )

      files = Files.get_files_by_ids(file_ids)
      return KnowledgeFilesResponse(
          **knowledge.model_dump(),
          files=files,
      )
  @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse])
  def update_file_from_knowledge_by_id(
      id: str,
      form_data: KnowledgeFileIdForm,
      user=Depends(get_admin_user),
  ):
      # Re-index one file of the knowledge base ``id``: delete the file's
      # entries from the collection, then process the file into it again.
      #
      # Raises HTTPException (400) when the file or the knowledge base is
      # unknown, or when processing the file fails.
      knowledge = Knowledges.get_knowledge_by_id(id=id)
      file = Files.get_file_by_id(form_data.file_id)

      if not file:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )
      # Must be checked before ``knowledge.id`` is read below; otherwise an
      # unknown id surfaces as an AttributeError instead of this 400.
      if not knowledge:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      # Remove content from the vector database
      VECTOR_DB_CLIENT.delete(
          collection_name=knowledge.id, filter={"file_id": form_data.file_id}
      )

      # Add content to the vector database
      try:
          process_file(ProcessFileForm(file_id=form_data.file_id, collection_name=id))
      except Exception as e:
          log.debug(e)
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=str(e),
          ) from e

      data = knowledge.data or {}
      file_ids = data.get("file_ids", [])
      files = Files.get_files_by_ids(file_ids)
      return KnowledgeFilesResponse(
          **knowledge.model_dump(),
          files=files,
      )
  223. ############################
  224. # RemoveFileFromKnowledge
  225. ############################
  @router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse])
  def remove_file_from_knowledge_by_id(
      id: str,
      form_data: KnowledgeFileIdForm,
      user=Depends(get_admin_user),
  ):
      # Detach a file from the knowledge base ``id``: delete its entries from
      # the collection, delete the file record, and drop its id from
      # ``file_ids``.
      #
      # All checks run BEFORE anything is deleted, so a request that ends in an
      # error response has not destroyed the file or its vectors.
      #
      # Raises HTTPException (400) when the file or knowledge base is unknown,
      # the file is not attached to this knowledge base, or the knowledge base
      # update returns nothing.
      knowledge = Knowledges.get_knowledge_by_id(id=id)
      file = Files.get_file_by_id(form_data.file_id)

      if not file:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )
      # Must be checked before ``knowledge.id`` is read below; otherwise an
      # unknown id surfaces as an AttributeError instead of this 400.
      if not knowledge:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.NOT_FOUND,
          )

      data = knowledge.data or {}
      file_ids = data.get("file_ids", [])
      if form_data.file_id not in file_ids:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("file_id"),
          )

      # Remove content from the vector database
      VECTOR_DB_CLIENT.delete(
          collection_name=knowledge.id, filter={"file_id": form_data.file_id}
      )
      Files.delete_file_by_id(form_data.file_id)

      # Filtering drops every occurrence of the id, not just the first one.
      remaining_ids = [fid for fid in file_ids if fid != form_data.file_id]
      data["file_ids"] = remaining_ids
      knowledge = Knowledges.update_knowledge_by_id(
          id=id, form_data=KnowledgeUpdateForm(data=data)
      )
      if not knowledge:
          raise HTTPException(
              status_code=status.HTTP_400_BAD_REQUEST,
              detail=ERROR_MESSAGES.DEFAULT("knowledge"),
          )

      files = Files.get_files_by_ids(remaining_ids)
      return KnowledgeFilesResponse(
          **knowledge.model_dump(),
          files=files,
      )
  278. ############################
  279. # ResetKnowledgeById
  280. ############################
  @router.post("/{id}/reset", response_model=Optional[KnowledgeResponse])
  async def reset_knowledge_by_id(id: str, user=Depends(get_admin_user)):
      # Empty the knowledge base ``id``: delete its vector collection, then
      # store an empty ``file_ids`` list and return the update result.
      try:
          VECTOR_DB_CLIENT.delete_collection(collection_name=id)
      except Exception as err:
          # Best effort: a failed collection delete is logged and the reset
          # still proceeds.
          log.debug(err)

      emptied = KnowledgeUpdateForm(data={"file_ids": []})
      return Knowledges.update_knowledge_by_id(id=id, form_data=emptied)
  292. ############################
  293. # DeleteKnowledgeById
  294. ############################
  @router.delete("/{id}/delete", response_model=bool)
  async def delete_knowledge_by_id(id: str, user=Depends(get_admin_user)):
      # Delete the knowledge base ``id`` and its vector collection; returns
      # whatever Knowledges.delete_knowledge_by_id reports.
      try:
          VECTOR_DB_CLIENT.delete_collection(collection_name=id)
      except Exception as err:
          # Best effort: a failed collection delete is logged and the record
          # is still removed.
          log.debug(err)

      return Knowledges.delete_knowledge_by_id(id=id)