|
@@ -411,7 +411,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
|
|
elif file_ext == "xml":
|
|
elif file_ext == "xml":
|
|
loader = UnstructuredXMLLoader(file_path)
|
|
loader = UnstructuredXMLLoader(file_path)
|
|
elif file_ext in ["htm", "html"]:
|
|
elif file_ext in ["htm", "html"]:
|
|
- loader = BSHTMLLoader(file_path)
|
|
|
|
|
|
+ loader = BSHTMLLoader(file_path, open_encoding="unicode_escape")
|
|
elif file_ext == "md":
|
|
elif file_ext == "md":
|
|
loader = UnstructuredMarkdownLoader(file_path)
|
|
loader = UnstructuredMarkdownLoader(file_path)
|
|
elif file_content_type == "application/epub+zip":
|
|
elif file_content_type == "application/epub+zip":
|