Browse the code

Merge pull request #8637 from kahghi/add-gcs-storage-provider

feat: add GCSStorageProvider
Timothy Jaeryang Baek, 3 months ago
parent
commit
80e004c31f

+ 3 - 0
backend/open_webui/config.py

@@ -662,6 +662,9 @@ S3_REGION_NAME = os.environ.get("S3_REGION_NAME", None)
 S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", None)
 S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", None)
 S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", None)
 S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", None)
 
 
+GCS_BUCKET_NAME = os.environ.get("GCS_BUCKET_NAME", None)
+GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_JSON", None)
+
 ####################################
 ####################################
 # File Upload DIR
 # File Upload DIR
 ####################################
 ####################################

+ 67 - 1
backend/open_webui/storage/provider.py

@@ -1,5 +1,6 @@
 import os
 import os
 import shutil
 import shutil
+import json
 from abc import ABC, abstractmethod
 from abc import ABC, abstractmethod
 from typing import BinaryIO, Tuple
 from typing import BinaryIO, Tuple
 
 
@@ -11,9 +12,13 @@ from open_webui.config import (
     S3_ENDPOINT_URL,
     S3_ENDPOINT_URL,
     S3_REGION_NAME,
     S3_REGION_NAME,
     S3_SECRET_ACCESS_KEY,
     S3_SECRET_ACCESS_KEY,
+    GCS_BUCKET_NAME,
+    GOOGLE_APPLICATION_CREDENTIALS_JSON,
     STORAGE_PROVIDER,
     STORAGE_PROVIDER,
     UPLOAD_DIR,
     UPLOAD_DIR,
 )
 )
+from google.cloud import storage
+from google.cloud.exceptions import GoogleCloudError, NotFound
 from open_webui.constants import ERROR_MESSAGES
 from open_webui.constants import ERROR_MESSAGES
 
 
 
 
@@ -137,15 +142,76 @@ class S3StorageProvider(StorageProvider):
         # Always delete from local storage
         # Always delete from local storage
         LocalStorageProvider.delete_all_files()
         LocalStorageProvider.delete_all_files()
 
 
class GCSStorageProvider(StorageProvider):
    """Storage provider backed by a Google Cloud Storage bucket.

    Every operation also mirrors the file to local storage via
    LocalStorageProvider, matching the behavior of the other providers.
    """

    def __init__(self):
        self.bucket_name = GCS_BUCKET_NAME

        if GOOGLE_APPLICATION_CREDENTIALS_JSON:
            # Explicit service-account credentials supplied as a JSON string.
            self.gcs_client = storage.Client.from_service_account_info(
                info=json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
            )
        else:
            # If no credentials JSON is provided, credentials are picked up
            # from the environment: user credentials on a local machine, or
            # the Google metadata server on a Compute Engine instance.
            self.gcs_client = storage.Client()
        self.bucket = self.gcs_client.bucket(GCS_BUCKET_NAME)

    @staticmethod
    def _blob_name(file_path: str) -> str:
        """Extract the object name from a 'gs://<bucket>/<name>' path.

        Uses a single split so object names that themselves contain '/'
        are preserved intact.
        """
        return file_path.removeprefix("gs://").split("/", 1)[1]

    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]:
        """Upload the file to GCS; return (contents, 'gs://<bucket>/<name>').

        Raises RuntimeError if the upload to GCS fails. The file is written
        to local storage first.
        """
        contents, file_path = LocalStorageProvider.upload_file(file, filename)
        try:
            blob = self.bucket.blob(filename)
            blob.upload_from_filename(file_path)
            return contents, f"gs://{self.bucket_name}/{filename}"
        except GoogleCloudError as e:
            raise RuntimeError(f"Error uploading file to GCS: {e}")

    def get_file(self, file_path: str) -> str:
        """Download the object behind `file_path` into UPLOAD_DIR.

        Returns the local path of the downloaded file. Raises RuntimeError
        if the object does not exist.
        """
        try:
            filename = self._blob_name(file_path)
            local_file_path = f"{UPLOAD_DIR}/{filename}"
            blob = self.bucket.get_blob(filename)
            if blob is None:
                # get_blob() returns None for a missing object instead of
                # raising, so surface that as NotFound explicitly.
                raise NotFound(f"file {filename} not found")
            blob.download_to_filename(local_file_path)

            return local_file_path
        except NotFound as e:
            raise RuntimeError(f"Error downloading file from GCS: {e}")

    def delete_file(self, file_path: str) -> None:
        """Delete the object behind `file_path` from GCS and local storage."""
        try:
            filename = self._blob_name(file_path)
            blob = self.bucket.get_blob(filename)
            if blob is None:
                # Without this guard, blob.delete() on None would raise
                # AttributeError, bypassing the NotFound handler below.
                raise NotFound(f"file {filename} not found")
            blob.delete()
        except NotFound as e:
            raise RuntimeError(f"Error deleting file from GCS: {e}")

        # Always delete from local storage
        LocalStorageProvider.delete_file(file_path)

    def delete_all_files(self) -> None:
        """Delete every object in the bucket, then clear local storage."""
        try:
            for blob in self.bucket.list_blobs():
                blob.delete()
        except NotFound as e:
            raise RuntimeError(f"Error deleting all files from GCS: {e}")

        # Always delete from local storage
        LocalStorageProvider.delete_all_files()
 
 
 def get_storage_provider(storage_provider: str):
 def get_storage_provider(storage_provider: str):
     if storage_provider == "local":
     if storage_provider == "local":
         Storage = LocalStorageProvider()
         Storage = LocalStorageProvider()
     elif storage_provider == "s3":
     elif storage_provider == "s3":
         Storage = S3StorageProvider()
         Storage = S3StorageProvider()
+    elif storage_provider == "gcs":
+        Storage = GCSStorageProvider()
     else:
     else:
         raise RuntimeError(f"Unsupported storage provider: {storage_provider}")
         raise RuntimeError(f"Unsupported storage provider: {storage_provider}")
     return Storage
     return Storage
 
 
-
 Storage = get_storage_provider(STORAGE_PROVIDER)
 Storage = get_storage_provider(STORAGE_PROVIDER)

+ 95 - 1
backend/open_webui/test/apps/webui/storage/test_provider.py

@@ -1,10 +1,12 @@
 import io
 import io
-
+import os
 import boto3
 import boto3
 import pytest
 import pytest
 from botocore.exceptions import ClientError
 from botocore.exceptions import ClientError
 from moto import mock_aws
 from moto import mock_aws
 from open_webui.storage import provider
 from open_webui.storage import provider
+from gcp_storage_emulator.server import create_server
+from google.cloud import storage
 
 
 
 
 def mock_upload_dir(monkeypatch, tmp_path):
 def mock_upload_dir(monkeypatch, tmp_path):
@@ -19,6 +21,7 @@ def test_imports():
     provider.StorageProvider
     provider.StorageProvider
     provider.LocalStorageProvider
     provider.LocalStorageProvider
     provider.S3StorageProvider
     provider.S3StorageProvider
+    provider.GCSStorageProvider
     provider.Storage
     provider.Storage
 
 
 
 
@@ -27,6 +30,8 @@ def test_get_storage_provider():
     assert isinstance(Storage, provider.LocalStorageProvider)
     assert isinstance(Storage, provider.LocalStorageProvider)
     Storage = provider.get_storage_provider("s3")
     Storage = provider.get_storage_provider("s3")
     assert isinstance(Storage, provider.S3StorageProvider)
     assert isinstance(Storage, provider.S3StorageProvider)
+    Storage = provider.get_storage_provider("gcs")
+    assert isinstance(Storage, provider.GCSStorageProvider)
     with pytest.raises(RuntimeError):
     with pytest.raises(RuntimeError):
         provider.get_storage_provider("invalid")
         provider.get_storage_provider("invalid")
 
 
@@ -42,6 +47,7 @@ def test_class_instantiation():
         Test()
         Test()
     provider.LocalStorageProvider()
     provider.LocalStorageProvider()
     provider.S3StorageProvider()
     provider.S3StorageProvider()
+    provider.GCSStorageProvider()
 
 
 
 
 class TestLocalStorageProvider:
 class TestLocalStorageProvider:
@@ -175,3 +181,91 @@ class TestS3StorageProvider:
         self.Storage.delete_all_files()
         self.Storage.delete_all_files()
         assert not (upload_dir / self.filename).exists()
         assert not (upload_dir / self.filename).exists()
         assert not (upload_dir / self.filename_extra).exists()
         assert not (upload_dir / self.filename_extra).exists()
+
class TestGCSStorageProvider:
    """Tests for GCSStorageProvider against an in-memory GCS emulator."""

    Storage = provider.GCSStorageProvider()
    Storage.bucket_name = "my-bucket"
    file_content = b"test content"
    filename = "test.txt"
    filename_extra = "test_extra.txt"
    file_bytesio_empty = io.BytesIO()

    @pytest.fixture(scope="class")
    def setup(self):
        """Start the GCS emulator, create the bucket, and wire the provider to it."""
        host, port = "localhost", 9023

        server = create_server(host, port, in_memory=True)
        server.start()
        # The google-cloud-storage client honors this env var and talks to
        # the emulator instead of the real service.
        os.environ["STORAGE_EMULATOR_HOST"] = f"http://{host}:{port}"

        gcs_client = storage.Client()
        bucket = gcs_client.bucket(self.Storage.bucket_name)
        bucket.create()
        self.Storage.gcs_client, self.Storage.bucket = gcs_client, bucket
        yield
        bucket.delete(force=True)
        server.stop()

    def test_upload_file(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        # Uploading without a bucket must fail. Set the attribute directly
        # (the monkeypatch fixture is not callable) and restore it so the
        # rest of the test can use the real bucket.
        original_bucket = self.Storage.bucket
        self.Storage.bucket = None
        try:
            with pytest.raises(Exception):
                self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
        finally:
            self.Storage.bucket = original_bucket
        contents, gcs_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )
        blob = self.Storage.bucket.get_blob(self.filename)
        assert self.file_content == blob.download_as_bytes()
        # local checks
        assert (upload_dir / self.filename).exists()
        assert (upload_dir / self.filename).read_bytes() == self.file_content
        assert contents == self.file_content
        assert gcs_file_path == "gs://" + self.Storage.bucket_name + "/" + self.filename
        # test error if file is empty
        with pytest.raises(ValueError):
            self.Storage.upload_file(self.file_bytesio_empty, self.filename)

    def test_get_file(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        contents, gcs_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )
        file_path = self.Storage.get_file(gcs_file_path)
        assert file_path == str(upload_dir / self.filename)
        assert (upload_dir / self.filename).exists()

    def test_delete_file(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        contents, gcs_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )
        # ensure that local directory has the uploaded file as well
        assert (upload_dir / self.filename).exists()
        assert self.Storage.bucket.get_blob(self.filename).name == self.filename
        self.Storage.delete_file(gcs_file_path)
        # check that deleting file from gcs will delete the local file as well
        assert not (upload_dir / self.filename).exists()
        assert self.Storage.bucket.get_blob(self.filename) is None

    def test_delete_all_files(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        # create 2 files and verify both GCS and local copies of each
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
        blob = self.Storage.bucket.get_blob(self.filename)
        assert (upload_dir / self.filename).exists()
        assert (upload_dir / self.filename).read_bytes() == self.file_content
        assert blob.name == self.filename
        assert self.file_content == blob.download_as_bytes()
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra)
        blob_extra = self.Storage.bucket.get_blob(self.filename_extra)
        assert (upload_dir / self.filename_extra).exists()
        assert (upload_dir / self.filename_extra).read_bytes() == self.file_content
        assert blob_extra.name == self.filename_extra
        assert self.file_content == blob_extra.download_as_bytes()

        self.Storage.delete_all_files()
        assert not (upload_dir / self.filename).exists()
        assert not (upload_dir / self.filename_extra).exists()
        assert self.Storage.bucket.get_blob(self.filename) is None
        assert self.Storage.bucket.get_blob(self.filename_extra) is None

+ 1 - 0
backend/requirements.txt

@@ -102,6 +102,7 @@ pytest~=8.3.2
 pytest-docker~=3.1.1
 pytest-docker~=3.1.1
 
 
 googleapis-common-protos==1.63.2
 googleapis-common-protos==1.63.2
+google-cloud-storage==2.19.0
 
 
 ## LDAP
 ## LDAP
 ldap3==2.9.1
 ldap3==2.9.1

+ 3 - 2
pyproject.toml

@@ -102,8 +102,9 @@ dependencies = [
     "moto[s3]>=5.0.26",
     "moto[s3]>=5.0.26",
 
 
     "googleapis-common-protos==1.63.2",
     "googleapis-common-protos==1.63.2",
-
-    "ldap3==2.9.1"
+    "ldap3==2.9.1",
+    "google-cloud-storage==2.19.0",
+    "gcp-storage-emulator>=2024.8.3",
 ]
 ]
 readme = "README.md"
 readme = "README.md"
 requires-python = ">= 3.11, < 3.13.0a1"
 requires-python = ">= 3.11, < 3.13.0a1"

The diff for this file was suppressed because it is too large.
+ 462 - 266
uv.lock


Some files were not shown because too many files changed in this pull request.