From 7756c81a69c0c6071bda62b39318434ca1fd9f1a Mon Sep 17 00:00:00 2001 From: ByrDen Date: Sat, 18 Apr 2026 05:59:07 +0300 Subject: [PATCH] feat: uploading audio files --- alembic/versions/057_audio_files.py | 58 ++++++++++++++++++ app/common/filetype_ext.py | 15 ++++- app/storage_v2/dependencies/uploads_dep.py | 30 +++++++++- app/storage_v2/main.py | 4 ++ app/storage_v2/models/files_db.py | 4 ++ app/storage_v2/routers/files_rst.py | 24 +++++++- app/storage_v2/utils/__init__.py | 0 app/storage_v2/utils/audio.py | 28 +++++++++ tests/storage_v2/conftest.py | 26 ++++++++ .../functional/test_file_uploads_rst.py | 59 +++++++++++++++---- tests/storage_v2/unit/__init__.py | 0 .../test_adding_missing_mime_to_mimetypes.py | 24 ++++++++ 12 files changed, 255 insertions(+), 17 deletions(-) create mode 100644 alembic/versions/057_audio_files.py create mode 100644 app/storage_v2/utils/__init__.py create mode 100644 app/storage_v2/utils/audio.py create mode 100644 tests/storage_v2/unit/__init__.py create mode 100644 tests/storage_v2/unit/test_adding_missing_mime_to_mimetypes.py diff --git a/alembic/versions/057_audio_files.py b/alembic/versions/057_audio_files.py new file mode 100644 index 00000000..2986ce9a --- /dev/null +++ b/alembic/versions/057_audio_files.py @@ -0,0 +1,58 @@ +"""audio_files + +Revision ID: 057 +Revises: 056 +Create Date: 2026-04-17 04:07:30.284265 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +schema_name = "xi_back_2" +table_name = "files" +column_name = "kind" +enum_name = "file_kind" +tmp_enum_name = f"_{enum_name}" + +old_enum = sa.Enum("UNCATEGORIZED", "IMAGE", name=enum_name) + +# revision identifiers, used by Alembic. +revision: str = "057" +down_revision: Union[str, None] = "056" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute(f"ALTER TYPE {enum_name} ADD VALUE 'AUDIO'") + + +def downgrade() -> None: + conn = op.get_bind() + + # rename new enum + op.execute(f"ALTER TYPE {enum_name} RENAME TO {tmp_enum_name}") + + # update old rows + metadata = sa.MetaData(schema=schema_name) + Files = sa.Table(table_name, metadata, autoload_with=conn) + + conn.execute( + sa.update(Files).where(Files.c.kind == "AUDIO").values(kind="UNCATEGORIZED") + ) + + # remove old members by updating to the new enum + old_enum.create(bind=conn) + op.execute( + f"ALTER TABLE {schema_name}.{table_name}" + f" ALTER COLUMN {column_name}" + f" TYPE {old_enum.name}" + f" USING {column_name}::text::{old_enum.name}" + ) + + # remove new enum + op.execute(f"DROP TYPE {tmp_enum_name}") diff --git a/app/common/filetype_ext.py b/app/common/filetype_ext.py index 0cc68297..e781a00d 100644 --- a/app/common/filetype_ext.py +++ b/app/common/filetype_ext.py @@ -1,7 +1,7 @@ from typing import Final import filetype # type: ignore[import-untyped] -from filetype.types import image # type: ignore[import-untyped] +from filetype.types import audio, image # type: ignore[import-untyped] FILE_HEADER_SIZE: Final[int] = 8192 @@ -17,6 +17,15 @@ image.Webp(), ] +SUPPORTED_AUDIO_FORMATS: list[filetype.Type] = [ + audio.Aac(), + audio.Mp3(), + audio.M4a(), + audio.Ogg(), + audio.Flac(), + audio.Wav(), +] + def match_filetype(obj: bytes, matchers: list[filetype.Type]) -> filetype.Type | None: return filetype.match(obj, matchers) @@ -24,3 +33,7 @@ def match_filetype(obj: bytes, matchers: list[filetype.Type]) -> filetype.Type | def match_image_filetype(obj: bytes) -> filetype.Type | None: return match_filetype(obj, SUPPORTED_IMAGE_FORMATS) + + +def match_audio_filetype(obj: bytes) -> filetype.Type | None: + return match_filetype(obj, SUPPORTED_AUDIO_FORMATS) diff --git a/app/storage_v2/dependencies/uploads_dep.py b/app/storage_v2/dependencies/uploads_dep.py index a5c2c521..0d185958 100644 --- a/app/storage_v2/dependencies/uploads_dep.py +++ b/app/storage_v2/dependencies/uploads_dep.py @@ -1,10 +1,15 @@ +from mimetypes import guess_all_extensions from typing import Annotated from fastapi import Depends, UploadFile from starlette import status from app.common.fastapi_ext import Responses, with_responses -from app.common.filetype_ext import FILE_HEADER_SIZE, match_image_filetype +from app.common.filetype_ext import ( + FILE_HEADER_SIZE, + match_audio_filetype, + match_image_filetype, +) class FileFormatResponses(Responses): @@ -31,3 +36,26 @@ async def validate_image_upload(upload: UploadFile) -> UploadFile: ValidatedImageUpload = Annotated[UploadFile, Depends(validate_image_upload)] + + +@with_responses(FileFormatResponses) +async def validate_audio_upload(upload: UploadFile) -> UploadFile: + upload_header_data = await upload.read(FILE_HEADER_SIZE) + audio_type = match_audio_filetype(upload_header_data) + + if audio_type is None: + raise FileFormatResponses.WRONG_FORMAT + + if upload.content_type is None: + raise FileFormatResponses.CONTENT_TYPE_MISMATCH + + audio_extensions = guess_all_extensions(upload.content_type) + + if len(audio_extensions) == 0 or f".{audio_type.extension}" not in audio_extensions: + raise FileFormatResponses.CONTENT_TYPE_MISMATCH + + await upload.seek(0) + return upload + + +ValidatedAudioUpload = Annotated[UploadFile, Depends(validate_audio_upload)] diff --git a/app/storage_v2/main.py b/app/storage_v2/main.py index b0e04253..33d0e758 100644 --- a/app/storage_v2/main.py +++ b/app/storage_v2/main.py @@ -13,6 +13,7 @@ files_rst, ydocs_hocus_int, ) +from app.storage_v2.utils.audio import add_missing_mime_to_mimetypes outside_router = APIRouterExt(prefix="/api/public/storage-service/v2") @@ -40,6 +41,9 @@ async def lifespan(_: Any) -> AsyncIterator[None]: settings.storage_path.mkdir(exist_ok=True) for sub_folder in FILE_KIND_TO_FOLDER.values(): (settings.storage_path / sub_folder).mkdir(exist_ok=True) + + add_missing_mime_to_mimetypes() + yield diff --git a/app/storage_v2/models/files_db.py b/app/storage_v2/models/files_db.py index 8dcf46c5..bdbde2e6 100644 --- a/app/storage_v2/models/files_db.py +++ b/app/storage_v2/models/files_db.py @@ -14,6 +14,7 @@ class FileKind(StrEnum): UNCATEGORIZED = "uncategorized" IMAGE = "image" + AUDIO = "audio" ContentDisposition = Literal["inline", "attachment"] @@ -21,14 +22,17 @@ class FileKind(StrEnum): FILE_KIND_TO_FOLDER: dict[FileKind, str] = { FileKind.UNCATEGORIZED: "uncategorized", FileKind.IMAGE: "images", + FileKind.AUDIO: "audios", } FILE_KIND_TO_MEDIA_TYPE: dict[FileKind, str | None] = { FileKind.UNCATEGORIZED: None, FileKind.IMAGE: "image/webp", + FileKind.AUDIO: None, } FILE_KIND_TO_CONTENT_DISPOSITION: dict[FileKind, ContentDisposition] = { FileKind.UNCATEGORIZED: "attachment", FileKind.IMAGE: "inline", + FileKind.AUDIO: "attachment", } diff --git a/app/storage_v2/routers/files_rst.py b/app/storage_v2/routers/files_rst.py index 548deafa..401f9e2c 100644 --- a/app/storage_v2/routers/files_rst.py +++ b/app/storage_v2/routers/files_rst.py @@ -17,7 +17,10 @@ StorageTokenResponses, UploadAllowedStorageTokenPayload, ) -from app.storage_v2.dependencies.uploads_dep import ValidatedImageUpload +from app.storage_v2.dependencies.uploads_dep import ( + ValidatedAudioUpload, + ValidatedImageUpload, +) from app.storage_v2.models.access_groups_db import AccessGroupFile from app.storage_v2.models.files_db import File, FileKind @@ -85,6 +88,24 @@ async def upload_image_file( ) +@router.post( + "/file-kinds/audio/files/", + status_code=status.HTTP_201_CREATED, + response_model=File.ResponseSchema, + summary="Upload a new audio file", +) +async def upload_audio_file( + storage_token_payload: UploadAllowedStorageTokenPayload, + upload: ValidatedAudioUpload, +) -> File: + return await upload_file( + storage_token_payload=storage_token_payload, + upload_content=await upload.read(), + upload_filename=upload.filename, + file_kind=FileKind.AUDIO, + ) + + @router.get( "/files/{file_id}/meta/", response_model=File.ResponseSchema, @@ -107,7 +128,6 @@ def parse_http_datetime(header: str | None) -> datetime | None: @router.get( "/files/{file_id}/", - response_model=File.ResponseSchema, summary="Read any file by id", ) async def read_file( diff --git a/app/storage_v2/utils/__init__.py b/app/storage_v2/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/storage_v2/utils/audio.py b/app/storage_v2/utils/audio.py new file mode 100644 index 00000000..08307693 --- /dev/null +++ b/app/storage_v2/utils/audio.py @@ -0,0 +1,28 @@ +import mimetypes + +from app.common.filetype_ext import SUPPORTED_AUDIO_FORMATS + +SUPPORTED_AUDIO_MIME_TYPES: dict[str, str] = { + # taken from https://developer.mozilla.org/en-US/docs/Web/Media/Guides/Formats/Containers + "audio/aac": ".aac", + "audio/mpeg": ".mp3", + "audio/mp3": ".mp3", + "audio/mp4": ".m4a", + "audio/ogg": ".ogg", + "audio/flac": ".flac", + "audio/x-flac": ".flac", + "audio/wave": ".wav", + "audio/wav": ".wav", + "audio/x-wav": ".wav", + "audio/x-pn-wav": ".wav", +} + + +def add_missing_mime_to_mimetypes() -> None: + for audio_format in SUPPORTED_AUDIO_FORMATS: + if mimetypes.guess_extension(audio_format.mime) is None: + mimetypes.add_type(audio_format.mime, f".{audio_format.extension}") + + for mime, extension in SUPPORTED_AUDIO_MIME_TYPES.items(): + if mimetypes.guess_extension(mime) is None: + mimetypes.add_type(mime, extension) diff --git a/tests/storage_v2/conftest.py b/tests/storage_v2/conftest.py index ab4f4bd3..790d36bb 100644 --- a/tests/storage_v2/conftest.py +++ b/tests/storage_v2/conftest.py @@ -1,3 +1,4 @@ +import wave from collections.abc import AsyncIterator from dataclasses import dataclass from io import BytesIO @@ -197,11 +198,36 @@ def png_image_file_input_data( ) +@pytest.fixture() +def wav_audio_file_content(faker: Faker) -> bytes: + audio_content = BytesIO() + with wave.open(audio_content, "wb") as f: + f.setnchannels(1) + f.setsampwidth(2) + f.setframerate(44100) + f.writeframes(faker.binary(44100)) + return audio_content.getvalue() + + +@pytest.fixture() +def wav_audio_file_input_data( + faker: Faker, wav_audio_file_content: bytes +) -> FileInputData: + return FileInputData( + kind=FileKind.AUDIO, + name=faker.file_name(extension="wav"), + input_content=wav_audio_file_content, + processed_content=wav_audio_file_content, + content_type="audio/wav", + ) + + @pytest.fixture( params=[ pytest.param(lf("uncategorized_file_input_data"), id="uncategorized"), pytest.param(lf("webp_image_file_input_data"), id="webp_image"), pytest.param(lf("png_image_file_input_data"), id="png_image"), + pytest.param(lf("wav_audio_file_input_data"), id="wav_audio"), ], ) def parametrized_file_input_data( diff --git a/tests/storage_v2/functional/test_file_uploads_rst.py b/tests/storage_v2/functional/test_file_uploads_rst.py index 7173678b..319b85da 100644 --- a/tests/storage_v2/functional/test_file_uploads_rst.py +++ b/tests/storage_v2/functional/test_file_uploads_rst.py @@ -1,5 +1,6 @@ import random from io import BytesIO +from typing import Literal from uuid import UUID import pytest @@ -116,6 +117,14 @@ async def test_file_uploading( ("image/tiff", "tif"), ("image/tiff", "tiff"), ("image/webp", "webp"), + ("audio/aac", "aac"), + ("audio/mpeg", "mp3"), + ("audio/mp4", "m4a"), + ("audio/ogg", "ogg"), + ("audio/flac", "flac"), + ("audio/x-flac", "flac"), + ("audio/wav", "wav"), + ("audio/x-wav", "wav"), ] @@ -124,9 +133,10 @@ async def test_file_uploading( [ pytest.param(lf("webp_image_file_input_data"), id="webp"), pytest.param(lf("png_image_file_input_data"), id="png"), + pytest.param(lf("wav_audio_file_input_data"), id="wav"), ], ) -async def test_image_file_uploading_content_type_mismatch( +async def test_file_uploading_content_type_mismatch( faker: Faker, authorized_client: TestClient, file_upload_storage_token: str, @@ -142,7 +152,7 @@ async def test_image_file_uploading_content_type_mismatch( assert_response( authorized_client.post( - "/api/protected/storage-service/v2/file-kinds/image/files/", + f"/api/protected/storage-service/v2/file-kinds/{file_input_data.kind}/files/", headers={"X-Storage-Token": file_upload_storage_token}, files={ "upload": ( @@ -157,23 +167,46 @@ async def test_image_file_uploading_content_type_mismatch( ) -async def test_image_file_uploading_wrong_content_format( - faker: Faker, +@pytest.mark.parametrize( + ("file_kind", "upload"), + [ + pytest.param( + "image", + ( + lfc( + "faker.file_name", + extension="webp", + ), + lf("uncategorized_file_content"), + "image/webp", + ), + id="image", + ), + pytest.param( + "audio", + ( + lfc( + "faker.file_name", + extension="mp3", + ), + lf("uncategorized_file_content"), + "audio/mpeg", + ), + id="audio", + ), + ], +) +async def test_file_uploading_wrong_content_format( authorized_client: TestClient, - uncategorized_file_content: bytes, file_upload_storage_token: str, + file_kind: Literal["image", "audio"], + upload: tuple[str, bytes, str], ) -> None: assert_response( authorized_client.post( - "/api/protected/storage-service/v2/file-kinds/image/files/", + f"/api/protected/storage-service/v2/file-kinds/{file_kind}/files/", headers={"X-Storage-Token": file_upload_storage_token}, - files={ - "upload": ( - faker.file_name(extension="webp"), - uncategorized_file_content, - "image/webp", - ) - }, + files={"upload": upload}, ), expected_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE, expected_json={"detail": "Invalid file format"}, diff --git a/tests/storage_v2/unit/__init__.py b/tests/storage_v2/unit/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/storage_v2/unit/test_adding_missing_mime_to_mimetypes.py b/tests/storage_v2/unit/test_adding_missing_mime_to_mimetypes.py new file mode 100644 index 00000000..d05638d3 --- /dev/null +++ b/tests/storage_v2/unit/test_adding_missing_mime_to_mimetypes.py @@ -0,0 +1,24 @@ +import mimetypes + +import pytest + +from app.common.filetype_ext import SUPPORTED_AUDIO_FORMATS +from app.storage_v2.utils.audio import SUPPORTED_AUDIO_MIME_TYPES + +pytestmark = pytest.mark.anyio + + +async def test_matching_extension_by_filetype_mime_types() -> None: + for audio_format in SUPPORTED_AUDIO_FORMATS: + audio_extensions = mimetypes.guess_all_extensions(audio_format.mime) + + assert len(audio_extensions) != 0 + assert f".{audio_format.extension}" in audio_extensions + + +async def test_presence_extension_by_mime_types() -> None: + for mime, extension in SUPPORTED_AUDIO_MIME_TYPES.items(): + audio_extensions = mimetypes.guess_all_extensions(mime) + + assert len(audio_extensions) != 0 + assert extension in audio_extensions