Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions alembic/versions/057_audio_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""audio_files

Revision ID: 057
Revises: 056
Create Date: 2026-04-17 04:07:30.284265

"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

# Names of the database objects this migration operates on.
schema_name = "xi_back_2"
table_name = "files"
column_name = "kind"
enum_name = "file_kind"
# Temporary name the current enum type is renamed to while downgrade()
# recreates the previous definition under the original name.
tmp_enum_name = f"_{enum_name}"

# Enum definition without the 'AUDIO' label added by upgrade(); downgrade() recreates it.
old_enum = sa.Enum("UNCATEGORIZED", "IMAGE", name=enum_name)

# revision identifiers, used by Alembic.
revision: str = "057"
down_revision: Union[str, None] = "056"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Add the ``AUDIO`` label to the ``file_kind`` enum type.

    ``IF NOT EXISTS`` keeps the statement re-runnable: if the label is already
    present (for example after a partially applied deployment), the statement
    is skipped instead of failing with a duplicate-label error.
    """
    # NOTE(review): PostgreSQL versions before 12 reject ``ALTER TYPE ... ADD VALUE``
    # inside a transaction block; if such servers must be supported, run this
    # inside ``op.get_context().autocommit_block()`` — confirm target version.
    op.execute(f"ALTER TYPE {enum_name} ADD VALUE IF NOT EXISTS 'AUDIO'")


def downgrade() -> None:
    """Restore ``file_kind`` to its definition without the ``AUDIO`` label.

    The label is not removed in place. Instead the current type is renamed to
    a temporary name, rows using ``AUDIO`` are moved to ``UNCATEGORIZED``, the
    previous enum is recreated under the original name, the column is
    converted to it, and the temporary type is dropped.
    """
    bind = op.get_bind()

    # Park the current enum (the one that contains 'AUDIO') under a temporary name
    op.execute(f"ALTER TYPE {enum_name} RENAME TO {tmp_enum_name}")

    # Reassign rows that still use the label being removed
    files_table = sa.Table(
        table_name,
        sa.MetaData(schema=schema_name),
        autoload_with=bind,
    )
    reassign_audio_rows = (
        sa.update(files_table)
        .where(files_table.c.kind == "AUDIO")
        .values(kind="UNCATEGORIZED")
    )
    bind.execute(reassign_audio_rows)

    # Recreate the previous enum under the original name and convert the column to it
    old_enum.create(bind=bind)
    retype_column = (
        f"ALTER TABLE {schema_name}.{table_name}"
        f" ALTER COLUMN {column_name}"
        f" TYPE {old_enum.name}"
        f" USING {column_name}::text::{old_enum.name}"
    )
    op.execute(retype_column)

    # Drop the temporary enum now that the column has been converted away from it
    op.execute(f"DROP TYPE {tmp_enum_name}")
15 changes: 14 additions & 1 deletion app/common/filetype_ext.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Final

import filetype # type: ignore[import-untyped]
from filetype.types import image # type: ignore[import-untyped]
from filetype.types import audio, image # type: ignore[import-untyped]

FILE_HEADER_SIZE: Final[int] = 8192

Expand All @@ -17,10 +17,23 @@
image.Webp(),
]

# Matchers from ``filetype.types.audio`` that ``match_audio_filetype`` checks
# file content against.
SUPPORTED_AUDIO_FORMATS: list[filetype.Type] = [
    audio.Aac(),
    audio.Mp3(),
    audio.M4a(),
    audio.Ogg(),
    audio.Flac(),
    audio.Wav(),
]


def match_filetype(obj: bytes, matchers: list[filetype.Type]) -> filetype.Type | None:
    """Run ``filetype.match`` on ``obj`` restricted to ``matchers``.

    Returns whatever ``filetype.match`` yields: a matcher, or ``None``.
    """
    detected = filetype.match(obj, matchers)
    return detected


def match_image_filetype(obj: bytes) -> filetype.Type | None:
    """Match ``obj`` against ``SUPPORTED_IMAGE_FORMATS`` only."""
    return match_filetype(obj, matchers=SUPPORTED_IMAGE_FORMATS)


def match_audio_filetype(obj: bytes) -> filetype.Type | None:
    """Match ``obj`` against ``SUPPORTED_AUDIO_FORMATS`` only."""
    return match_filetype(obj, matchers=SUPPORTED_AUDIO_FORMATS)
30 changes: 29 additions & 1 deletion app/storage_v2/dependencies/uploads_dep.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from mimetypes import guess_all_extensions
from typing import Annotated

from fastapi import Depends, UploadFile
from starlette import status

from app.common.fastapi_ext import Responses, with_responses
from app.common.filetype_ext import FILE_HEADER_SIZE, match_image_filetype
from app.common.filetype_ext import (
FILE_HEADER_SIZE,
match_audio_filetype,
match_image_filetype,
)


class FileFormatResponses(Responses):
Expand All @@ -31,3 +36,26 @@ async def validate_image_upload(upload: UploadFile) -> UploadFile:


ValidatedImageUpload = Annotated[UploadFile, Depends(validate_image_upload)]


@with_responses(FileFormatResponses)
async def validate_audio_upload(upload: UploadFile) -> UploadFile:
    """Check that ``upload`` is supported audio with a consistent content type.

    The first ``FILE_HEADER_SIZE`` bytes are read and matched against the
    supported audio formats. The detected extension must then be one of the
    extensions ``mimetypes`` associates with the declared content type.

    Returns:
        The same ``upload``, rewound to position 0.

    Raises:
        FileFormatResponses.WRONG_FORMAT: no supported audio format matched.
        FileFormatResponses.CONTENT_TYPE_MISMATCH: the content type is missing
            or is not associated with the detected extension.
    """
    header = await upload.read(FILE_HEADER_SIZE)
    detected_type = match_audio_filetype(header)
    if detected_type is None:
        raise FileFormatResponses.WRONG_FORMAT

    declared_type = upload.content_type
    if declared_type is None:
        raise FileFormatResponses.CONTENT_TYPE_MISMATCH

    # A content type with no known extensions yields an empty list, which
    # fails the membership test as well, so no separate emptiness check is needed.
    expected_extension = f".{detected_type.extension}"
    if expected_extension not in guess_all_extensions(declared_type):
        raise FileFormatResponses.CONTENT_TYPE_MISMATCH

    # Rewind so the consumer reads the file from the beginning
    await upload.seek(0)
    return upload


ValidatedAudioUpload = Annotated[UploadFile, Depends(validate_audio_upload)]
4 changes: 4 additions & 0 deletions app/storage_v2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
files_rst,
ydocs_hocus_int,
)
from app.storage_v2.utils.audio import add_missing_mime_to_mimetypes

outside_router = APIRouterExt(prefix="/api/public/storage-service/v2")

Expand Down Expand Up @@ -40,6 +41,9 @@ async def lifespan(_: Any) -> AsyncIterator[None]:
settings.storage_path.mkdir(exist_ok=True)
for sub_folder in FILE_KIND_TO_FOLDER.values():
(settings.storage_path / sub_folder).mkdir(exist_ok=True)

add_missing_mime_to_mimetypes()

yield


Expand Down
4 changes: 4 additions & 0 deletions app/storage_v2/models/files_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,25 @@
class FileKind(StrEnum):
    """Kinds of stored files.

    Every member is a key of ``FILE_KIND_TO_FOLDER``,
    ``FILE_KIND_TO_MEDIA_TYPE`` and ``FILE_KIND_TO_CONTENT_DISPOSITION``.
    """

    # NOTE(review): migration 057 adds the upper-case label 'AUDIO' to the
    # ``file_kind`` database enum — presumably the column stores member names
    # rather than these lower-case values; confirm when adding a member.
    UNCATEGORIZED = "uncategorized"
    IMAGE = "image"
    AUDIO = "audio"


ContentDisposition = Literal["inline", "attachment"]

# Sub-folder name for each file kind; the application lifespan creates every
# one of these under ``settings.storage_path``.
FILE_KIND_TO_FOLDER: dict[FileKind, str] = {
    FileKind.UNCATEGORIZED: "uncategorized",
    FileKind.IMAGE: "images",
    FileKind.AUDIO: "audios",
}
# Media type for each file kind, or ``None`` where no single media type is fixed.
# NOTE(review): presumably consumed when serving files — confirm against the read endpoint.
FILE_KIND_TO_MEDIA_TYPE: dict[FileKind, str | None] = {
    FileKind.UNCATEGORIZED: None,
    FileKind.IMAGE: "image/webp",
    FileKind.AUDIO: None,
}
# Content disposition for each file kind, limited to the ``ContentDisposition`` literals.
FILE_KIND_TO_CONTENT_DISPOSITION: dict[FileKind, ContentDisposition] = {
    FileKind.UNCATEGORIZED: "attachment",
    FileKind.IMAGE: "inline",
    FileKind.AUDIO: "attachment",
}


Expand Down
24 changes: 22 additions & 2 deletions app/storage_v2/routers/files_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
StorageTokenResponses,
UploadAllowedStorageTokenPayload,
)
from app.storage_v2.dependencies.uploads_dep import ValidatedImageUpload
from app.storage_v2.dependencies.uploads_dep import (
ValidatedAudioUpload,
ValidatedImageUpload,
)
from app.storage_v2.models.access_groups_db import AccessGroupFile
from app.storage_v2.models.files_db import File, FileKind

Expand Down Expand Up @@ -85,6 +88,24 @@ async def upload_image_file(
)


@router.post(
    "/file-kinds/audio/files/",
    status_code=status.HTTP_201_CREATED,
    response_model=File.ResponseSchema,
    summary="Upload a new audio file",
)
async def upload_audio_file(
    storage_token_payload: UploadAllowedStorageTokenPayload,
    upload: ValidatedAudioUpload,
) -> File:
    # Stores the upload as a ``FileKind.AUDIO`` file; format and content-type
    # checks are done by the ``ValidatedAudioUpload`` dependency.
    # (Comments are used here so the function's ``__doc__`` stays unset.)
    audio_bytes = await upload.read()
    stored_file = await upload_file(
        storage_token_payload=storage_token_payload,
        upload_content=audio_bytes,
        upload_filename=upload.filename,
        file_kind=FileKind.AUDIO,
    )
    return stored_file


@router.get(
"/files/{file_id}/meta/",
response_model=File.ResponseSchema,
Expand All @@ -107,7 +128,6 @@ def parse_http_datetime(header: str | None) -> datetime | None:

@router.get(
"/files/{file_id}/",
response_model=File.ResponseSchema,
summary="Read any file by id",
)
async def read_file(
Expand Down
Empty file.
28 changes: 28 additions & 0 deletions app/storage_v2/utils/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import mimetypes

from app.common.filetype_ext import SUPPORTED_AUDIO_FORMATS

# Content type -> file extension pairs that ``add_missing_mime_to_mimetypes``
# registers with ``mimetypes``; several content types may share one extension.
SUPPORTED_AUDIO_MIME_TYPES: dict[str, str] = {
    # taken from https://developer.mozilla.org/en-US/docs/Web/Media/Guides/Formats/Containers
    "audio/aac": ".aac",
    "audio/mpeg": ".mp3",
    "audio/mp3": ".mp3",
    "audio/mp4": ".m4a",
    "audio/ogg": ".ogg",
    "audio/flac": ".flac",
    "audio/x-flac": ".flac",
    "audio/wave": ".wav",
    "audio/wav": ".wav",
    "audio/x-wav": ".wav",
    "audio/x-pn-wav": ".wav",
}


def add_missing_mime_to_mimetypes() -> None:
    """Register the audio content-type/extension pairs ``mimetypes`` lacks.

    Pairs come from two sources, processed in this order: the ``mime`` and
    ``extension`` of every matcher in ``SUPPORTED_AUDIO_FORMATS``, then the
    entries of ``SUPPORTED_AUDIO_MIME_TYPES``.

    A pair is registered whenever that specific extension is not yet
    associated with the content type. Checking only whether the content type
    has *some* extension is not enough: the audio upload validator looks for
    the detected extension in ``guess_all_extensions(content_type)``, so a
    platform table that maps the type to other extensions only would leave
    valid uploads rejected.
    """
    pairs: list[tuple[str, str]] = [
        (audio_format.mime, f".{audio_format.extension}")
        for audio_format in SUPPORTED_AUDIO_FORMATS
    ]
    pairs.extend(SUPPORTED_AUDIO_MIME_TYPES.items())

    for mime, extension in pairs:
        if extension not in mimetypes.guess_all_extensions(mime):
            mimetypes.add_type(mime, extension)
26 changes: 26 additions & 0 deletions tests/storage_v2/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import wave
from collections.abc import AsyncIterator
from dataclasses import dataclass
from io import BytesIO
Expand Down Expand Up @@ -197,11 +198,36 @@ def png_image_file_input_data(
)


@pytest.fixture()
def wav_audio_file_content(faker: Faker) -> bytes:
    """Bytes of an in-memory WAV file: 1 channel, 2-byte samples, 44100 Hz.

    The frame data is 44100 bytes produced by ``faker.binary``.
    """
    buffer = BytesIO()
    with wave.open(buffer, "wb") as wav_writer:
        wav_writer.setnchannels(1)
        wav_writer.setsampwidth(2)
        wav_writer.setframerate(44100)
        wav_writer.writeframes(faker.binary(44100))
    return buffer.getvalue()


@pytest.fixture()
def wav_audio_file_input_data(
    faker: Faker, wav_audio_file_content: bytes
) -> FileInputData:
    """Input data for a WAV audio upload.

    The same bytes are used as both the input and the processed content.
    """
    wav_name = faker.file_name(extension="wav")
    return FileInputData(
        kind=FileKind.AUDIO,
        name=wav_name,
        input_content=wav_audio_file_content,
        processed_content=wav_audio_file_content,
        content_type="audio/wav",
    )


@pytest.fixture(
params=[
pytest.param(lf("uncategorized_file_input_data"), id="uncategorized"),
pytest.param(lf("webp_image_file_input_data"), id="webp_image"),
pytest.param(lf("png_image_file_input_data"), id="png_image"),
pytest.param(lf("wav_audio_file_input_data"), id="wav_audio"),
],
)
def parametrized_file_input_data(
Expand Down
59 changes: 46 additions & 13 deletions tests/storage_v2/functional/test_file_uploads_rst.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import random
from io import BytesIO
from typing import Literal
from uuid import UUID

import pytest
Expand Down Expand Up @@ -116,6 +117,14 @@ async def test_file_uploading(
("image/tiff", "tif"),
("image/tiff", "tiff"),
("image/webp", "webp"),
("audio/aac", "aac"),
("audio/mpeg", "mp3"),
("audio/mp4", "m4a"),
("audio/ogg", "ogg"),
("audio/flac", "flac"),
("audio/x-flac", "flac"),
("audio/wav", "wav"),
("audio/x-wav", "wav"),
]


Expand All @@ -124,9 +133,10 @@ async def test_file_uploading(
[
pytest.param(lf("webp_image_file_input_data"), id="webp"),
pytest.param(lf("png_image_file_input_data"), id="png"),
pytest.param(lf("wav_audio_file_input_data"), id="wav"),
],
)
async def test_image_file_uploading_content_type_mismatch(
async def test_file_uploading_content_type_mismatch(
faker: Faker,
authorized_client: TestClient,
file_upload_storage_token: str,
Expand All @@ -142,7 +152,7 @@ async def test_image_file_uploading_content_type_mismatch(

assert_response(
authorized_client.post(
"/api/protected/storage-service/v2/file-kinds/image/files/",
f"/api/protected/storage-service/v2/file-kinds/{file_input_data.kind}/files/",
headers={"X-Storage-Token": file_upload_storage_token},
files={
"upload": (
Expand All @@ -157,23 +167,46 @@ async def test_image_file_uploading_content_type_mismatch(
)


async def test_image_file_uploading_wrong_content_format(
faker: Faker,
@pytest.mark.parametrize(
("file_kind", "upload"),
[
pytest.param(
"image",
(
lfc(
"faker.file_name",
extension="webp",
),
lf("uncategorized_file_content"),
"image/webp",
),
id="image",
),
pytest.param(
"audio",
(
lfc(
"faker.file_name",
extension="mp3",
),
lf("uncategorized_file_content"),
"audio/mpeg",
),
id="audio",
),
],
)
async def test_file_uploading_wrong_content_format(
authorized_client: TestClient,
uncategorized_file_content: bytes,
file_upload_storage_token: str,
file_kind: Literal["image", "audio"],
upload: tuple[str, bytes, str],
) -> None:
assert_response(
authorized_client.post(
"/api/protected/storage-service/v2/file-kinds/image/files/",
f"/api/protected/storage-service/v2/file-kinds/{file_kind}/files/",
headers={"X-Storage-Token": file_upload_storage_token},
files={
"upload": (
faker.file_name(extension="webp"),
uncategorized_file_content,
"image/webp",
)
},
files={"upload": upload},
),
expected_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
expected_json={"detail": "Invalid file format"},
Expand Down
Empty file.
Loading
Loading