Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ dependencies = [
"ffmpeg>=1.4",
"flake8>=7.3.0",
"flask>=3.1.2",
"flask-restful>=0.3.10",
"openai-whisper>=20250625",
"pandas>=2.3.3",
"pytest==8.4.2",
Expand Down
26 changes: 26 additions & 0 deletions src/app/convertor/routes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from flask import Blueprint
from flask_restful import Api, Resource, request

from app.convertor.service import FileService

# Blueprint that groups the convertor endpoints under the "/convertor" URL prefix.
upload_bp = Blueprint("convertor", __name__, url_prefix="/convertor")
# Flask-RESTful API bound to the blueprint; resources are registered on this object.
api = Api(upload_bp)


class FileUploadResource(Resource):
    """REST resource that receives an audio upload and returns its transcription."""

    def post(self):
        """Transcribe the file sent in the multipart field ``file``.

        The optional form fields ``language`` and ``model`` are forwarded to
        ``FileService`` unchanged (``None`` when a field is absent). Uploads
        are limited to ``mp3``/``wav``/``ogg`` files of at most 1000 MB.

        Returns:
            Whatever ``FileService.convert`` returns for the uploaded file.
        """
        form = request.form
        service = FileService(
            allowed_extensions=["mp3", "wav", "ogg"],
            max_size_mb=1000,
            language=form.get("language"),
            model=form.get("model"),
        )

        uploaded = request.files.get("file")
        return service.convert(uploaded)


# Register the upload resource at "/upload/" on the blueprint-bound API.
api.add_resource(FileUploadResource, "/upload/")
63 changes: 63 additions & 0 deletions src/app/convertor/service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os

from app.convertor.transcription import Transcription


class FileService:
    """Validate an uploaded audio file and run it through ``Transcription``.

    Failures are reported as ``(payload, status)`` tuples instead of being
    raised, so a view can return the result of ``convert`` directly.
    """

    def __init__(self, allowed_extensions, max_size_mb, model, language):
        """Store the validation limits and the transcription settings.

        Args:
            allowed_extensions: Iterable of accepted file extensions. Entries
                are matched case-insensitively and a leading dot is ignored,
                so ``"mp3"``, ``"MP3"`` and ``".mp3"`` are equivalent.
            max_size_mb: Maximum upload size in megabytes (1 MB = 1024 * 1024
                bytes).
            model: Model identifier forwarded to ``Transcription``. A falsy
                value (``None`` or ``""``) lets ``Transcription`` use its own
                default.
            language: Language forwarded to ``Transcription`` unchanged.
        """
        self.allowed_extensions = allowed_extensions
        self.max_size_mb = max_size_mb
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.model = model
        self.language = language

    def _allowed_extension(self, filename):
        """Return ``True`` when the extension of *filename* is allowed."""
        ext = os.path.splitext(filename)[1].lower().replace(".", "")
        # Normalise the configured entries the same way as the filename so
        # the check does not depend on how the caller spelled them.
        allowed = {entry.lower().lstrip(".") for entry in self.allowed_extensions}
        return ext in allowed

    def _allowed_size(self, file):
        """Return ``True`` when *file* is no larger than the configured limit.

        The size is measured by seeking to the end of the stream; the stream
        is rewound to the start afterwards so it can be read from the top.
        """
        file.seek(0, os.SEEK_END)
        size = file.tell()
        file.seek(0)
        return size <= self.max_size_bytes

    def validate(self, file):
        """Check *file* against every upload rule.

        Returns:
            ``None`` when the file is acceptable, otherwise an
            ``({"error": message}, 400)`` tuple describing the first rule
            that failed.
        """
        if not file:
            return {"error": "Missing file"}, 400

        # Filename check
        if not file.filename:
            return {"error": "Invalid filename"}, 400

        # Extension check
        if not self._allowed_extension(file.filename):
            return {"error": "File extension not allowed"}, 400

        # Declared MIME type check (a missing MIME type counts as non-audio)
        if not (file.mimetype or "").startswith("audio/"):
            return {"error": "File is not an audio type"}, 400

        # Size check
        if not self._allowed_size(file):
            return {"error": f"File exceeds {self.max_size_mb} MB"}, 400

        return None

    def convert(self, file):
        """Validate *file* and transcribe it.

        Returns:
            ``(payload, status)``: status 200 with the transcription on
            success, 400 when validation fails or ``Transcription`` rejects
            the request with a ``ValueError``, and 500 for any other failure
            during transcription.
        """
        error = self.validate(file)
        if error:
            return error

        options = {"language": self.language}
        # Only override the model when one was actually requested; passing
        # ``None`` would replace Transcription's default with an invalid ID.
        if self.model:
            options["model_id"] = self.model

        try:
            transcription = Transcription(file_storage=file, **options)
            text = transcription.get_transcription()
        except ValueError as exc:
            # Transcription raises ValueError for bad client-supplied input.
            return {"error": str(exc)}, 400
        except Exception as exc:
            # Anything else is a server-side failure, not a bad request.
            return {"error": str(exc)}, 500

        return {
            "message": "File converted successfully",
            "transcription": text,
        }, 200
Empty file.
Empty file.
30 changes: 0 additions & 30 deletions src/app/convertor/service/convertor_service.py

This file was deleted.

Empty file.
Binary file not shown.
135 changes: 0 additions & 135 deletions src/app/convertor/service/transcription.py

This file was deleted.

96 changes: 96 additions & 0 deletions src/app/convertor/transcription.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import os
import tempfile

import whisper


class Transcription:
    """Transcribe an uploaded audio file with a Whisper model.

    The upload is validated on construction (file extension and model ID)
    and transcribed on demand by ``get_transcription``.
    """

    # File extensions accepted by ``_check_file_extension``.
    whisper_allowed_extensions = [
        "flac",
        "m4a",
        "mp3",
        "mp4",
        "mpeg",
        "mpga",
        "oga",
        "ogg",
        "wav",
        "webm",
    ]

    # Model names accepted by ``_check_whisper_model_id``.
    whisper_model_ids = ["tiny", "base", "small", "medium", "large", "turbo"]

    def __init__(
        self,
        file_storage,
        model_id="tiny",
        show_text=False,
        language="english",
        test_mode=False,
        text_preview_size=None,
        output_file_name="",
    ):
        """Store the settings and validate the upload and the model ID.

        Args:
            file_storage: Uploaded file; must expose ``filename`` and
                ``save(path)``.
            model_id: Whisper model name; must be in ``whisper_model_ids``.
            show_text: When true, print the transcription after decoding.
            language: Language passed to Whisper; a falsy value lets Whisper
                detect the language itself.
            test_mode: When true, also write the transcription to a text
                file via ``_save_transcription``.
            text_preview_size: Number of leading characters printed when
                ``show_text`` is set; falsy prints the whole text.
            output_file_name: Base name (without ``.txt``) of the file
                written in test mode.

        Raises:
            ValueError: If the file extension or the model ID is not allowed.
        """
        self.file_storage = file_storage
        self.model_id = model_id
        self.show_text = show_text
        self.language = language
        self.test_mode = test_mode
        self.text_preview_size = text_preview_size

        self.output_file_name = output_file_name

        check, ext = self._check_file_extension()
        if not check:
            raise ValueError(f"Invalid file format: .{ext}")

        if not self._check_whisper_model_id():
            raise ValueError(f"Invalid model ID selection: {self.model_id}")

    def _check_file_extension(self):
        """Return ``(is_allowed, extension)`` for the uploaded file's name.

        The extension is lower-cased and has no leading dot. A name without
        an extension yields ``""``, so a bare name such as ``"mp3"`` is not
        mistaken for an ``.mp3`` file.
        """
        filename = self.file_storage.filename or ""
        ext = os.path.splitext(filename)[1].lower().lstrip(".")
        return ext in self.whisper_allowed_extensions, ext

    def _check_whisper_model_id(self):
        """Return ``True`` when ``model_id`` is one of ``whisper_model_ids``."""
        return self.model_id in self.whisper_model_ids

    def _get_model(self):
        """Load and return the Whisper model named by ``model_id``."""
        return whisper.load_model(self.model_id)

    def _save_transcription(self, text, output_file_name=""):
        """Write *text* to ``<name>.txt`` encoded as UTF-8.

        ``output_file_name`` takes precedence over ``self.output_file_name``.
        When both are empty the file is named ``.txt``.
        """
        file_name = output_file_name or self.output_file_name
        with open(f"{file_name}.txt", "w", encoding="utf-8") as f:
            f.write(text)

    def get_transcription(self):
        """Transcribe the uploaded file and return the recognised text.

        The upload is written to a temporary file that is always removed
        afterwards, whether or not saving or transcription succeeds.
        """
        model = self._get_model()

        # Temporary storage: only reserve the path here and close the handle
        # straight away, because a still-open NamedTemporaryFile cannot be
        # reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as tmp:
            temp_path = tmp.name

        try:
            # Saving happens inside the try block so the temporary file is
            # cleaned up even when writing the upload fails.
            self.file_storage.save(temp_path)

            # An empty language is normalised to None so Whisper falls back
            # to language detection instead of rejecting "".
            result = model.transcribe(
                temp_path, fp16=False, language=self.language or None
            )
            text = result["text"]

            if self.show_text:
                if self.text_preview_size:
                    print(text[: self.text_preview_size])
                else:
                    print(text)
            if self.test_mode:
                self._save_transcription(text)

            return text

        finally:
            os.remove(temp_path)
Loading
Loading