MRD2F · MRD2F · Nov 26, 2025 · Nov 23, 2025 · Nov 24, 2025 · Nov 24, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,39 +1,50 @@
 # This workflow will install Python dependencies, run tests and lint with a single version of Python
 # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
 
-name: Python application
+name: CI Pipeline
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "main", "dev", "test"]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "main", "dev" ]
 
 permissions:
   contents: read
 
 jobs:
   build:
-
+    name: Run Pytest and formatting
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v3
-      with:
-        python-version: "3.10"
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
-      run: |
-        pytest
+      - name: Checkout repository
+        uses: actions/checkout@v5
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        run: pip install uv
+
+      # pytest, ruff and black are declared in pyproject.toml, so a single
+      # `uv sync` installs everything the later steps need without
+      # modifying pyproject.toml or uv.lock inside the CI workspace.
+      - name: Install project dependencies
+        run: uv sync
+
+      # PYTHONPATH=. lets the tests import packages from the repository root.
+      - name: Run pytest
+        run: PYTHONPATH=. uv run pytest --maxfail=1 --disable-warnings -q
+
+      # --output-format=github turns findings into inline PR annotations.
+      - name: Lint with ruff
+        run: uv run ruff check --output-format=github . --exclude tests
+
+      - name: Check formatting with black
+        run: uv run black --check . --diff
diff --git a/README.md b/README.md
@@ -1 +1,117 @@
-# Talk2Text
+# 🎧 Audio Transcription Service
+
+A Python application that provides audio transcription using OpenAI's
+Whisper models.\
+The project is managed with **uv** (Ultrafast Python Package Manager).
+
+------------------------------------------------------------------------
+
+## 🚀 1. Installation
+
+### **1.1. Clone the repository**
+
+``` bash
+git clone https://github.com/yourusername/yourrepo.git
+cd yourrepo
+```
+
+### **1.2. Install uv (if not already installed)**
+
+``` bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+---or via pip---
+
+``` bash
+pip install uv
+```
+
+### **1.3. Create and sync the environment**
+
+uv uses `pyproject.toml` + `uv.lock` to reproduce the environment:
+
+``` bash
+uv sync
+```
+
+This automatically:
+
+-   Creates a virtual environment (`.venv`)
+-   Installs all dependencies
+-   Pins exact versions from `uv.lock`
+
+------------------------------------------------------------------------
+
+## 🎤 2. Project Structure (relevant section)
+
+    src/
+     └── app/
+         └── convertor/
+             └── service/
+                 ├── transcription.py
+                 └── data/
+                     └── inputs/
+                         └── file.ogg
+
+------------------------------------------------------------------------
+
+## 🏃 3. Running the Transcription Script
+
+Run from the **root directory**:
+
+``` bash
+uv run src/app/convertor/service/transcription.py
+```
+
+### Important
+
+Running from the project root ensures that relative paths like
+`src/app/convertor/service/data/inputs/...` resolve correctly.
+
+
+## 🧪 4. Running Tests (if applicable)
+
+``` bash
+uv run pytest
+```
+
+------------------------------------------------------------------------
+
+## 🛠 5. Updating Dependencies
+
+### Add a new package
+
+``` bash
+uv add <package-name>
+```
+
+### Upgrade all dependencies
+
+``` bash
+uv lock --upgrade
+uv sync
+```
+
+------------------------------------------------------------------------
+
+## ❗ Troubleshooting
+
+### **FileNotFoundError for audio inputs**
+
+Ensure the script is always run from the **project root**.
+
+Correct:
+
+``` bash
+uv run src/app/convertor/service/transcription.py
+```
+
+Incorrect:
+
+``` bash
+cd src/app/convertor/service/
+uv run transcription.py   # ❌ breaks relative paths
+```
+
+------------------------------------------------------------------------
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,8 +5,13 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    # NOTE(review): black, flake8, pytest and ruff look like development-only
+    # tools; consider moving them to a dev dependency group — confirm first.
+    "black>=25.11.0",
     "ffmpeg>=1.4",
+    "flake8>=7.3.0",
+    "flask>=3.1.2",
     "openai-whisper>=20250625",
     "pandas>=2.3.3",
+    "pytest==8.4.2",
+    "ruff>=0.14.6",
     "torch>=2.9.1",
 ]
diff --git a/requirements.txt b/requirements.txt
@@ -1,19 +1,23 @@
 Package                  Version
 ------------------------ -----------
 asttokens                3.0.1
+blinker                  1.9.0
 certifi                  2025.11.12
 charset-normalizer       3.4.4
+click                    8.3.1
 comm                     0.2.3
 debugpy                  1.8.17
 decorator                5.2.1
 executing                2.2.1
 ffmpeg                   1.4
 filelock                 3.20.0
+flask                    3.1.2
 fsspec                   2025.10.0
 idna                     3.11
 ipykernel                7.1.0
 ipython                  9.7.0
 ipython-pygments-lexers  1.1.1
+itsdangerous             2.2.0
 jedi                     0.19.2
 jinja2                   3.1.6
 jupyter-client           8.6.3
@@ -72,3 +76,4 @@ typing-extensions        4.15.0
 tzdata                   2025.2
 urllib3                  2.5.0
 wcwidth                  0.2.14
+werkzeug                 3.1.3
diff --git a/data/__init__.py → src/app/convertor/service/__init__.py b/data/__init__.py → src/app/convertor/service/__init__.py
diff --git a/src/models/__init__.py → ...convertor/service/convertor_controller.py b/src/models/__init__.py → ...convertor/service/convertor_controller.py
diff --git a/src/app/convertor/service/convertor_service.py b/src/app/convertor/service/convertor_service.py
@@ -0,0 +1,22 @@
+from convertor.service.transcription import Transcription
+
+
+class ConvertorService:
+    """Service-layer entry point that turns an audio file into text."""
+
+    # Sample recording used when the caller does not name a file. The path is
+    # relative to the current working directory.
+    DEFAULT_INPUT_FILE = "./convertor/service/data/inputs/5846093734223028963.ogg"
+
+    @classmethod
+    def create_text(
+        cls,
+        input_file_name=None,
+        model_id="tiny",
+        show_text=True,
+        text_preview_size=10,
+    ):
+        """Transcribe an audio file and return the transcribed text.
+
+        Calling this without arguments transcribes the bundled sample
+        recording with the ``tiny`` model and prints a 10-character preview.
+
+        Args:
+            input_file_name: Path of the audio file; ``None`` selects
+                ``DEFAULT_INPUT_FILE``.
+            model_id: Model name forwarded to ``Transcription``.
+            show_text: Forwarded to ``Transcription``.
+            text_preview_size: Forwarded to ``Transcription``.
+
+        Returns:
+            Whatever ``Transcription.get_transcription()`` returns.
+        """
+        if input_file_name is None:
+            input_file_name = cls.DEFAULT_INPUT_FILE
+
+        transcription = Transcription(
+            model_id=model_id,
+            input_file_name=input_file_name,
+            show_text=show_text,
+            text_preview_size=text_preview_size,
+        )
+
+        return transcription.get_transcription()
diff --git a/src/models/convertor.py → src/app/convertor/service/data/__init__.py b/src/models/convertor.py → src/app/convertor/service/data/__init__.py
diff --git a/src/app/convertor/service/data/inputs/5846093734223028963.ogg b/src/app/convertor/service/data/inputs/5846093734223028963.ogg
diff --git a/src/app/convertor/service/data/inputs/5846093734223028963.ogx b/src/app/convertor/service/data/inputs/5846093734223028963.ogx
diff --git a/src/app/convertor/service/transcription.py b/src/app/convertor/service/transcription.py
@@ -0,0 +1,137 @@
+import whisper
+import os
+
+
+class Transcription:
+    """Transcribe a single audio file with an OpenAI Whisper model.
+
+    The constructor validates the input file extension and the model ID; the
+    model itself is only loaded when ``get_transcription`` is called.
+
+    Args:
+        model_id: Whisper model name; must be one of ``whisper_model_ids``.
+        input_file_name: Path of the audio file to transcribe.
+        show_text: When true, print the transcription (or a preview of it).
+        output_file_name: Path, without the ``.txt`` suffix, the transcription
+            is saved to. An empty value disables saving.
+        text_preview_size: Number of leading characters printed when
+            ``show_text`` is set; ``None`` or ``0`` prints the full text.
+        language: Stored on the instance; not passed to Whisper by this class.
+
+    Raises:
+        ValueError: If the file extension or the model ID is not allowed.
+    """
+
+    def __init__(
+        self,
+        model_id="tiny",
+        input_file_name=".ogg",
+        show_text=False,
+        output_file_name="",
+        text_preview_size=None,
+        language="english",
+    ):
+        self.model_id = model_id
+        self.input_file_name = input_file_name
+        self.show_text = show_text
+        self.output_file_name = output_file_name
+        self.text_preview_size = text_preview_size
+        self.language = language
+
+        # ---- Sanity checks for Whisper model use ----
+        self.whisper_allowed_extensions = [
+            "flac",
+            "m4a",
+            "mp3",
+            "mp4",
+            "mpeg",
+            "mpga",
+            "oga",
+            "ogg",
+            "wav",
+            "webm",
+        ]
+        self.whisper_model_ids = ["tiny", "base", "small", "medium", "large", "turbo"]
+
+        # The turbo model works well for transcribing English but is not
+        # trained for translation tasks. To translate non-English speech into
+        # English, use one of the multilingual models (tiny, base, small,
+        # medium, large) instead of turbo.
+
+        # NOTE(review): this list is defined but not consulted by
+        # _check_whisper_model_id, so "*.en" models are currently rejected.
+        self.whisper_model_ids_english_only = [
+            "tiny.en",
+            "base.en",
+            "small.en",
+            "medium.en",
+        ]
+        self._check_file_extension()
+        self._check_whisper_model_id()
+
+    @staticmethod
+    def load_file(input_file_name):
+        """Return ``input_file_name`` unchanged after checking that it exists.
+
+        Raises:
+            FileNotFoundError: If the path does not exist.
+        """
+        if not os.path.exists(input_file_name):
+            raise FileNotFoundError(f"File: {input_file_name} does not exist")
+        return input_file_name
+
+    def _check_file_extension(self):
+        """Validate that the input file has an allowed audio extension.
+
+        Returns:
+            True when the extension is allowed.
+
+        Raises:
+            ValueError: If the extension is missing or not allowed.
+        """
+        base_name = os.path.basename(self.input_file_name)
+        # A name without a dot has no extension. Taking the whole name as the
+        # extension would let a file literally called "ogg" pass the check.
+        ext = base_name.rsplit(".", 1)[-1].lower() if "." in base_name else ""
+
+        if ext not in self.whisper_allowed_extensions:
+            allowed = ", ".join(self.whisper_allowed_extensions)
+            raise ValueError(
+                f"Invalid file format: .{ext}\n" f"Allowed formats are: {allowed}"
+            )
+        return True
+
+    def _check_whisper_model_id(self):
+        """Validate that ``model_id`` is one of ``whisper_model_ids``.
+
+        Returns:
+            True when the model ID is allowed.
+
+        Raises:
+            ValueError: If the model ID is not allowed.
+        """
+        if self.model_id not in self.whisper_model_ids:
+            allowed = ", ".join(self.whisper_model_ids)
+            raise ValueError(
+                f"Invalid model ID selection: {self.model_id}\n"
+                f"Allowed model IDs are: {allowed}"
+            )
+        return True
+
+    def _get_model(self):
+        """Load and return the Whisper model named by ``model_id``."""
+        return whisper.load_model(self.model_id)
+
+    def save_transcription(self, text, output_file_name=""):
+        """Write ``text`` to ``<name>.txt`` as UTF-8 and return that path.
+
+        Args:
+            text: Transcription text to write.
+            output_file_name: Target path without the ``.txt`` suffix; falls
+                back to ``self.output_file_name`` when empty.
+
+        Raises:
+            ValueError: If neither name is set, instead of writing a file
+                that is just called ``.txt``.
+        """
+        file_name = output_file_name or self.output_file_name
+        if not file_name:
+            raise ValueError("No output file name given for the transcription")
+
+        path = f"{file_name}.txt"
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(text)
+        return path
+
+    def get_transcription(self):
+        """Transcribe the input file and return the text.
+
+        Prints progress messages, optionally prints the text or a preview of
+        it, and saves the text when ``output_file_name`` is set.
+
+        Returns:
+            The ``"text"`` entry of the result returned by the model.
+        """
+        model = self._get_model()
+        print(f"Using as requested model {self.model_id}.")
+        print(
+            f"Transcribing file {self.input_file_name}... this may take a few minutes depening of the file size."
+        )
+
+        result = model.transcribe(
+            self.input_file_name,
+        )
+        text = result["text"]
+
+        if self.show_text:
+            if self.text_preview_size:
+                print(text[: self.text_preview_size])
+            else:
+                print(text)
+
+        # Only the instance attribute decides whether to save; no other
+        # output name is in scope inside this method.
+        if self.output_file_name:
+            saved_path = self.save_transcription(text)
+            print(f"Saved transcription as: {saved_path}.")
+
+        return text
+
+
+if __name__ == "__main__":
+    # Demo run: transcribe the bundled sample recording with the "tiny" model,
+    # print a 10-character preview and save the text next to the input.
+    # The paths are relative to the repository root.
+    data_dir = "data"
+    input_file_name = "./src/app/convertor/service/data/inputs/5846093734223028963.ogg"
+    output_file_name = "./src/app/convertor/service/data/inputs/5846093734223028963.ogx"
+
+    demo_options = dict(
+        model_id="tiny",
+        show_text=True,
+        text_preview_size=10,
+    )
+
+    Transcription(
+        input_file_name=input_file_name,
+        output_file_name=output_file_name,
+        **demo_options,
+    ).get_transcription()