diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..2f54cd4
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,45 @@
+# Runs the Alembic migrations and the pytest suite for backend/ on pull requests to main.
+name: Backend Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r backend/requirements.txt
+
+      - name: Run Schema Migrations and Tests
+        env:
+          ENV_FILE: ".env"
+          PYTHONPATH: "backend"
+          # NOTE: repository secrets are not passed to workflows triggered from forks.
+          DATABASE_URL: ${{ secrets.TEST_DATABASE_URL }}
+          SUPABASE_URL: ${{ secrets.TEST_SUPABASE_URL }}
+          SUPABASE_KEY: ${{ secrets.TEST_SUPABASE_KEY }}
+          SUPABASE_JWT_SECRET: ${{ secrets.TEST_SUPABASE_JWT_SECRET }}
+          # Default mock values for other keys
+          HF_API_URL: "https://router.huggingface.co/hf-inference/models/sentence-transformers/all-MiniLM-L6-v2/pipeline/feature-extraction"
+          HF_API_KEY: "mock"
+          LLM_API_URL: "https://api.groq.com/openai/v1/chat/completions"
+          LLM_API_KEY: "mock"
+          LLM_MODEL: "mock"
+        run: |
+          cd backend
+          export PYTHONPATH=$PYTHONPATH:.
+          alembic upgrade head
+          pytest app/tests/
diff --git a/backend/.gitignore b/backend/.gitignore
index 1541da5..cf50bd5 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -1,2 +1,8 @@
 __pycache__/
-.env
\ No newline at end of file
+.env
+.env.dev
+.venv/
+venv/
+.pytest_cache/
+*.pyc
+.DS_Store
diff --git a/backend/Dockerfile b/backend/Dockerfile
index 0ea938d..88994d2 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -15,8 +15,10 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir --upgrade pip \
     && pip install --no-cache-dir -r requirements.txt
 
 COPY . .
+# start.sh arrives with the application code copied above; make it executable
+RUN chmod +x start.sh
 
 EXPOSE 8000
 
-CMD uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000} --proxy-headers
+CMD ["./start.sh"]
diff --git a/backend/alembic.ini b/backend/alembic.ini
new file mode 100644
index 0000000..807ded2
--- /dev/null
+++ b/backend/alembic.ini
@@ -0,0 +1,149 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts.
+# this is typically a path given in POSIX (e.g. forward slashes)
+# format, relative to the token %(here)s which refers to the location of this
+# ini file
+script_location = %(here)s/alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+# Or organize into date-based subdirectories (requires recursive_version_locations = true)
+# file_template = %%(year)d/%%(month).2d/%%(day).2d_%%(hour).2d%%(minute).2d_%%(second).2d_%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory. for multiple paths, the path separator
+# is defined by "path_separator" below.
+prepend_sys_path = .
+
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the tzdata library which can be installed by adding
+# `alembic[tz]` to the pip requirements.
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to <script_location>/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "path_separator"
+# below.
+# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
+
+# path_separator; This indicates what character is used to split lists of file
+# paths, including version_locations and prepend_sys_path within configparser
+# files such as alembic.ini.
+# The default rendered in new alembic.ini files is "os", which uses os.pathsep
+# to provide os-dependent path splitting.
+#
+# Note that in order to support legacy alembic.ini files, this default does NOT
+# take place if path_separator is not present in alembic.ini. If this
+# option is omitted entirely, fallback logic is as follows:
+#
+# 1. Parsing of the version_locations option falls back to using the legacy
+#    "version_path_separator" key, which if absent then falls back to the legacy
+#    behavior of splitting on spaces and/or commas.
+# 2. Parsing of the prepend_sys_path option falls back to the legacy
+#    behavior of splitting on spaces, commas, or colons.
+#
+# Valid values for path_separator are:
+#
+# path_separator = :
+# path_separator = ;
+# path_separator = space
+# path_separator = newline
+#
+# Use os.pathsep. Default configuration used for new projects.
+path_separator = os
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+# database URL. This is consumed by the user-maintained env.py script only.
+# other means of configuring database URLs may be customized within the env.py
+# file.
+sqlalchemy.url = driver://user:pass@localhost/dbname
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
+# hooks = ruff
+# ruff.type = module
+# ruff.module = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Alternatively, use the exec runner to execute a binary found on your PATH
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = ruff
+# ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration. This is also consumed by the user-maintained
+# env.py script only.
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/backend/alembic/README b/backend/alembic/README
new file mode 100644
index 0000000..98e4f9c
--- /dev/null
+++ b/backend/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/backend/alembic/env.py b/backend/alembic/env.py
new file mode 100644
index 0000000..45da8f3
--- /dev/null
+++ b/backend/alembic/env.py
@@ -0,0 +1,64 @@
+import os
+import sys
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# Put the directory that contains alembic/ on sys.path so the `app` package imports below resolve.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from app.core.config import settings
+from app.db.database import Base
+# NOTE(review): presumably this import registers every model on Base.metadata - confirm.
+import app.models
+
+config = context.config
+
+# Alembic keeps options in a ConfigParser, where "%" starts an interpolation;
+# escape it so URL-encoded credentials (e.g. "%40") in DATABASE_URL survive intact.
+config.set_main_option("sqlalchemy.url", settings.DATABASE_URL.replace("%", "%%"))
+
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in offline mode: configure the context with a URL only, no Engine."""
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in online mode: build an Engine and run over a live connection."""
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection, target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/backend/alembic/script.py.mako b/backend/alembic/script.py.mako
new file mode 100644
index 0000000..1101630
--- /dev/null
+++ b/backend/alembic/script.py.mako
@@ -0,0 +1,28 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    ${downgrades if downgrades else "pass"}
diff --git a/backend/alembic/versions/af24fc97a432_initial_migration.py b/backend/alembic/versions/af24fc97a432_initial_migration.py
new file mode 100644
index 0000000..043890f
--- /dev/null
+++ b/backend/alembic/versions/af24fc97a432_initial_migration.py
@@ -0,0 +1,79 @@
+"""Initial migration
+
+Revision ID: af24fc97a432
+Revises:
+Create Date: 2026-03-19 10:47:49.482533
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+import pgvector.sqlalchemy
+
+
+# revision identifiers, used by Alembic.
+revision: str = 'af24fc97a432'
+down_revision: Union[str, Sequence[str], None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    op.execute('CREATE EXTENSION IF NOT EXISTS vector')
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('users',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('supabase_id', sa.String(), nullable=True),
+    sa.Column('name', sa.String(), nullable=False),
+    sa.Column('email', sa.String(), nullable=False),
+    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('email'),
+    sa.UniqueConstraint('id')
+    )
+    op.create_index(op.f('ix_users_supabase_id'), 'users', ['supabase_id'], unique=True)
+    op.create_table('documents',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('user_id', sa.UUID(), nullable=False),
+    sa.Column('filename', sa.String(), nullable=False),
+    sa.Column('file_url', sa.String(), nullable=False),
+    sa.Column('pages', sa.Integer(), nullable=True),
+    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
+    sa.Column('status', sa.Enum('processing', 'ready', 'failed', name='documentstatus'), nullable=False),
+    sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('id')
+    )
+    op.create_index('ix_documents_user_id', 'documents', ['user_id'], unique=False)
+    op.create_table('chunks',
+    sa.Column('id', sa.UUID(), nullable=False),
+    sa.Column('document_id', sa.UUID(), nullable=False),
+    sa.Column('chunk_index', sa.Integer(), nullable=False),
+    sa.Column('text', sa.Text(), nullable=False),
+    sa.ForeignKeyConstraint(['document_id'], ['documents.id'], ),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('id')
+    )
+    op.create_index('ix_chunks_document_id', 'chunks', ['document_id'], unique=False)
+    op.create_table('embeddings',
+    sa.Column('chunk_id', sa.UUID(), nullable=False),
+    sa.Column('vector', pgvector.sqlalchemy.vector.VECTOR(dim=384), nullable=False),
+    sa.ForeignKeyConstraint(['chunk_id'], ['chunks.id'], ),
+    sa.PrimaryKeyConstraint('chunk_id')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    # Drop tables children-first; the enum type is not removed with its table, so drop it last.
+    op.drop_table('embeddings')
+    op.drop_index('ix_chunks_document_id', table_name='chunks')
+    op.drop_table('chunks')
+    op.drop_index('ix_documents_user_id', table_name='documents')
+    op.drop_table('documents')
+    op.drop_index(op.f('ix_users_supabase_id'), table_name='users')
+    op.drop_table('users')
+    op.execute('DROP TYPE IF EXISTS documentstatus')
diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 8f35890..d761034 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -1,3 +1,4 @@
+import os
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
 class Settings(BaseSettings):
@@ -17,9 +18,10 @@ class Settings(BaseSettings):
     ALLOWED_ORIGINS: str = "http://localhost:5173,http://localhost:5174"
 
     model_config = SettingsConfigDict(
-        env_file=".env",
+        env_file=os.getenv("ENV_FILE", ".env"),
         env_file_encoding='utf-8',
-        case_sensitive=True
+        case_sensitive=True,
+        extra='ignore'
     )
 
 settings = Settings()
diff --git a/backend/app/db/init_db.py b/backend/app/db/init_db.py
deleted file mode 100644
index 21f76d4..0000000
--- a/backend/app/db/init_db.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from app.db.database import Base, engine
-from app.models.user import User
-from app.models.chunk import Chunk
-from app.models.document import Document
-from app.models.embedding import Embedding
-
-
-def init_db():
-    print("Creating Tables...")
-    Base.metadata.create_all(bind=engine)
-    print("Database successfully initialized.")
-
-
-if __name__ == "__main__":
-    init_db()
diff --git a/backend/app/tests/conftest.py b/backend/app/tests/conftest.py
new file mode 100644
index 0000000..2a1cc50
--- /dev/null
+++ b/backend/app/tests/conftest.py
@@ -0,0 +1,37 @@
+import pytest
+from fastapi.testclient import TestClient
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+from app.main import app
+from app.core.config import settings
+from app.db.database import get_db
+
+# The settings.DATABASE_URL will be populated by `.env.dev` natively if
+# ENV_FILE is specified during test run, or standard `.env` otherwise.
+engine = create_engine(settings.DATABASE_URL)
+TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+def override_get_db():
+    """Yield a session from TestingSessionLocal and close it once the request is done."""
+    db = TestingSessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+app.dependency_overrides[get_db] = override_get_db
+
+@pytest.fixture(scope="module")
+def client():
+    with TestClient(app) as c:
+        yield c
+
+@pytest.fixture(scope="module")
+def db_session():
+    """Provides a database session for testing."""
+    session = TestingSessionLocal()
+    try:
+        yield session
+    finally:
+        session.close()
diff --git a/backend/app/tests/test_main.py b/backend/app/tests/test_main.py
new file mode 100644
index 0000000..23ba6dc
--- /dev/null
+++ b/backend/app/tests/test_main.py
@@ -0,0 +1,12 @@
+from fastapi.testclient import TestClient
+from app.main import app
+
+client = TestClient(app)
+
+def test_health_check():
+    """
+    Test the health check endpoint to ensure the API is running.
+    """
+    response = client.get("/")
+    assert response.status_code == 200
+    assert response.json() == {"message": "Easy study running"}
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 1dfe14a..6c1bb2e 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,7 +1,9 @@
+alembic==1.18.4
 annotated-doc==0.0.4
 annotated-types==0.7.0
 anyio==4.12.1
 bcrypt==5.0.0
+cachetools==6.2.6
 certifi==2026.1.4
 cffi==2.0.0
 charset-normalizer==3.4.4
@@ -11,6 +13,7 @@ cryptography==46.0.4
 deprecation==2.1.0
 ecdsa==0.19.1
 fastapi==0.128.0
+fsspec==2026.2.0
 google-auth==2.48.0
 google-auth-oauthlib==1.2.4
 greenlet==3.3.1
@@ -21,7 +24,13 @@ httpcore==1.0.9
 httpx==0.28.1
 hyperframe==6.1.0
 idna==3.11
+iniconfig==2.3.0
 lxml==6.0.2
+Mako==1.3.10
+markdown-it-py==4.0.0
+MarkupSafe==3.0.3
+mdurl==0.1.2
+mmh3==5.2.0
 multidict==6.7.1
 numpy==2.4.2
 oauthlib==3.3.1
@@ -29,6 +38,7 @@ packaging==26.0
 passlib==1.7.4
 pgvector==0.4.2
 pillow==12.1.0
+pluggy==1.6.0
 postgrest==2.27.3
 propcache==0.4.1
 psycopg2-binary==2.9.11
@@ -38,9 +48,15 @@ pycparser==3.0
 pydantic==2.12.5
 pydantic-settings==2.12.0
 pydantic_core==2.41.5
+Pygments==2.19.2
+pyiceberg==0.10.0
 PyJWT==2.11.0
 PyMuPDF==1.26.7
+pyparsing==3.3.2
 pypdf==6.7.0
+pyroaring==1.0.3
+pytest==9.0.2
+python-dateutil==2.9.0.post0
 python-dotenv==1.2.1
 python-jose==3.5.0
 python-multipart==0.0.22
@@ -48,15 +64,20 @@ python-pptx==1.0.2
 realtime==2.27.3
 requests==2.32.5
 requests-oauthlib==2.0.0
+rich==14.3.2
 rsa==4.9.1
 setuptools==82.0.0
 six==1.17.0
+sortedcontainers==2.4.0
 SQLAlchemy==2.0.46
 starlette==0.50.0
+storage3==2.27.3
 StrEnum==0.4.15
+strictyaml==1.7.3
 supabase==2.27.3
 supabase-auth==2.27.3
 supabase-functions==2.27.3
+tenacity==9.1.4
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 urllib3==2.6.3
diff --git a/backend/start.sh b/backend/start.sh
new file mode 100644
index 0000000..7be84f9
--- /dev/null
+++ b/backend/start.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Run migrations
+echo "Running database migrations..."
+alembic upgrade head || exit 1
+
+# Start the application (reached only when the migrations above succeeded)
+echo "Starting FastAPI server..."
+exec uvicorn app.main:app --host 0.0.0.0 --port "${PORT:-8000}" --proxy-headers