diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index b77d9cf..fa8990b 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -1,61 +1,60 @@ -name: CI / CD for Zilean +name: CI/CD on: push: - tags: - - 'v[0-9]+.[0-9]+.[0-9]+' + branches: [main] + tags: ['v[0-9]+.[0-9]+.[0-9]+'] + pull_request: + branches: [main] workflow_dispatch: env: - IMAGE_NAME: ipromknight/zilean + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} jobs: - execution: + build-and-test: runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-dotnet@v4 + with: + dotnet-version: '9.0.x' + - run: dotnet restore + - run: dotnet build --no-restore -c Release + + docker: + needs: build-and-test + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') permissions: contents: read - name: Build Zilean Image + packages: write steps: - - name: Checkout code - uses: actions/checkout@v4.1.2 - - - name: Docker Setup QEMU - uses: docker/setup-qemu-action@v3 - id: qemu + - uses: actions/checkout@v4 + - uses: docker/setup-qemu-action@v3 with: platforms: amd64,arm64 - - - name: Login to Docker Hub - uses: docker/login-action@v3 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 with: - username: ${{ vars.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3.2.0 - - - name: Build Docker Metadata - id: docker-metadata - uses: docker/metadata-action@v5 + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - uses: docker/metadata-action@v5 + id: meta with: - images: ${{ env.IMAGE_NAME }} - flavor: | - latest=auto + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=ref,event=tag - type=sha,commit=${{ github.sha }} type=semver,pattern={{version}} + type=sha type=raw,value=latest,enable={{is_default_branch}} - - - name: Push Service Image to repo - uses: 
docker/build-push-action@v5 + - uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile push: true - provenance: mode=max - tags: ${{ steps.docker-metadata.outputs.tags }} - labels: ${{ steps.docker-metadata.outputs.labels }} platforms: linux/amd64,linux/arm64 - cache-from: type=gha,scope=${{ github.workflow }} - cache-to: type=gha,mode=max,scope=${{ github.workflow }} \ No newline at end of file + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/release-please.yaml b/.github/workflows/release-please.yaml index 623e2bc..a6fcf02 100644 --- a/.github/workflows/release-please.yaml +++ b/.github/workflows/release-please.yaml @@ -16,4 +16,4 @@ jobs: steps: - uses: googleapis/release-please-action@v4 with: - token: ${{ secrets.RELEASE_PLEASE_TOKEN }} \ No newline at end of file + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/Directory.Packages.props b/Directory.Packages.props index 139c77e..ff1df4b 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -21,6 +21,7 @@ + diff --git a/Dockerfile b/Dockerfile index 1e5fbc5..207653a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,9 @@ RUN apk add --update --no-cache \ python3=~3.11 \ py3-pip=~23.1 \ curl \ + git \ icu-libs \ + tzdata \ && ln -sf python3 /usr/bin/python ENV DOTNET_RUNNING_IN_CONTAINER=true ENV DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=false @@ -34,4 +36,7 @@ RUN rm -rf /app/python || true && \ mkdir -p /app/python || true RUN pip3 install -r /app/requirements.txt -t /app/python +HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:8181/healthchecks/ready || exit 1 + ENTRYPOINT ["./zilean-api"] diff --git a/README.md b/README.md index c450830..311a81c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# What is Zilean +# Zilean (Maintained Fork) zilean logo @@ -6,9 +6,189 @@ 
Zilean is a service that allows you to search for [DebridMediaManager](https://g This can then be configured as a Torznab indexer in your favorite content application. Newly added is the ability for Zilean to scrape from your running Zurg instance, and from other running Zilean instances. -Documentation for zilean can be viewed at [https://ipromknight.github.io/zilean/](https://ipromknight.github.io/zilean/) +This is an actively maintained fork of [iPromKnight/zilean](https://github.com/iPromKnight/zilean) (v3.5.0, last upstream commit May 2025). ---- +Upstream documentation: [https://ipromknight.github.io/zilean/](https://ipromknight.github.io/zilean/) +## Requirements -Buy Me a Coffee at ko-fi.com +Zilean requires only **PostgreSQL 16+**. Elasticsearch is **NOT** required and was removed in v2.0. + +## Docker Image + +``` +ghcr.io/thoroslives/zilean:latest +``` + +## Fork Changes + +All changes beyond upstream v3.5.0: + +### v3.6.0 +- **Flexible database configuration** - supports `Zilean__Database__ConnectionString` env var (backwards compat), individual `POSTGRES_*` env vars, or sensible defaults. Uses `NpgsqlConnectionStringBuilder` for proper escaping of special characters in passwords. +- **Incremental DMM sync** - replaces the 1.2GB zip download with `git clone --depth 1` on first run and `git pull` on subsequent runs. Supports `GITHUB_TOKEN` for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry. +- **Logging config preservation** - `logging.json` is only written if it doesn't exist, preserving user customizations across restarts. + +### v3.7.0 +- **Security hardening** - warns at startup if PostgreSQL password is empty or set to default "postgres". Docker-compose example no longer exposes Postgres ports. +- **Database startup resilience** - retries database connection up to 5 times with 5-second delays before running migrations. Clear error messages on failure including host and database name. 
+- **Filtered search fix** - `/dmm/filtered` with short query strings (e.g., "1923") combined with season/episode filters no longer returns 0 results. Similarity threshold is automatically lowered when structured filters provide precision. +- **Scraping toggle fix** - setting `EnableScraping=false` now correctly hides the on-demand-scrape endpoint while keeping search endpoints functional. +- **Timezone support** - set `TZ` env var (e.g., `TZ=Australia/Sydney`) to display log timestamps in your local timezone. `tzdata` package included in the image. +- **Readiness health check** - new `/healthchecks/ready` endpoint that verifies database connectivity. Used by the Dockerfile HEALTHCHECK for orchestrator integration. +- **HEALTHCHECK instruction** - Docker image includes a built-in health check (30s interval, 60s start period) so orchestrators can detect readiness. +- **Graceful error handling** - database errors no longer kill the process immediately (`Process.Kill()` replaced with proper exception propagation). Search errors are logged instead of silently swallowed. +- **Startup config validation** - validates configuration values (cron syntax, numeric ranges, required fields) at startup with clear error messages. +- **DMM sync progress reporting** - periodic progress logs during sync showing files processed, percentage complete, and new torrents found. +- **ISystemClock deprecation fix** - removed deprecated `ISystemClock` usage in authentication handler. + +## Configuration + +### Database Connection + +Three ways to configure the database connection (checked in this order): + +#### 1. Full Connection String (recommended for existing setups) + +```yaml +environment: + - Zilean__Database__ConnectionString=Host=postgres;Database=zilean;Username=postgres;Password=mypass;Include Error Detail=true;Timeout=30;CommandTimeout=3600; +``` + +#### 2. 
Individual Environment Variables + +```yaml +environment: + - POSTGRES_HOST=postgres # default: localhost + - POSTGRES_PORT=5432 # default: 5432 + - POSTGRES_DB=zilean # default: zilean + - POSTGRES_USER=postgres # default: postgres + - POSTGRES_PASSWORD=mypass # default: (empty) +``` + +#### 3. Defaults + +If no database env vars are set, Zilean connects to `localhost:5432/zilean` as `postgres` with no password (suitable for trust auth). + +### DMM Sync + +Set `GITHUB_TOKEN` to avoid GitHub API rate limiting during DMM hashlist sync: + +```yaml +environment: + - GITHUB_TOKEN=ghp_xxxxxxxxxxxx +``` + +The initial DMM sync is **resumable** - if interrupted, it picks up where it left off on next startup. Expected initial sync duration varies by hardware (typically 30min-2hrs for parsing, longer for IMDB matching). + +### Timezone + +Set the `TZ` environment variable to display log timestamps in your local timezone: + +```yaml +environment: + - TZ=Australia/Sydney +``` + +### PostgreSQL Shared Memory + +Docker's default shared memory size (`shm_size`) of 64MB is too small for PostgreSQL during Zilean's bulk DMM upserts. You'll get errors like: + +``` +could not resize shared memory segment "/PostgreSQL.xxx" to 67146560 bytes: No space left on device +``` + +Set `shm_size: 256m` on your PostgreSQL container to fix this. See the docker-compose example below. + +## Security + +**Never expose your PostgreSQL port to the internet.** Multiple users have been compromised with crypto miners after exposing Postgres with default credentials. Zilean will warn you at startup if your database password is empty or set to the default "postgres". 
+ +Best practices: +- Always set a strong `POSTGRES_PASSWORD` +- Do NOT add `ports:` to your Postgres container unless you need external access +- If you must expose Postgres, use a firewall to restrict access to trusted IPs +- Use Docker's internal networking - Zilean connects to Postgres by container name + +## Resource Usage + +- **Initial sync:** Expect high CPU for 10-30 minutes during the first DMM sync. This is normal - Zilean is parsing ~1.2M HTML files and performing bulk database upserts. Progress is logged periodically. +- **Subsequent syncs:** Lightweight. Only pulls new/changed files via `git pull` and processes the diff. +- **If high usage persists** after the initial sync completes: check for a security compromise (see Security section above). Persistent high CPU with unfamiliar processes is a red flag. +- The PostgreSQL container requires `shm_size: 256m` for bulk operations (see PostgreSQL Shared Memory section). + +## Multi-Instance Deployment + +For high-availability or high-traffic setups, you can run multiple Zilean instances: + +- **1 scraper instance** (`Zilean__Dmm__EnableScraping=true`) - handles DMM sync and data ingestion +- **N API instances** (`Zilean__Dmm__EnableScraping=false`, `Zilean__Dmm__EnableEndpoint=true`) - serve search queries only +- All instances share the same PostgreSQL database +- `PreventOverlapping("SyncJobs")` prevents concurrent scraping within an instance +- PostgreSQL's default `max_connections=100` is sufficient for typical deployments + +## Health Checks + +- `/healthchecks/ping` - lightweight liveness check (always returns 200) +- `/healthchecks/ready` - readiness check that verifies database connectivity (returns 503 if DB is unreachable) + +## Troubleshooting + +### Database not found / "does not exist" + +Common causes: +- PostgreSQL hasn't finished initializing - Zilean makes up to 5 connection attempts, 5 seconds apart, before giving up +- Wrong credentials - check `POSTGRES_PASSWORD` matches between Zilean and Postgres containers +- Volume 
permissions - on Unraid/Synology, ensure the Postgres data volume has correct ownership + +### "could not resize shared memory segment" + +Set `shm_size: 256m` on your PostgreSQL container. See the docker-compose example. + +### Search returns 0 results + +- Ensure the initial DMM sync has completed (check logs for "DMM sync complete") +- For filtered searches with short titles, the similarity threshold is automatically adjusted + +## Docker Compose Example + +```yaml +services: + zilean: + image: ghcr.io/thoroslives/zilean:latest + container_name: zilean + restart: unless-stopped + ports: + - "8181:8181" + volumes: + - zilean-data:/app/data + environment: + - POSTGRES_HOST=postgres + - POSTGRES_PASSWORD=your_strong_password_here + - GITHUB_TOKEN=ghp_xxxxxxxxxxxx # optional, recommended + - TZ=UTC # optional, set your timezone + depends_on: + postgres: + condition: service_healthy + + postgres: + image: postgres:16-alpine + container_name: zilean-postgres + restart: unless-stopped + shm_size: 256m # required - default 64m causes "No space left on device" during bulk upserts + # Do NOT expose ports unless you need external access - see Security section + volumes: + - zilean-pg:/var/lib/postgresql/data + environment: + - POSTGRES_DB=zilean + - POSTGRES_PASSWORD=your_strong_password_here + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d zilean"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + zilean-data: + zilean-pg: +``` diff --git a/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs b/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs index 3756af8..e661a29 100644 --- a/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs +++ b/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs @@ -4,9 +4,8 @@ public class ApiKeyAuthenticationHandler( IOptionsMonitor options, ILoggerFactory logger, UrlEncoder encoder, - ISystemClock clock, ZileanConfiguration 
configuration) - : AuthenticationHandler(options, logger, encoder, clock) + : AuthenticationHandler(options, logger, encoder) { protected override Task HandleAuthenticateAsync() { diff --git a/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs index 803b1c2..213d9e9 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs @@ -6,6 +6,9 @@ public class StartupService( IServiceProvider serviceProvider, ILoggerFactory loggerFactory) : IHostedLifecycleService { + private const int MaxRetries = 5; + private static readonly TimeSpan _retryDelay = TimeSpan.FromSeconds(5); + public Task StartAsync(CancellationToken cancellationToken) => Task.CompletedTask; public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask; @@ -13,6 +16,29 @@ public class StartupService( public async Task StartingAsync(CancellationToken cancellationToken) { var logger = loggerFactory.CreateLogger(); + + // Security check — warn about insecure Postgres credentials + if (configuration.Database.HasInsecurePassword()) + { + logger.LogWarning("SECURITY WARNING: PostgreSQL password is empty or set to the default 'postgres'. " + + "This is a security risk — if your database port is exposed, attackers can connect and compromise your system. 
" + + "Set a strong password via POSTGRES_PASSWORD or Zilean__Database__ConnectionString."); + } + + // Validate configuration before proceeding + var validationErrors = configuration.Validate(); + if (validationErrors.Count > 0) + { + foreach (var error in validationErrors) + { + logger.LogError("Configuration error: {Error}", error); + } + throw new InvalidOperationException($"Zilean configuration is invalid: {string.Join("; ", validationErrors)}"); + } + + // Wait for database with retry + await WaitForDatabaseAsync(logger, cancellationToken); + logger.LogInformation("Applying Migrations..."); await using var asyncScope = serviceProvider.CreateAsyncScope(); var dbContext = asyncScope.ServiceProvider.GetRequiredService(); @@ -20,6 +46,48 @@ public async Task StartingAsync(CancellationToken cancellationToken) logger.LogInformation("Migrations Applied."); } + private async Task WaitForDatabaseAsync(ILogger logger, CancellationToken cancellationToken) + { + var connectionString = configuration.Database.ConnectionString; + + for (var attempt = 1; attempt <= MaxRetries; attempt++) + { + try + { + await using var connection = new Npgsql.NpgsqlConnection(connectionString); + await connection.OpenAsync(cancellationToken); + logger.LogInformation("Database connection established."); + return; + } + catch (Exception ex) when (attempt < MaxRetries) + { + logger.LogWarning("Database connection attempt {Attempt}/{MaxRetries} failed: {Message}. Retrying in {Delay}s...", + attempt, MaxRetries, ex.Message, _retryDelay.TotalSeconds); + await Task.Delay(_retryDelay, cancellationToken); + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to connect to database after {MaxRetries} attempts. " + + "Connection string: Host={Host}, Database={Database}. 
" + + "Check that PostgreSQL is running, the database exists, and credentials are correct.", + MaxRetries, GetConnectionHost(connectionString), GetConnectionDatabase(connectionString)); + throw; + } + } + } + + private static string GetConnectionHost(string connectionString) + { + try { return new Npgsql.NpgsqlConnectionStringBuilder(connectionString).Host ?? "unknown"; } + catch { return "unknown"; } + } + + private static string GetConnectionDatabase(string connectionString) + { + try { return new Npgsql.NpgsqlConnectionStringBuilder(connectionString).Database ?? "unknown"; } + catch { return "unknown"; } + } + public Task StoppedAsync(CancellationToken cancellationToken) => Task.CompletedTask; public Task StoppingAsync(CancellationToken cancellationToken) => Task.CompletedTask; diff --git a/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs b/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs index f9873c3..c2ee051 100644 --- a/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs +++ b/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs @@ -4,6 +4,7 @@ public static class HealthCheckEndpoints { private const string GroupName = "healthchecks"; private const string Ping = "/ping"; + private const string Ready = "/ready"; public static WebApplication MapHealthCheckEndpoints(this WebApplication app) { @@ -19,9 +20,30 @@ public static WebApplication MapHealthCheckEndpoints(this WebApplication app) private static RouteGroupBuilder HealthChecks(this RouteGroupBuilder group) { group.MapGet(Ping, RespondPong); + group.MapGet(Ready, CheckReadiness); return group; } private static string RespondPong(HttpContext context) => $"[{DateTime.UtcNow.ToString(CultureInfo.InvariantCulture)}]: Pong!"; + + private static async Task CheckReadiness(ZileanConfiguration configuration, ILogger logger) + { + try + { + await using var connection = new Npgsql.NpgsqlConnection(configuration.Database.ConnectionString); + await 
connection.OpenAsync(); + await using var cmd = connection.CreateCommand(); + cmd.CommandText = "SELECT 1"; + await cmd.ExecuteScalarAsync(); + return Results.Ok(new { status = "healthy", timestamp = DateTime.UtcNow }); + } + catch (Exception ex) + { + logger.LogWarning(ex, "Readiness check failed — database is not reachable"); + return Results.Json(new { status = "unhealthy", error = ex.Message, timestamp = DateTime.UtcNow }, statusCode: 503); + } + } + + private abstract class ReadinessCheck; } diff --git a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs index 6dcdf24..1b2dd80 100644 --- a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs +++ b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs @@ -13,14 +13,14 @@ public static WebApplication MapDmmEndpoints(this WebApplication app, ZileanConf { app.MapGroup(GroupName) .WithTags(GroupName) - .Dmm() + .Dmm(configuration) .DisableAntiforgery(); } return app; } - private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) + private static RouteGroupBuilder Dmm(this RouteGroupBuilder group, ZileanConfiguration configuration) { group.MapPost(Search, PerformSearch) .Produces() @@ -30,9 +30,12 @@ private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) .Produces() .AllowAnonymous(); - group.MapGet(Ingest, PerformOnDemandScrape) - .RequireAuthorization(ApiKeyAuthentication.Policy) - .WithMetadata(new OpenApiSecurityMetadata(ApiKeyAuthentication.Scheme)); + if (configuration.Dmm.EnableScraping) + { + group.MapGet(Ingest, PerformOnDemandScrape) + .RequireAuthorization(ApiKeyAuthentication.Policy) + .WithMetadata(new OpenApiSecurityMetadata(ApiKeyAuthentication.Scheme)); + } return group; } @@ -88,15 +91,15 @@ private static async Task> PerformSearch(HttpContext context, ? 
TypedResults.Ok(Array.Empty()) : TypedResults.Ok(results); } - catch + catch (Exception ex) { + logger.LogError(ex, "Unfiltered search failed for query: {QueryText}", queryRequest.QueryText); return TypedResults.Ok(Array.Empty()); } } private static async Task> PerformFilteredSearch(HttpContext context, ITorrentInfoService torrentInfoService, ZileanConfiguration configuration, ILogger logger, [AsParameters] SearchFilteredRequest request) { - try { logger.LogInformation("Performing filtered search for {@Request}", request); @@ -119,8 +122,10 @@ private static async Task> PerformFilteredSearch(HttpContext c ? TypedResults.Ok(Array.Empty()) : TypedResults.Ok(results); } - catch + catch (Exception ex) { + logger.LogError(ex, "Filtered search failed for query: {Query}, Season: {Season}, Episode: {Episode}", + request.Query, request.Season, request.Episode); return TypedResults.Ok(Array.Empty()); } } diff --git a/src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs b/src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs new file mode 100644 index 0000000..df3704e --- /dev/null +++ b/src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs @@ -0,0 +1,178 @@ +namespace Zilean.Database.Functions; + +public class SearchTorrentsMetaV6 +{ + internal const string Create = + """ + CREATE OR REPLACE FUNCTION search_torrents_meta( + query TEXT DEFAULT NULL, + season INT DEFAULT NULL, + episode INT DEFAULT NULL, + year INT DEFAULT NULL, + language TEXT DEFAULT NULL, + resolution TEXT DEFAULT NULL, + imdbId TEXT DEFAULT NULL, + limit_param INT DEFAULT 20, + category TEXT DEFAULT NULL, + similarity_threshold REAL DEFAULT 0.85 + ) + RETURNS TABLE( + "InfoHash" TEXT, + "Resolution" TEXT, + "Year" INT, + "Remastered" BOOLEAN, + "Codec" TEXT, + "Audio" TEXT[], + "Quality" TEXT, + "Episodes" INT[], + "Seasons" INT[], + "Languages" TEXT[], + "ParsedTitle" TEXT, + "NormalizedTitle" TEXT, + "RawTitle" TEXT, + "Size" TEXT, + "Category" TEXT, + "Complete" BOOLEAN, + "Volumes" INT[], + "Hdr" 
TEXT[], + "Channels" TEXT[], + "Dubbed" BOOLEAN, + "Subbed" BOOLEAN, + "Edition" TEXT, + "BitDepth" TEXT, + "Bitrate" TEXT, + "Network" TEXT, + "Extended" BOOLEAN, + "Converted" BOOLEAN, + "Hardcoded" BOOLEAN, + "Region" TEXT, + "Ppv" BOOLEAN, + "Is3d" BOOLEAN, + "Site" TEXT, + "Proper" BOOLEAN, + "Repack" BOOLEAN, + "Retail" BOOLEAN, + "Upscaled" BOOLEAN, + "Unrated" BOOLEAN, + "Documentary" BOOLEAN, + "EpisodeCode" TEXT, + "Country" TEXT, + "Container" TEXT, + "Extension" TEXT, + "Torrent" BOOLEAN, + "Score" REAL, + "ImdbId" TEXT, + "ImdbCategory" TEXT, + "ImdbTitle" TEXT, + "ImdbYear" INT, + "ImdbAdult" BOOLEAN, + "IngestedAt" TIMESTAMPTZ + ) AS $$ + DECLARE + effective_threshold REAL; + has_filters BOOLEAN; + BEGIN + -- When structured filters are provided (season, episode, year, imdbId), + -- lower the similarity threshold since the filters themselves provide precision. + -- This fixes short query strings (e.g. "1923") returning 0 results when combined + -- with season/episode filters, because trigram similarity is unreliable for short strings. 
+ has_filters := (season IS NOT NULL OR episode IS NOT NULL OR year IS NOT NULL OR imdbId IS NOT NULL); + + IF has_filters AND query IS NOT NULL AND length(query) <= 6 THEN + effective_threshold := similarity_threshold * 0.3; + ELSIF has_filters THEN + effective_threshold := similarity_threshold * 0.5; + ELSE + effective_threshold := similarity_threshold; + END IF; + + EXECUTE format('SET pg_trgm.similarity_threshold = %L', effective_threshold); + + RETURN QUERY + SELECT + t."InfoHash", + t."Resolution", + t."Year", + t."Remastered", + t."Codec", + t."Audio", + t."Quality", + t."Episodes", + t."Seasons", + t."Languages", + t."ParsedTitle", + t."NormalizedTitle", + t."RawTitle", + t."Size", + t."Category", + t."Complete", + t."Volumes", + t."Hdr", + t."Channels", + t."Dubbed", + t."Subbed", + t."Edition", + t."BitDepth", + t."Bitrate", + t."Network", + t."Extended", + t."Converted", + t."Hardcoded", + t."Region", + t."Ppv", + t."Is3d", + t."Site", + t."Proper", + t."Repack", + t."Retail", + t."Upscaled", + t."Unrated", + t."Documentary", + t."EpisodeCode", + t."Country", + t."Container", + t."Extension", + t."Torrent", + similarity(t."CleanedParsedTitle", query) AS "Score", + t."ImdbId", + i."Category" AS "ImdbCategory", + i."Title" AS "ImdbTitle", + i."Year" AS "ImdbYear", + i."Adult" AS "ImdbAdult", + t."IngestedAt" + FROM + public."Torrents" t + LEFT JOIN + public."ImdbFiles" i ON t."ImdbId" = i."ImdbId" + WHERE + Length(t."InfoHash") = 40 + AND + (category IS NULL OR t."Category" = category) + AND + (query IS NULL OR t."CleanedParsedTitle" % query) + AND (imdbId IS NULL OR t."ImdbId" = imdbId) + AND (season IS NULL OR season = ANY(t."Seasons")) + AND ( + (episode IS NULL AND season IS NOT NULL) + OR + ( + episode IS NOT NULL AND + season IS NOT NULL AND + (episode = ANY(t."Episodes") OR t."Episodes" IS NULL OR t."Episodes" = '{}') + ) + OR (season IS NULL AND episode IS NULL) + ) + AND (year IS NULL OR t."Year" BETWEEN year - 1 AND year + 1) + AND (language IS 
NULL OR language = ANY(t."Languages")) + AND (resolution IS NULL OR resolution = t."Resolution") + ORDER BY + "Score" DESC, + "IngestedAt" DESC + LIMIT + limit_param; + END; + $$ LANGUAGE plpgsql; + """; + + internal const string Remove = "DROP FUNCTION IF EXISTS search_torrents_meta(TEXT, INT, INT, INT, TEXT, TEXT, TEXT, INT, TEXT, REAL);"; +} diff --git a/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs new file mode 100644 index 0000000..34c4742 --- /dev/null +++ b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs @@ -0,0 +1,385 @@ +// +using System; +using System.Text.Json; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; +using Zilean.Database; + +#nullable disable + +namespace Zilean.Database.Migrations +{ + [DbContext(typeof(ZileanDbContext))] + [Migration("20260325000000_SearchV6FilteredThreshold")] + partial class SearchV6FilteredThreshold + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "9.0.0") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("Zilean.Shared.Features.Blacklist.BlacklistedItem", b => + { + b.Property("InfoHash") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "info_hash"); + + b.Property("BlacklistedAt") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasDefaultValueSql("now() at time zone 'utc'") + .HasAnnotation("Relational:JsonPropertyName", "blacklisted_at"); + + b.Property("Reason") + .IsRequired() + 
.HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "reason"); + + b.HasKey("InfoHash"); + + b.HasIndex("InfoHash") + .IsUnique(); + + b.ToTable("BlacklistedItems", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Dmm.ParsedPages", b => + { + b.Property("Page") + .HasColumnType("text"); + + b.Property("EntryCount") + .HasColumnType("integer"); + + b.HasKey("Page"); + + b.ToTable("ParsedPages", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Dmm.TorrentInfo", b => + { + b.Property("InfoHash") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "info_hash"); + + b.PrimitiveCollection("Audio") + .IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "audio"); + + b.Property("BitDepth") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "bit_depth"); + + b.Property("Bitrate") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "bitrate"); + + b.Property("Category") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "category"); + + b.PrimitiveCollection("Channels") + .IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "channels"); + + b.Property("CleanedParsedTitle") + .IsRequired() + .ValueGeneratedOnAdd() + .HasColumnType("text") + .HasDefaultValue("") + .HasAnnotation("Relational:JsonPropertyName", "cleaned_parsed_title"); + + b.Property("Codec") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "codec"); + + b.Property("Complete") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "complete"); + + b.Property("Container") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "container"); + + b.Property("Converted") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "converted"); + + b.Property("Country") + .HasColumnType("text") + 
.HasAnnotation("Relational:JsonPropertyName", "country"); + + b.Property("Date") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "date"); + + b.Property("Documentary") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "documentary"); + + b.Property("Dubbed") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "dubbed"); + + b.Property("Edition") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "edition"); + + b.Property("EpisodeCode") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "episode_code"); + + b.PrimitiveCollection("Episodes") + .IsRequired() + .HasColumnType("integer[]") + .HasAnnotation("Relational:JsonPropertyName", "episodes"); + + b.Property("Extended") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "extended"); + + b.Property("Extension") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "extension"); + + b.Property("Group") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "group"); + + b.Property("Hardcoded") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "hardcoded"); + + b.PrimitiveCollection("Hdr") + .IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "hdr"); + + b.Property("ImdbId") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "imdb_id"); + + b.Property("IngestedAt") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasDefaultValueSql("now() at time zone 'utc'") + .HasAnnotation("Relational:JsonPropertyName", "ingested_at"); + + b.Property("Is3d") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "_3d"); + + b.Property("IsAdult") + .ValueGeneratedOnAdd() + .HasColumnType("boolean") + .HasDefaultValue(false) + .HasAnnotation("Relational:JsonPropertyName", "adult"); + + b.PrimitiveCollection("Languages") + 
.IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "languages"); + + b.Property("Network") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "network"); + + b.Property("NormalizedTitle") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "normalized_title"); + + b.Property("ParsedTitle") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "parsed_title"); + + b.Property("Ppv") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "ppv"); + + b.Property("Proper") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "proper"); + + b.Property("Quality") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "quality"); + + b.Property("RawTitle") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "raw_title"); + + b.Property("Region") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "region"); + + b.Property("Remastered") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "remastered"); + + b.Property("Repack") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "repack"); + + b.Property("Resolution") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "resolution"); + + b.Property("Retail") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "retail"); + + b.PrimitiveCollection("Seasons") + .IsRequired() + .HasColumnType("integer[]") + .HasAnnotation("Relational:JsonPropertyName", "seasons"); + + b.Property("Site") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "site"); + + b.Property("Size") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "size"); + + b.Property("Subbed") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", 
"subbed"); + + b.Property("Torrent") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "torrent"); + + b.Property("Trash") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "trash"); + + b.Property("Unrated") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "unrated"); + + b.Property("Upscaled") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "upscaled"); + + b.PrimitiveCollection("Volumes") + .IsRequired() + .HasColumnType("integer[]") + .HasAnnotation("Relational:JsonPropertyName", "volumes"); + + b.Property("Year") + .HasColumnType("integer") + .HasAnnotation("Relational:JsonPropertyName", "year"); + + b.HasKey("InfoHash"); + + b.HasIndex("CleanedParsedTitle") + .HasDatabaseName("idx_cleaned_parsed_title_trgm"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("CleanedParsedTitle"), "GIN"); + NpgsqlIndexBuilderExtensions.HasOperators(b.HasIndex("CleanedParsedTitle"), new[] { "gin_trgm_ops" }); + + b.HasIndex("Episodes") + .HasDatabaseName("idx_episodes_gin"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("Episodes"), "GIN"); + + b.HasIndex("ImdbId") + .HasDatabaseName("idx_torrents_imdbid"); + + b.HasIndex("InfoHash") + .IsUnique(); + + b.HasIndex("IngestedAt") + .IsDescending() + .HasDatabaseName("idx_ingested_at"); + + b.HasIndex("IsAdult") + .HasDatabaseName("idx_torrents_isadult"); + + b.HasIndex("Languages") + .HasDatabaseName("idx_languages_gin"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("Languages"), "GIN"); + + b.HasIndex("Seasons") + .HasDatabaseName("idx_seasons_gin"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("Seasons"), "GIN"); + + b.HasIndex("Trash") + .HasDatabaseName("idx_torrents_trash"); + + b.HasIndex("Year") + .HasDatabaseName("idx_year"); + + b.ToTable("Torrents", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Imdb.ImdbFile", b => + { + b.Property("ImdbId") + 
.HasColumnType("text"); + + b.Property("Adult") + .HasColumnType("boolean"); + + b.Property("Category") + .HasColumnType("text"); + + b.Property("Title") + .HasColumnType("text"); + + b.Property("Year") + .HasColumnType("integer"); + + b.HasKey("ImdbId"); + + b.HasIndex("ImdbId") + .IsUnique(); + + b.ToTable("ImdbFiles", (string)null); + + b.HasAnnotation("Relational:JsonPropertyName", "imdb"); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Statistics.ImportMetadata", b => + { + b.Property("Key") + .HasColumnType("text"); + + b.Property("Value") + .IsRequired() + .HasColumnType("jsonb"); + + b.HasKey("Key"); + + b.ToTable("ImportMetadata", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Dmm.TorrentInfo", b => + { + b.HasOne("Zilean.Shared.Features.Imdb.ImdbFile", "Imdb") + .WithMany() + .HasForeignKey("ImdbId"); + + b.Navigation("Imdb"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs new file mode 100644 index 0000000..f35c837 --- /dev/null +++ b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs @@ -0,0 +1,24 @@ +using Microsoft.EntityFrameworkCore.Migrations; +using Zilean.Database.Functions; + +#nullable disable + +namespace Zilean.Database.Migrations; + +/// +public partial class SearchV6FilteredThreshold : Migration +{ + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.Sql(SearchTorrentsMetaV5.Remove); + migrationBuilder.Sql(SearchTorrentsMetaV6.Create); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.Sql(SearchTorrentsMetaV6.Remove); + migrationBuilder.Sql(SearchTorrentsMetaV5.Create); + } +} diff --git a/src/Zilean.Database/Services/BaseDapperService.cs b/src/Zilean.Database/Services/BaseDapperService.cs index f916600..b62e58f 100644 --- 
a/src/Zilean.Database/Services/BaseDapperService.cs +++ b/src/Zilean.Database/Services/BaseDapperService.cs @@ -16,7 +16,7 @@ protected async Task ExecuteCommandAsync(Func operation, catch (Exception ex) { logger.LogError(ex, "An error occurred while executing a command."); - Process.GetCurrentProcess().Kill(); + throw; } } diff --git a/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs b/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs index f9ff01f..b04f9ef 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs @@ -2,7 +2,10 @@ namespace Zilean.Scraper.Features.Ingestion.Dmm; public class DmmFileDownloader(ILogger logger, ZileanConfiguration configuration) { - private const string Filename = "main.zip"; + private const string RepoUrl = "https://github.com/debridmediamanager/hashlists.git"; + private const string RepoBranch = "main"; + private const int MaxRetryAttempts = 5; + private static readonly TimeSpan _initialRetryDelay = TimeSpan.FromSeconds(5); private static readonly IReadOnlyCollection _filesToIgnore = [ @@ -10,96 +13,212 @@ public class DmmFileDownloader(ILogger logger, ZileanConfigur "404.html", "dedupe.sh", "CNAME", + ".git", ]; public async Task DownloadFileToTempPath(DmmLastImport? 
dmmLastImport, CancellationToken cancellationToken) { - logger.LogInformation("Downloading DMM Hashlists"); + logger.LogInformation("Syncing DMM Hashlists"); - var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists"); + var dataDirectory = Path.Combine(AppContext.BaseDirectory, "data", "DMMHashlists"); if (dmmLastImport is not null) { if (DateTime.UtcNow - dmmLastImport.OccuredAt < TimeSpan.FromMinutes(configuration.Dmm.MinimumReDownloadIntervalMinutes)) { - logger.LogInformation("DMM Hashlists download not required as last download was less than the configured {Minutes} minutes re-download interval set in DMM Configuration.", configuration.Dmm.MinimumReDownloadIntervalMinutes); - return tempDirectory; + logger.LogInformation("DMM Hashlists sync not required as last sync was less than the configured {Minutes} minutes re-download interval set in DMM Configuration.", configuration.Dmm.MinimumReDownloadIntervalMinutes); + return dataDirectory; } } - var client = CreateHttpClient(); - var response = await client.GetAsync(Filename, HttpCompletionOption.ResponseHeadersRead, cancellationToken); + var repoDirectory = Path.Combine(dataDirectory, "repo"); + var gitDirectory = Path.Combine(repoDirectory, ".git"); - EnsureDirectoryIsClean(tempDirectory); + var githubToken = Environment.GetEnvironmentVariable("GITHUB_TOKEN"); + var repoUrlWithAuth = GetRepoUrlWithAuth(githubToken); - response.EnsureSuccessStatusCode(); - - var tempFilePath = Path.Combine(tempDirectory, "DMMHashlists.zip"); - await using (var stream = await response.Content.ReadAsStreamAsync(cancellationToken)) - await using (var fileStream = new FileStream(tempFilePath, FileMode.Create, FileAccess.Write, FileShare.None, 8192, true)) + if (Directory.Exists(gitDirectory)) + { + logger.LogInformation("Repository exists, pulling latest changes"); + await GitPullAsync(repoDirectory, repoUrlWithAuth, cancellationToken); + } + else { - await stream.CopyToAsync(fileStream, cancellationToken); + 
logger.LogInformation("Repository does not exist, cloning"); + EnsureDirectoryIsClean(dataDirectory); + await GitCloneAsync(repoUrlWithAuth, repoDirectory, cancellationToken); } - ExtractZipFile(tempFilePath, tempDirectory); + CopyFilesToDataDirectory(repoDirectory, dataDirectory); - File.Delete(tempFilePath); + logger.LogInformation("Synced Repository to {DataDirectory}", dataDirectory); + + return dataDirectory; + } - foreach (var file in _filesToIgnore) + private string GetRepoUrlWithAuth(string? githubToken) + { + if (string.IsNullOrWhiteSpace(githubToken)) { - CleanRepoExtras(tempDirectory, file); + logger.LogDebug("No GITHUB_TOKEN environment variable found. Git operations may be rate limited"); + return RepoUrl; } - logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory); - - return tempDirectory; + logger.LogInformation("Using GITHUB_TOKEN for authenticated Git operations to avoid rate limiting"); + // Format: https://@github.com/owner/repo.git + return RepoUrl.Replace("https://", $"https://{githubToken}@"); } - private static void ExtractZipFile(string zipFilePath, string extractPath) + private async Task GitCloneAsync(string repoUrl, string targetDirectory, CancellationToken cancellationToken) { - using var fileStream = new FileStream(zipFilePath, FileMode.Open, FileAccess.Read, FileShare.Read); - using var archive = new ZipArchive(fileStream, ZipArchiveMode.Read); + await ExecuteWithRetryAsync(async () => + { + var process = new Process + { + StartInfo = new ProcessStartInfo + { + FileName = "git", + Arguments = $"clone --depth 1 --branch {RepoBranch} --single-branch \"{repoUrl}\" \"{targetDirectory}\"", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + } + }; + + await RunGitProcessAsync(process, "clone", cancellationToken); + }, "clone", targetDirectory, cancellationToken); + } - foreach (var entry in archive.Entries) + private async Task 
GitPullAsync(string repoDirectory, string repoUrl, CancellationToken cancellationToken) + { + // Update the remote URL in case the token changed + var setUrlProcess = new Process { - var entryPath = Path.Combine(extractPath, Path.GetFileName(entry.FullName)); - if (!entry.FullName.EndsWith('/')) + StartInfo = new ProcessStartInfo { - entry.ExtractToFile(entryPath, true); + FileName = "git", + Arguments = $"-C \"{repoDirectory}\" remote set-url origin \"{repoUrl}\"", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, } - } + }; + + await RunGitProcessAsync(setUrlProcess, "remote set-url", cancellationToken); + + // Pull latest changes with retry + await ExecuteWithRetryAsync(async () => + { + var pullProcess = new Process + { + StartInfo = new ProcessStartInfo + { + FileName = "git", + Arguments = $"-C \"{repoDirectory}\" pull --ff-only", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + } + }; + + await RunGitProcessAsync(pullProcess, "pull", cancellationToken); + }, "pull", repoDirectory, cancellationToken); } - private static void CleanRepoExtras(string tempDirectory, string fileName) + private async Task RunGitProcessAsync(Process process, string operation, CancellationToken cancellationToken) { - var repoIndex = Path.Combine(tempDirectory, fileName); + process.Start(); + + var outputTask = process.StandardOutput.ReadToEndAsync(cancellationToken); + var errorTask = process.StandardError.ReadToEndAsync(cancellationToken); + + await process.WaitForExitAsync(cancellationToken); + + var output = await outputTask; + var error = await errorTask; - if (File.Exists(repoIndex)) + if (process.ExitCode != 0) { - File.Delete(repoIndex); + logger.LogError("Git {Operation} failed with exit code {ExitCode}: {Error}", operation, process.ExitCode, error); + throw new InvalidOperationException($"Git {operation} failed: {error}"); + } + + if 
(!string.IsNullOrWhiteSpace(output)) + { + logger.LogDebug("Git {Operation} output: {Output}", operation, output); } } - private static void EnsureDirectoryIsClean(string tempDirectory) + private async Task ExecuteWithRetryAsync(Func operation, string operationName, string targetDirectory, CancellationToken cancellationToken) { - if (Directory.Exists(tempDirectory)) + var attempt = 0; + var delay = _initialRetryDelay; + + while (true) { - Directory.Delete(tempDirectory, true); + attempt++; + try + { + await operation(); + return; + } + catch (InvalidOperationException ex) when (attempt < MaxRetryAttempts && !cancellationToken.IsCancellationRequested) + { + logger.LogWarning( + "Git {Operation} attempt {Attempt}/{MaxAttempts} failed. Retrying in {Delay} seconds... Error: {Error}", + operationName, + attempt, + MaxRetryAttempts, + delay.TotalSeconds, + ex.Message); + + // Clean up the target directory before retry for clone operations + if (operationName == "clone" && Directory.Exists(targetDirectory)) + { + try + { + Directory.Delete(targetDirectory, true); + } + catch (Exception cleanupEx) + { + logger.LogWarning("Failed to clean up directory {Directory} before retry: {Error}", targetDirectory, cleanupEx.Message); + } + } + + await Task.Delay(delay, cancellationToken); + delay = TimeSpan.FromSeconds(Math.Min(delay.TotalSeconds * 2, 60)); // Exponential backoff, max 60 seconds + } } + } - Directory.CreateDirectory(tempDirectory); + private void CopyFilesToDataDirectory(string repoDirectory, string dataDirectory) + { + var files = Directory.GetFiles(repoDirectory); + + foreach (var file in files) + { + var fileName = Path.GetFileName(file); + + if (_filesToIgnore.Contains(fileName)) + { + continue; + } + + var destPath = Path.Combine(dataDirectory, fileName); + File.Copy(file, destPath, true); + } } - private static HttpClient CreateHttpClient() + private static void EnsureDirectoryIsClean(string directory) { - var httpClient = new HttpClient + if 
(Directory.Exists(directory)) { - BaseAddress = new Uri("https://github.com/debridmediamanager/hashlists/zipball/main/"), - Timeout = TimeSpan.FromMinutes(10), - }; + Directory.Delete(directory, true); + } - httpClient.DefaultRequestHeaders.Add("Accept-Encoding", "gzip"); - httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("curl/7.54"); - return httpClient; + Directory.CreateDirectory(directory); } } diff --git a/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs index e7efff5..b70c656 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs @@ -28,6 +28,12 @@ public async Task ProcessFilesAsync(List files, CancellationToken cancel private async Task ProduceEntriesAsync(ChannelWriter> writer, CancellationToken cancellationToken) { + var totalFiles = _filesToProcess.Count; + var processedFiles = 0; + var skippedFiles = 0; + var newTorrentsFound = 0; + var lastProgressLog = Stopwatch.StartNew(); + foreach (var file in _filesToProcess) { if (cancellationToken.IsCancellationRequested) @@ -39,14 +45,15 @@ private async Task ProduceEntriesAsync(ChannelWriter> wr var fileName = Path.GetFileName(file); if (ExistingPages.TryGetValue(fileName, out _) || NewPages.TryGetValue(fileName, out _)) { + skippedFiles++; + processedFiles++; continue; } - _logger.LogInformation("Processing file: {FileName}", fileName); - try { var torrents = await ProcessPageAsync(file, fileName, cancellationToken); + newTorrentsFound += torrents.Count; foreach (var torrent in torrents) { await writer.WriteAsync(Task.FromResult(torrent), cancellationToken); @@ -56,6 +63,24 @@ private async Task ProduceEntriesAsync(ChannelWriter> wr { _logger.LogError(ex, "Error processing file: {FileName}", fileName); } + + processedFiles++; + + // Log progress every 60 seconds + if 
(lastProgressLog.Elapsed.TotalSeconds >= 60) + { + var percentage = totalFiles > 0 ? (double)processedFiles / totalFiles * 100 : 0; + _logger.LogInformation("DMM sync progress: {Processed}/{Total} files ({Percentage:F1}%), {Skipped} skipped, {NewTorrents} new torrents found", + processedFiles, totalFiles, percentage, skippedFiles, newTorrentsFound); + lastProgressLog.Restart(); + } + } + + // Final progress log + if (processedFiles > 0) + { + _logger.LogInformation("DMM sync complete: {Processed}/{Total} files processed, {Skipped} skipped, {NewTorrents} new torrents found", + processedFiles, totalFiles, skippedFiles, newTorrentsFound); } writer.Complete(); diff --git a/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs index 28ea7e4..b5be3f9 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs @@ -13,9 +13,11 @@ public abstract class GenericProcessor( protected readonly ZileanConfiguration _configuration = configuration; private HashSet _blacklistedHashes = []; private readonly ObjectPool> _torrentsListPool = new DefaultObjectPoolProvider().Create>(); + private int _batchNumber; protected async Task ProcessAsync(Func>, CancellationToken, Task> producerAction, CancellationToken cancellationToken) { + _batchNumber = 0; _blacklistedHashes = await torrentInfoService.GetBlacklistedItems(); var channel = Channel.CreateBounded>(new BoundedChannelOptions(_configuration.Parsing.BatchSize * 2) @@ -78,6 +80,7 @@ private async Task ConsumeAsync(ChannelReader> reader, Cancellation private async Task OnProcessTorrentsAsync(List> batch, CancellationToken cancellationToken) { + var currentBatch = Interlocked.Increment(ref _batchNumber); var torrents = _torrentsListPool.Get(); try @@ -116,6 +119,9 @@ private async Task OnProcessTorrentsAsync(List> batch, Cancellation await 
torrentInfoService.StoreTorrentInfo(finalizedTorrents); _processedCounts.AddProcessed(finalizedTorrents.Count); + + _logger.LogInformation("Batch {BatchNumber} complete: {NewCount} new torrents stored, {TotalProcessed} total processed so far", + currentBatch, finalizedTorrents.Count, _processedCounts.TotalProcessed); } } catch (OperationCanceledException) diff --git a/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs b/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs index 6d4a247..627567d 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs @@ -15,6 +15,7 @@ public void Reset() Interlocked.Exchange(ref _blacklistedRemoved, 0); } + public int TotalProcessed => _totalProcessed; public void AddProcessed(int count) => Interlocked.Add(ref _totalProcessed, count); public void AddAdultRemoved(int count) => Interlocked.Add(ref _adultRemoved, count); public void AddTrashRemoved(int count) => Interlocked.Add(ref _trashRemoved, count); diff --git a/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs b/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs index 14983b5..5a715cf 100644 --- a/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs @@ -1,17 +1,57 @@ +using Npgsql; + namespace Zilean.Shared.Features.Configuration; public class DatabaseConfiguration { - public string ConnectionString { get; set; } + public string ConnectionString { get; set; } - public DatabaseConfiguration() - { - var password = Environment.GetEnvironmentVariable("POSTGRES_PASSWORD"); - if (string.IsNullOrWhiteSpace(password)) + public DatabaseConfiguration() { - throw new InvalidOperationException("Environment variable POSTGRES_PASSWORD is not set."); + // Check for full connection string first (backwards compat with v3.5.0) + var fullConnString = 
Environment.GetEnvironmentVariable("Zilean__Database__ConnectionString"); + if (!string.IsNullOrWhiteSpace(fullConnString)) + { + ConnectionString = fullConnString; + return; + } + + // Build from individual env vars + var host = Environment.GetEnvironmentVariable("POSTGRES_HOST") ?? "localhost"; + var port = Environment.GetEnvironmentVariable("POSTGRES_PORT") ?? "5432"; + var db = Environment.GetEnvironmentVariable("POSTGRES_DB") ?? "zilean"; + var user = Environment.GetEnvironmentVariable("POSTGRES_USER") ?? "postgres"; + var password = Environment.GetEnvironmentVariable("POSTGRES_PASSWORD") ?? ""; + + var builder = new NpgsqlConnectionStringBuilder + { + Host = host, + Port = int.Parse(port), + Database = db, + Username = user, + Password = password, + IncludeErrorDetail = true, + Timeout = 30, + CommandTimeout = 3600, + }; + + ConnectionString = builder.ConnectionString; } - ConnectionString = $"Host=postgres;Database=zilean;Username=postgres;Password={password};Include Error Detail=true;Timeout=30;CommandTimeout=3600;"; - } + /// + /// Returns true if the configured password is empty or a known insecure default. 
+ /// + public bool HasInsecurePassword() + { + try + { + var parsed = new NpgsqlConnectionStringBuilder(ConnectionString); + return string.IsNullOrEmpty(parsed.Password) || + string.Equals(parsed.Password, "postgres", StringComparison.OrdinalIgnoreCase); + } + catch + { + return false; + } + } } diff --git a/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs b/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs index 04fdcc8..df7f4fa 100644 --- a/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs @@ -35,6 +35,9 @@ public static IConfigurationBuilder AddLoggingConfiguration(this IConfigurationB private static void EnsureExists(string configurationFolderPath) { var loggingPath = Path.Combine(configurationFolderPath, ConfigurationLiterals.LoggingConfigFilename); - File.WriteAllText(loggingPath, DefaultLoggingContents); + if (!File.Exists(loggingPath)) + { + File.WriteAllText(loggingPath, DefaultLoggingContents); + } } } diff --git a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs index c062ab9..479f99e 100644 --- a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs @@ -28,6 +28,62 @@ public static void EnsureExists() } } + /// + /// Validates the configuration and returns a list of error messages. Empty list means valid. 
+ /// + public List Validate() + { + var errors = new List(); + + if (Dmm.MaxFilteredResults <= 0) + { + errors.Add("Dmm.MaxFilteredResults must be greater than 0"); + } + + if (Dmm.MinimumScoreMatch is < 0 or > 1) + { + errors.Add("Dmm.MinimumScoreMatch must be between 0 and 1"); + } + + if (Dmm.MinimumReDownloadIntervalMinutes < 0) + { + errors.Add("Dmm.MinimumReDownloadIntervalMinutes must be non-negative"); + } + + if (!IsValidCronExpression(Dmm.ScrapeSchedule)) + { + errors.Add($"Dmm.ScrapeSchedule '{Dmm.ScrapeSchedule}' is not a valid cron expression"); + } + + if (!IsValidCronExpression(Ingestion.ScrapeSchedule)) + { + errors.Add($"Ingestion.ScrapeSchedule '{Ingestion.ScrapeSchedule}' is not a valid cron expression"); + } + + if (Parsing.BatchSize <= 0) + { + errors.Add("Parsing.BatchSize must be greater than 0"); + } + + if (string.IsNullOrWhiteSpace(Database.ConnectionString)) + { + errors.Add("Database.ConnectionString is empty — check POSTGRES_* or Zilean__Database__ConnectionString env vars"); + } + + return errors; + } + + private static bool IsValidCronExpression(string? 
cron) + { + if (string.IsNullOrWhiteSpace(cron)) + { + return false; + } + + var parts = cron.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries); + return parts.Length == 5; + } + private static string DefaultConfigurationContents() { var mainSettings = new Dictionary diff --git a/src/Zilean.Shared/Zilean.Shared.csproj b/src/Zilean.Shared/Zilean.Shared.csproj index 136e768..9eebf82 100644 --- a/src/Zilean.Shared/Zilean.Shared.csproj +++ b/src/Zilean.Shared/Zilean.Shared.csproj @@ -14,6 +14,7 @@ + diff --git a/tests/Zilean.Tests/Tests/ConfigurationTests.cs b/tests/Zilean.Tests/Tests/ConfigurationTests.cs index d41220f..5809de0 100644 --- a/tests/Zilean.Tests/Tests/ConfigurationTests.cs +++ b/tests/Zilean.Tests/Tests/ConfigurationTests.cs @@ -130,6 +130,135 @@ public void adds_json_configuration_file_to_builder_with_fake_filesystem_gets_in Directory.Delete(testsFolder, true); } + [Fact] + public void database_configuration_defaults_without_env_vars() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().NotBeNullOrWhiteSpace(); + dbConfig.ConnectionString.Should().Contain("Host=localhost"); + dbConfig.ConnectionString.Should().Contain("Database=zilean"); + dbConfig.ConnectionString.Should().Contain("Username=postgres"); + } + finally + { + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_respects_full_connection_string_env_var() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + var expected = "Host=myhost;Database=mydb;Username=myuser;Password=mypass;"; + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", expected); + + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().Be(expected); + } + finally + { + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", null); + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void 
database_configuration_builds_from_individual_env_vars() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + Environment.SetEnvironmentVariable("POSTGRES_HOST", "db.example.com"); + Environment.SetEnvironmentVariable("POSTGRES_PORT", "5433"); + Environment.SetEnvironmentVariable("POSTGRES_DB", "mydb"); + Environment.SetEnvironmentVariable("POSTGRES_USER", "admin"); + Environment.SetEnvironmentVariable("POSTGRES_PASSWORD", "secret"); + + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().Contain("Host=db.example.com"); + dbConfig.ConnectionString.Should().Contain("Port=5433"); + dbConfig.ConnectionString.Should().Contain("Database=mydb"); + dbConfig.ConnectionString.Should().Contain("Username=admin"); + dbConfig.ConnectionString.Should().Contain("Password=secret"); + } + finally + { + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_escapes_special_chars_in_password() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + Environment.SetEnvironmentVariable("POSTGRES_PASSWORD", "p@ss#w0rd!&"); + + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().NotBeNullOrWhiteSpace(); + + // Verify it round-trips correctly + var parsed = new Npgsql.NpgsqlConnectionStringBuilder(dbConfig.ConnectionString); + parsed.Password.Should().Be("p@ss#w0rd!&"); + } + finally + { + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_full_connection_string_takes_priority_over_individual_vars() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + var expected = "Host=priority-host;Database=prioritydb;Username=user;Password=pass;"; + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", expected); + Environment.SetEnvironmentVariable("POSTGRES_HOST", "ignored-host"); + Environment.SetEnvironmentVariable("POSTGRES_PASSWORD", "ignored-pass"); + + var dbConfig = new DatabaseConfiguration(); + + 
dbConfig.ConnectionString.Should().Be(expected); + } + finally + { + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", null); + RestoreDatabaseEnvVars(savedVars); + } + } + + private static Dictionary ClearDatabaseEnvVars() + { + var vars = new[] { "POSTGRES_HOST", "POSTGRES_PORT", "POSTGRES_DB", "POSTGRES_USER", "POSTGRES_PASSWORD", "Zilean__Database__ConnectionString" }; + var saved = new Dictionary(); + foreach (var v in vars) + { + saved[v] = Environment.GetEnvironmentVariable(v); + Environment.SetEnvironmentVariable(v, null); + } + return saved; + } + + private static void RestoreDatabaseEnvVars(Dictionary saved) + { + foreach (var (key, value) in saved) + { + Environment.SetEnvironmentVariable(key, value); + } + } + private static string CreateTestFolder() { var testsFolder = Path.Combine(Path.GetTempPath(), "Zilean.Tests");