From bdb14ee211902d9ab635e32e6613924619b33fa7 Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 02:07:15 +0000 Subject: [PATCH 01/13] feat: flexible database connection string configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support three configuration methods (checked in order): 1. Zilean__Database__ConnectionString env var (full backwards compat) 2. Individual POSTGRES_* env vars (HOST, PORT, DB, USER, PASSWORD) 3. Sensible defaults (localhost:5432/zilean) Uses NpgsqlConnectionStringBuilder for proper escaping of special characters in passwords. Never throws in constructor — empty password is valid for trust auth. --- Directory.Packages.props | 1 + .../Configuration/DatabaseConfiguration.cs | 41 ++++-- src/Zilean.Shared/Zilean.Shared.csproj | 1 + .../Zilean.Tests/Tests/ConfigurationTests.cs | 129 ++++++++++++++++++ 4 files changed, 163 insertions(+), 9 deletions(-) diff --git a/Directory.Packages.props b/Directory.Packages.props index 139c77e..ff1df4b 100644 --- a/Directory.Packages.props +++ b/Directory.Packages.props @@ -21,6 +21,7 @@ + diff --git a/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs b/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs index 14983b5..f6e74b6 100644 --- a/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs @@ -1,17 +1,40 @@ +using Npgsql; + namespace Zilean.Shared.Features.Configuration; public class DatabaseConfiguration { - public string ConnectionString { get; set; } + public string ConnectionString { get; set; } - public DatabaseConfiguration() - { - var password = Environment.GetEnvironmentVariable("POSTGRES_PASSWORD"); - if (string.IsNullOrWhiteSpace(password)) + public DatabaseConfiguration() { - throw new InvalidOperationException("Environment variable POSTGRES_PASSWORD is not set."); - } + // Check for full connection string first 
(backwards compat with v3.5.0) + var fullConnString = Environment.GetEnvironmentVariable("Zilean__Database__ConnectionString"); + if (!string.IsNullOrWhiteSpace(fullConnString)) + { + ConnectionString = fullConnString; + return; + } + + // Build from individual env vars + var host = Environment.GetEnvironmentVariable("POSTGRES_HOST") ?? "localhost"; + var port = Environment.GetEnvironmentVariable("POSTGRES_PORT") ?? "5432"; + var db = Environment.GetEnvironmentVariable("POSTGRES_DB") ?? "zilean"; + var user = Environment.GetEnvironmentVariable("POSTGRES_USER") ?? "postgres"; + var password = Environment.GetEnvironmentVariable("POSTGRES_PASSWORD") ?? ""; - ConnectionString = $"Host=postgres;Database=zilean;Username=postgres;Password={password};Include Error Detail=true;Timeout=30;CommandTimeout=3600;"; - } + var builder = new NpgsqlConnectionStringBuilder + { + Host = host, + Port = int.Parse(port), + Database = db, + Username = user, + Password = password, + IncludeErrorDetail = true, + Timeout = 30, + CommandTimeout = 3600, + }; + + ConnectionString = builder.ConnectionString; + } } diff --git a/src/Zilean.Shared/Zilean.Shared.csproj b/src/Zilean.Shared/Zilean.Shared.csproj index 136e768..9eebf82 100644 --- a/src/Zilean.Shared/Zilean.Shared.csproj +++ b/src/Zilean.Shared/Zilean.Shared.csproj @@ -14,6 +14,7 @@ + diff --git a/tests/Zilean.Tests/Tests/ConfigurationTests.cs b/tests/Zilean.Tests/Tests/ConfigurationTests.cs index d41220f..5809de0 100644 --- a/tests/Zilean.Tests/Tests/ConfigurationTests.cs +++ b/tests/Zilean.Tests/Tests/ConfigurationTests.cs @@ -130,6 +130,135 @@ public void adds_json_configuration_file_to_builder_with_fake_filesystem_gets_in Directory.Delete(testsFolder, true); } + [Fact] + public void database_configuration_defaults_without_env_vars() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().NotBeNullOrWhiteSpace(); + 
dbConfig.ConnectionString.Should().Contain("Host=localhost"); + dbConfig.ConnectionString.Should().Contain("Database=zilean"); + dbConfig.ConnectionString.Should().Contain("Username=postgres"); + } + finally + { + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_respects_full_connection_string_env_var() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + var expected = "Host=myhost;Database=mydb;Username=myuser;Password=mypass;"; + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", expected); + + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().Be(expected); + } + finally + { + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", null); + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_builds_from_individual_env_vars() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + Environment.SetEnvironmentVariable("POSTGRES_HOST", "db.example.com"); + Environment.SetEnvironmentVariable("POSTGRES_PORT", "5433"); + Environment.SetEnvironmentVariable("POSTGRES_DB", "mydb"); + Environment.SetEnvironmentVariable("POSTGRES_USER", "admin"); + Environment.SetEnvironmentVariable("POSTGRES_PASSWORD", "secret"); + + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().Contain("Host=db.example.com"); + dbConfig.ConnectionString.Should().Contain("Port=5433"); + dbConfig.ConnectionString.Should().Contain("Database=mydb"); + dbConfig.ConnectionString.Should().Contain("Username=admin"); + dbConfig.ConnectionString.Should().Contain("Password=secret"); + } + finally + { + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_escapes_special_chars_in_password() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + Environment.SetEnvironmentVariable("POSTGRES_PASSWORD", "p@ss#w0rd!&"); + + var dbConfig = new DatabaseConfiguration(); + + 
dbConfig.ConnectionString.Should().NotBeNullOrWhiteSpace(); + + // Verify it round-trips correctly + var parsed = new Npgsql.NpgsqlConnectionStringBuilder(dbConfig.ConnectionString); + parsed.Password.Should().Be("p@ss#w0rd!&"); + } + finally + { + RestoreDatabaseEnvVars(savedVars); + } + } + + [Fact] + public void database_configuration_full_connection_string_takes_priority_over_individual_vars() + { + var savedVars = ClearDatabaseEnvVars(); + try + { + var expected = "Host=priority-host;Database=prioritydb;Username=user;Password=pass;"; + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", expected); + Environment.SetEnvironmentVariable("POSTGRES_HOST", "ignored-host"); + Environment.SetEnvironmentVariable("POSTGRES_PASSWORD", "ignored-pass"); + + var dbConfig = new DatabaseConfiguration(); + + dbConfig.ConnectionString.Should().Be(expected); + } + finally + { + Environment.SetEnvironmentVariable("Zilean__Database__ConnectionString", null); + RestoreDatabaseEnvVars(savedVars); + } + } + + private static Dictionary ClearDatabaseEnvVars() + { + var vars = new[] { "POSTGRES_HOST", "POSTGRES_PORT", "POSTGRES_DB", "POSTGRES_USER", "POSTGRES_PASSWORD", "Zilean__Database__ConnectionString" }; + var saved = new Dictionary(); + foreach (var v in vars) + { + saved[v] = Environment.GetEnvironmentVariable(v); + Environment.SetEnvironmentVariable(v, null); + } + return saved; + } + + private static void RestoreDatabaseEnvVars(Dictionary saved) + { + foreach (var (key, value) in saved) + { + Environment.SetEnvironmentVariable(key, value); + } + } + private static string CreateTestFolder() { var testsFolder = Path.Combine(Path.GetTempPath(), "Zilean.Tests"); From 935204a283971050457a92cc55f21d6e350d7aca Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 02:07:22 +0000 Subject: [PATCH 02/13] feat: incremental DMM sync via git clone/pull Replace 1.2GB zip download with git clone --depth 1 on first run and git pull --ff-only on subsequent runs. 
Repo persisted in data directory across restarts. Supports GITHUB_TOKEN env var for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry with 5 attempts. Adds git package to Docker image. --- Dockerfile | 1 + .../Ingestion/Dmm/DmmFileDownloader.cs | 209 ++++++++++++++---- 2 files changed, 165 insertions(+), 45 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1e5fbc5..45dc547 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,6 +18,7 @@ RUN apk add --update --no-cache \ python3=~3.11 \ py3-pip=~23.1 \ curl \ + git \ icu-libs \ && ln -sf python3 /usr/bin/python ENV DOTNET_RUNNING_IN_CONTAINER=true diff --git a/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs b/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs index f9ff01f..b04f9ef 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Dmm/DmmFileDownloader.cs @@ -2,7 +2,10 @@ namespace Zilean.Scraper.Features.Ingestion.Dmm; public class DmmFileDownloader(ILogger logger, ZileanConfiguration configuration) { - private const string Filename = "main.zip"; + private const string RepoUrl = "https://github.com/debridmediamanager/hashlists.git"; + private const string RepoBranch = "main"; + private const int MaxRetryAttempts = 5; + private static readonly TimeSpan _initialRetryDelay = TimeSpan.FromSeconds(5); private static readonly IReadOnlyCollection _filesToIgnore = [ @@ -10,96 +13,212 @@ public class DmmFileDownloader(ILogger logger, ZileanConfigur "404.html", "dedupe.sh", "CNAME", + ".git", ]; public async Task DownloadFileToTempPath(DmmLastImport? 
dmmLastImport, CancellationToken cancellationToken) { - logger.LogInformation("Downloading DMM Hashlists"); + logger.LogInformation("Syncing DMM Hashlists"); - var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists"); + var dataDirectory = Path.Combine(AppContext.BaseDirectory, "data", "DMMHashlists"); if (dmmLastImport is not null) { if (DateTime.UtcNow - dmmLastImport.OccuredAt < TimeSpan.FromMinutes(configuration.Dmm.MinimumReDownloadIntervalMinutes)) { - logger.LogInformation("DMM Hashlists download not required as last download was less than the configured {Minutes} minutes re-download interval set in DMM Configuration.", configuration.Dmm.MinimumReDownloadIntervalMinutes); - return tempDirectory; + logger.LogInformation("DMM Hashlists sync not required as last sync was less than the configured {Minutes} minutes re-download interval set in DMM Configuration.", configuration.Dmm.MinimumReDownloadIntervalMinutes); + return dataDirectory; } } - var client = CreateHttpClient(); - var response = await client.GetAsync(Filename, HttpCompletionOption.ResponseHeadersRead, cancellationToken); + var repoDirectory = Path.Combine(dataDirectory, "repo"); + var gitDirectory = Path.Combine(repoDirectory, ".git"); - EnsureDirectoryIsClean(tempDirectory); + var githubToken = Environment.GetEnvironmentVariable("GITHUB_TOKEN"); + var repoUrlWithAuth = GetRepoUrlWithAuth(githubToken); - response.EnsureSuccessStatusCode(); - - var tempFilePath = Path.Combine(tempDirectory, "DMMHashlists.zip"); - await using (var stream = await response.Content.ReadAsStreamAsync(cancellationToken)) - await using (var fileStream = new FileStream(tempFilePath, FileMode.Create, FileAccess.Write, FileShare.None, 8192, true)) + if (Directory.Exists(gitDirectory)) + { + logger.LogInformation("Repository exists, pulling latest changes"); + await GitPullAsync(repoDirectory, repoUrlWithAuth, cancellationToken); + } + else { - await stream.CopyToAsync(fileStream, cancellationToken); + 
logger.LogInformation("Repository does not exist, cloning"); + EnsureDirectoryIsClean(dataDirectory); + await GitCloneAsync(repoUrlWithAuth, repoDirectory, cancellationToken); } - ExtractZipFile(tempFilePath, tempDirectory); + CopyFilesToDataDirectory(repoDirectory, dataDirectory); - File.Delete(tempFilePath); + logger.LogInformation("Synced Repository to {DataDirectory}", dataDirectory); + + return dataDirectory; + } - foreach (var file in _filesToIgnore) + private string GetRepoUrlWithAuth(string? githubToken) + { + if (string.IsNullOrWhiteSpace(githubToken)) { - CleanRepoExtras(tempDirectory, file); + logger.LogDebug("No GITHUB_TOKEN environment variable found. Git operations may be rate limited"); + return RepoUrl; } - logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory); - - return tempDirectory; + logger.LogInformation("Using GITHUB_TOKEN for authenticated Git operations to avoid rate limiting"); + // Format: https://@github.com/owner/repo.git + return RepoUrl.Replace("https://", $"https://{githubToken}@"); } - private static void ExtractZipFile(string zipFilePath, string extractPath) + private async Task GitCloneAsync(string repoUrl, string targetDirectory, CancellationToken cancellationToken) { - using var fileStream = new FileStream(zipFilePath, FileMode.Open, FileAccess.Read, FileShare.Read); - using var archive = new ZipArchive(fileStream, ZipArchiveMode.Read); + await ExecuteWithRetryAsync(async () => + { + var process = new Process + { + StartInfo = new ProcessStartInfo + { + FileName = "git", + Arguments = $"clone --depth 1 --branch {RepoBranch} --single-branch \"{repoUrl}\" \"{targetDirectory}\"", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + } + }; + + await RunGitProcessAsync(process, "clone", cancellationToken); + }, "clone", targetDirectory, cancellationToken); + } - foreach (var entry in archive.Entries) + private async Task 
GitPullAsync(string repoDirectory, string repoUrl, CancellationToken cancellationToken) + { + // Update the remote URL in case the token changed + var setUrlProcess = new Process { - var entryPath = Path.Combine(extractPath, Path.GetFileName(entry.FullName)); - if (!entry.FullName.EndsWith('/')) + StartInfo = new ProcessStartInfo { - entry.ExtractToFile(entryPath, true); + FileName = "git", + Arguments = $"-C \"{repoDirectory}\" remote set-url origin \"{repoUrl}\"", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, } - } + }; + + await RunGitProcessAsync(setUrlProcess, "remote set-url", cancellationToken); + + // Pull latest changes with retry + await ExecuteWithRetryAsync(async () => + { + var pullProcess = new Process + { + StartInfo = new ProcessStartInfo + { + FileName = "git", + Arguments = $"-C \"{repoDirectory}\" pull --ff-only", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true, + } + }; + + await RunGitProcessAsync(pullProcess, "pull", cancellationToken); + }, "pull", repoDirectory, cancellationToken); } - private static void CleanRepoExtras(string tempDirectory, string fileName) + private async Task RunGitProcessAsync(Process process, string operation, CancellationToken cancellationToken) { - var repoIndex = Path.Combine(tempDirectory, fileName); + process.Start(); + + var outputTask = process.StandardOutput.ReadToEndAsync(cancellationToken); + var errorTask = process.StandardError.ReadToEndAsync(cancellationToken); + + await process.WaitForExitAsync(cancellationToken); + + var output = await outputTask; + var error = await errorTask; - if (File.Exists(repoIndex)) + if (process.ExitCode != 0) { - File.Delete(repoIndex); + logger.LogError("Git {Operation} failed with exit code {ExitCode}: {Error}", operation, process.ExitCode, error); + throw new InvalidOperationException($"Git {operation} failed: {error}"); + } + + if 
(!string.IsNullOrWhiteSpace(output)) + { + logger.LogDebug("Git {Operation} output: {Output}", operation, output); } } - private static void EnsureDirectoryIsClean(string tempDirectory) + private async Task ExecuteWithRetryAsync(Func operation, string operationName, string targetDirectory, CancellationToken cancellationToken) { - if (Directory.Exists(tempDirectory)) + var attempt = 0; + var delay = _initialRetryDelay; + + while (true) { - Directory.Delete(tempDirectory, true); + attempt++; + try + { + await operation(); + return; + } + catch (InvalidOperationException ex) when (attempt < MaxRetryAttempts && !cancellationToken.IsCancellationRequested) + { + logger.LogWarning( + "Git {Operation} attempt {Attempt}/{MaxAttempts} failed. Retrying in {Delay} seconds... Error: {Error}", + operationName, + attempt, + MaxRetryAttempts, + delay.TotalSeconds, + ex.Message); + + // Clean up the target directory before retry for clone operations + if (operationName == "clone" && Directory.Exists(targetDirectory)) + { + try + { + Directory.Delete(targetDirectory, true); + } + catch (Exception cleanupEx) + { + logger.LogWarning("Failed to clean up directory {Directory} before retry: {Error}", targetDirectory, cleanupEx.Message); + } + } + + await Task.Delay(delay, cancellationToken); + delay = TimeSpan.FromSeconds(Math.Min(delay.TotalSeconds * 2, 60)); // Exponential backoff, max 60 seconds + } } + } - Directory.CreateDirectory(tempDirectory); + private void CopyFilesToDataDirectory(string repoDirectory, string dataDirectory) + { + var files = Directory.GetFiles(repoDirectory); + + foreach (var file in files) + { + var fileName = Path.GetFileName(file); + + if (_filesToIgnore.Contains(fileName)) + { + continue; + } + + var destPath = Path.Combine(dataDirectory, fileName); + File.Copy(file, destPath, true); + } } - private static HttpClient CreateHttpClient() + private static void EnsureDirectoryIsClean(string directory) { - var httpClient = new HttpClient + if 
(Directory.Exists(directory)) { - BaseAddress = new Uri("https://github.com/debridmediamanager/hashlists/zipball/main/"), - Timeout = TimeSpan.FromMinutes(10), - }; + Directory.Delete(directory, true); + } - httpClient.DefaultRequestHeaders.Add("Accept-Encoding", "gzip"); - httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("curl/7.54"); - return httpClient; + Directory.CreateDirectory(directory); } } From cd2dee63acb43b6145b975d103eb25a0da384585 Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 02:07:27 +0000 Subject: [PATCH 03/13] fix: prevent logging config overwrite on restart Only write logging.json if it doesn't exist, preserving user customizations across container restarts. --- .../Features/Configuration/LoggingConfiguration.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs b/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs index 04fdcc8..df7f4fa 100644 --- a/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/LoggingConfiguration.cs @@ -35,6 +35,9 @@ public static IConfigurationBuilder AddLoggingConfiguration(this IConfigurationB private static void EnsureExists(string configurationFolderPath) { var loggingPath = Path.Combine(configurationFolderPath, ConfigurationLiterals.LoggingConfigFilename); - File.WriteAllText(loggingPath, DefaultLoggingContents); + if (!File.Exists(loggingPath)) + { + File.WriteAllText(loggingPath, DefaultLoggingContents); + } } } From fdf6849d409e68a6b9ab68eb44d6270a18a179da Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 02:07:32 +0000 Subject: [PATCH 04/13] ci: switch to GHCR and add build verification - Push images to ghcr.io/thoroslives/zilean instead of Docker Hub - Add dotnet build check on PRs - Use GITHUB_TOKEN for GHCR auth (no separate secrets needed) - Multi-arch: linux/amd64 + linux/arm64 --- .github/workflows/cicd.yaml | 76 
++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index b77d9cf..55d37a5 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -1,61 +1,59 @@ -name: CI / CD for Zilean +name: CI/CD on: push: - tags: - - 'v[0-9]+.[0-9]+.[0-9]+' - workflow_dispatch: + branches: [main] + tags: ['v[0-9]+.[0-9]+.[0-9]+'] + pull_request: + branches: [main] env: - IMAGE_NAME: ipromknight/zilean + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} jobs: - execution: + build-and-test: runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-dotnet@v4 + with: + dotnet-version: '9.0.x' + - run: dotnet restore + - run: dotnet build --no-restore -c Release + + docker: + needs: build-and-test + runs-on: ubuntu-latest + if: github.event_name == 'push' permissions: contents: read - name: Build Zilean Image + packages: write steps: - - name: Checkout code - uses: actions/checkout@v4.1.2 - - - name: Docker Setup QEMU - uses: docker/setup-qemu-action@v3 - id: qemu + - uses: actions/checkout@v4 + - uses: docker/setup-qemu-action@v3 with: platforms: amd64,arm64 - - - name: Login to Docker Hub - uses: docker/login-action@v3 + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 with: - username: ${{ vars.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3.2.0 - - - name: Build Docker Metadata - id: docker-metadata - uses: docker/metadata-action@v5 + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - uses: docker/metadata-action@v5 + id: meta with: - images: ${{ env.IMAGE_NAME }} - flavor: | - latest=auto + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=ref,event=tag - type=sha,commit=${{ github.sha }} type=semver,pattern={{version}} + type=sha 
type=raw,value=latest,enable={{is_default_branch}} - - - name: Push Service Image to repo - uses: docker/build-push-action@v5 + - uses: docker/build-push-action@v5 with: context: . - file: ./Dockerfile push: true - provenance: mode=max - tags: ${{ steps.docker-metadata.outputs.tags }} - labels: ${{ steps.docker-metadata.outputs.labels }} platforms: linux/amd64,linux/arm64 - cache-from: type=gha,scope=${{ github.workflow }} - cache-to: type=gha,mode=max,scope=${{ github.workflow }} \ No newline at end of file + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max From 78eb72d3d72c48575a505761fe699167abf8257e Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 02:07:38 +0000 Subject: [PATCH 05/13] docs: update README with new configuration options Document all three database connection methods, GITHUB_TOKEN for DMM sync, and add docker-compose example. --- README.md | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c450830..724532f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# What is Zilean +# Zilean (Thoroslives Fork) zilean logo @@ -6,9 +6,87 @@ Zilean is a service that allows you to search for [DebridMediaManager](https://g This can then be configured as a Torznab indexer in your favorite content application. Newly added is the ability for Zilean to scrape from your running Zurg instance, and from other running Zilean instances. 
-Documentation for zilean can be viewed at [https://ipromknight.github.io/zilean/](https://ipromknight.github.io/zilean/) +Upstream documentation: [https://ipromknight.github.io/zilean/](https://ipromknight.github.io/zilean/) ---- +## Fork Changes +This fork (based on [iPromKnight/zilean](https://github.com/iPromKnight/zilean) v3.5.0) includes: -Buy Me a Coffee at ko-fi.com +- **Flexible database configuration** — supports `Zilean__Database__ConnectionString` env var (backwards compat), individual `POSTGRES_*` env vars, or sensible defaults. Uses `NpgsqlConnectionStringBuilder` for proper escaping of special characters in passwords. +- **Incremental DMM sync** — replaces the 1.2GB zip download with `git clone --depth 1` on first run and `git pull` on subsequent runs. Supports `GITHUB_TOKEN` for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry. +- **Logging config preservation** — `logging.json` is only written if it doesn't exist, preserving user customizations across restarts. + +## Docker Image + +``` +ghcr.io/thoroslives/zilean:latest +``` + +## Configuration + +### Database Connection + +Three ways to configure the database connection (checked in this order): + +#### 1. Full Connection String (recommended for existing setups) + +```yaml +environment: + - Zilean__Database__ConnectionString=Host=postgres;Database=zilean;Username=postgres;Password=mypass;Include Error Detail=true;Timeout=30;CommandTimeout=3600; +``` + +#### 2. Individual Environment Variables + +```yaml +environment: + - POSTGRES_HOST=postgres # default: localhost + - POSTGRES_PORT=5432 # default: 5432 + - POSTGRES_DB=zilean # default: zilean + - POSTGRES_USER=postgres # default: postgres + - POSTGRES_PASSWORD=mypass # default: (empty) +``` + +#### 3. Defaults + +If no database env vars are set, connects to `localhost:5432/zilean` as `postgres` with no password (suitable for trust auth). 
+ +### DMM Sync + +Set `GITHUB_TOKEN` to avoid GitHub API rate limiting during DMM hashlist sync: + +```yaml +environment: + - GITHUB_TOKEN=ghp_xxxxxxxxxxxx +``` + +## Docker Compose Example + +```yaml +services: + zilean: + image: ghcr.io/thoroslives/zilean:latest + container_name: zilean + restart: unless-stopped + ports: + - "8181:8181" + volumes: + - zilean-data:/app/data + environment: + - POSTGRES_HOST=postgres + - POSTGRES_PASSWORD=your_password + - GITHUB_TOKEN=ghp_xxxxxxxxxxxx # optional, recommended + + postgres: + image: postgres:16-alpine + container_name: zilean-postgres + restart: unless-stopped + volumes: + - zilean-pg:/var/lib/postgresql/data + environment: + - POSTGRES_DB=zilean + - POSTGRES_PASSWORD=your_password + +volumes: + zilean-data: + zilean-pg: +``` From a06c92609a3ac3cf13471800458ae41517758ed5 Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 02:08:42 +0000 Subject: [PATCH 06/13] ci: add workflow_dispatch trigger --- .github/workflows/cicd.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index 55d37a5..47a78f8 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -6,6 +6,7 @@ on: tags: ['v[0-9]+.[0-9]+.[0-9]+'] pull_request: branches: [main] + workflow_dispatch: env: REGISTRY: ghcr.io From 0b2c3a11a23e2ac2905fc2125b4cada70d80cd5b Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 03:17:06 +0000 Subject: [PATCH 07/13] docs: add PostgreSQL shm_size requirement --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 724532f..448bdd5 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,7 @@ services: image: postgres:16-alpine container_name: zilean-postgres restart: unless-stopped + shm_size: 256m # required — default 64m causes "No space left on device" during bulk upserts volumes: - zilean-pg:/var/lib/postgresql/data environment: From 47f37b7e8e784550958df65cabfdbb5676d4730a Mon Sep 17 
00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 03:19:25 +0000 Subject: [PATCH 08/13] ci: use GITHUB_TOKEN for release-please --- .github/workflows/release-please.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release-please.yaml b/.github/workflows/release-please.yaml index 623e2bc..a6fcf02 100644 --- a/.github/workflows/release-please.yaml +++ b/.github/workflows/release-please.yaml @@ -16,4 +16,4 @@ jobs: steps: - uses: googleapis/release-please-action@v4 with: - token: ${{ secrets.RELEASE_PLEASE_TOKEN }} \ No newline at end of file + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From 2be0427f8315faad7d9fa38741a0ecc7e5ff291c Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 03:31:42 +0000 Subject: [PATCH 09/13] docs: add PostgreSQL shared memory troubleshooting section --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 448bdd5..435287e 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,16 @@ environment: - GITHUB_TOKEN=ghp_xxxxxxxxxxxx ``` +### PostgreSQL Shared Memory + +PostgreSQL's default shared memory (`shm_size`) of 64MB is too small for Zilean's bulk DMM upserts. You'll get errors like: + +``` +could not resize shared memory segment "/PostgreSQL.xxx" to 67146560 bytes: No space left on device +``` + +Set `shm_size: 256m` on your PostgreSQL container to fix this. See the docker-compose example below. 
+ ## Docker Compose Example ```yaml From 63a2da5670c6ddb8a3127bb7c4c9379602ea9003 Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 03:35:49 +0000 Subject: [PATCH 10/13] ci: only build Docker image on version tags --- .github/workflows/cicd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml index 47a78f8..fa8990b 100644 --- a/.github/workflows/cicd.yaml +++ b/.github/workflows/cicd.yaml @@ -26,7 +26,7 @@ jobs: docker: needs: build-and-test runs-on: ubuntu-latest - if: github.event_name == 'push' + if: startsWith(github.ref, 'refs/tags/v') permissions: contents: read packages: write From ae735b8f911fde311391ddf0aaee653d9a5e9238 Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 05:51:49 +0000 Subject: [PATCH 11/13] =?UTF-8?q?feat:=20full=20issue=20remediation=20?= =?UTF-8?q?=E2=80=94=20security,=20bug=20fixes,=20improvements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses all 12 open upstream issues and adds 7 new improvements: Security & Stability: - Warn at startup if Postgres password is empty/default (#21, #89) - Retry DB connection 5x with clear error messages (#18, #22) Bug Fixes: - Guard on-demand-scrape behind EnableScraping flag (#17) - Adaptive similarity threshold for filtered search (#88) Features: - TZ timezone support via tzdata (#100) - Multi-instance deployment docs (#94) - Requirements clarification — no Elasticsearch (#7) - Resource usage documentation (#89) Improvements: - Startup config validation (cron, numerics, required fields) - Dockerfile HEALTHCHECK with /healthchecks/ready - Replace Process.Kill() with throw in BaseDapperService - Log swallowed exceptions in search endpoints - Readiness health check endpoint with DB connectivity - Remove deprecated ISystemClock - DMM sync progress reporting (every 60s) --- Dockerfile | 4 + README.md | 109 ++++- .../ApiKeyAuthenticationHandler.cs 
| 3 +- .../Features/Bootstrapping/StartupService.cs | 68 ++++ .../HealthChecks/HealthCheckEndpoints.cs | 22 + .../Features/Search/SearchEndpoints.cs | 21 +- .../Functions/SearchTorrentsMetaV6.cs | 178 ++++++++ ...0000_SearchV6FilteredThreshold.Designer.cs | 385 ++++++++++++++++++ ...0260325000000_SearchV6FilteredThreshold.cs | 24 ++ .../Services/BaseDapperService.cs | 2 +- .../Processing/DmmFileEntryProcessor.cs | 29 +- .../Ingestion/Processing/GenericProcessor.cs | 6 + .../Ingestion/Processing/ProcessedCounts.cs | 1 + .../Configuration/DatabaseConfiguration.cs | 17 + .../Configuration/ZileanConfiguration.cs | 40 ++ 15 files changed, 887 insertions(+), 22 deletions(-) create mode 100644 src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs create mode 100644 src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs create mode 100644 src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs diff --git a/Dockerfile b/Dockerfile index 45dc547..207653a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,7 @@ RUN apk add --update --no-cache \ curl \ git \ icu-libs \ + tzdata \ && ln -sf python3 /usr/bin/python ENV DOTNET_RUNNING_IN_CONTAINER=true ENV DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=false @@ -35,4 +36,7 @@ RUN rm -rf /app/python || true && \ mkdir -p /app/python || true RUN pip3 install -r /app/requirements.txt -t /app/python +HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:8181/healthchecks/ready || exit 1 + ENTRYPOINT ["./zilean-api"] diff --git a/README.md b/README.md index 435287e..d7671e8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Zilean (Thoroslives Fork) +# Zilean (Maintained Fork) zilean logo @@ -6,15 +6,13 @@ Zilean is a service that allows you to search for [DebridMediaManager](https://g This can then be configured as a Torznab indexer in your favorite content application. 
Newly added is the ability for Zilean to scrape from your running Zurg instance, and from other running Zilean instances. -Upstream documentation: [https://ipromknight.github.io/zilean/](https://ipromknight.github.io/zilean/) +This is an actively maintained fork of [iPromKnight/zilean](https://github.com/iPromKnight/zilean) (v3.5.0, last upstream commit May 2025). -## Fork Changes +Upstream documentation: [https://ipromknight.github.io/zilean/](https://ipromknight.github.io/zilean/) -This fork (based on [iPromKnight/zilean](https://github.com/iPromKnight/zilean) v3.5.0) includes: +## Requirements -- **Flexible database configuration** — supports `Zilean__Database__ConnectionString` env var (backwards compat), individual `POSTGRES_*` env vars, or sensible defaults. Uses `NpgsqlConnectionStringBuilder` for proper escaping of special characters in passwords. -- **Incremental DMM sync** — replaces the 1.2GB zip download with `git clone --depth 1` on first run and `git pull` on subsequent runs. Supports `GITHUB_TOKEN` for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry. -- **Logging config preservation** — `logging.json` is only written if it doesn't exist, preserving user customizations across restarts. +Zilean requires only **PostgreSQL 16+**. Elasticsearch is **NOT** required and was removed in v2.0. ## Docker Image @@ -22,6 +20,28 @@ This fork (based on [iPromKnight/zilean](https://github.com/iPromKnight/zilean) ghcr.io/thoroslives/zilean:latest ``` +## Fork Changes + +All changes beyond upstream v3.5.0: + +### v3.6.0 +- **Flexible database configuration** — supports `Zilean__Database__ConnectionString` env var (backwards compat), individual `POSTGRES_*` env vars, or sensible defaults. Uses `NpgsqlConnectionStringBuilder` for proper escaping of special characters in passwords. +- **Incremental DMM sync** — replaces the 1.2GB zip download with `git clone --depth 1` on first run and `git pull` on subsequent runs. 
Supports `GITHUB_TOKEN` for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry. +- **Logging config preservation** — `logging.json` is only written if it doesn't exist, preserving user customizations across restarts. + +### v3.7.0 +- **Security hardening** — warns at startup if PostgreSQL password is empty or set to default "postgres". Docker-compose example no longer exposes Postgres ports. +- **Database startup resilience** — attempts the database connection up to 5 times, waiting 5 seconds between attempts, before running migrations. If the final attempt fails, a clear error message including the host and database name is logged. +- **Filtered search fix** — `/dmm/filtered` with short query strings (e.g., "1923") combined with season/episode filters no longer returns 0 results. Similarity threshold is automatically lowered when structured filters provide precision. +- **Scraping toggle fix** — setting `EnableScraping=false` now correctly hides the on-demand-scrape endpoint while keeping search endpoints functional. +- **Timezone support** — set `TZ` env var (e.g., `TZ=Australia/Sydney`) to display log timestamps in your local timezone. `tzdata` package included in the image. +- **Readiness health check** — new `/healthchecks/ready` endpoint that verifies database connectivity. Used by the Dockerfile HEALTHCHECK for orchestrator integration. +- **HEALTHCHECK instruction** — Docker image includes a built-in health check (30s interval, 60s start period) so orchestrators can detect readiness. +- **Graceful error handling** — database errors no longer kill the process immediately (`Process.Kill()` replaced with proper exception propagation). Search errors are logged instead of silently swallowed. +- **Startup config validation** — validates configuration values (cron syntax, numeric ranges, required fields) at startup with clear error messages. +- **DMM sync progress reporting** — periodic progress logs during sync showing files processed, percentage complete, and new torrents found. 
+- **ISystemClock deprecation fix** — removed deprecated `ISystemClock` usage in authentication handler. + ## Configuration ### Database Connection @@ -59,6 +79,17 @@ environment: - GITHUB_TOKEN=ghp_xxxxxxxxxxxx ``` +The initial DMM sync is **resumable** — if interrupted, it picks up where it left off on next startup. Expected initial sync duration varies by hardware (typically 30min-2hrs for parsing, longer for IMDB matching). + +### Timezone + +Set the `TZ` environment variable to display log timestamps in your local timezone: + +```yaml +environment: + - TZ=Australia/Sydney +``` + ### PostgreSQL Shared Memory PostgreSQL's default shared memory (`shm_size`) of 64MB is too small for Zilean's bulk DMM upserts. You'll get errors like: @@ -69,6 +100,56 @@ could not resize shared memory segment "/PostgreSQL.xxx" to 67146560 bytes: No s Set `shm_size: 256m` on your PostgreSQL container to fix this. See the docker-compose example below. + +## Security + +**Never expose your PostgreSQL port to the internet.** Multiple users have been compromised with crypto miners after exposing Postgres with default credentials. Zilean will warn you at startup if your database password is empty or set to the default "postgres". + +Best practices: +- Always set a strong `POSTGRES_PASSWORD` +- Do NOT add `ports:` to your Postgres container unless you need external access +- If you must expose Postgres, use a firewall to restrict access to trusted IPs +- Use Docker's internal networking — Zilean connects to Postgres by container name + +## Resource Usage + +- **Initial sync:** Expect high CPU for the duration of the first DMM sync (typically 30min-2hrs for parsing, depending on hardware). This is normal — Zilean is parsing ~1.2M HTML files and performing bulk database upserts. Progress is logged periodically. +- **Subsequent syncs:** Lightweight. Only pulls new/changed files via `git pull` and processes the diff. +- **If high usage persists** after the initial sync completes: check for security compromise (see Security section above). 
Persistent high CPU with unfamiliar processes is a red flag. +- PostgreSQL requires `shm_size: 256m` for bulk operations (see PostgreSQL Shared Memory section). + +## Multi-Instance Deployment + +For high-availability or high-traffic setups, you can run multiple Zilean instances: + +- **1 scraper instance** (`Zilean__Dmm__EnableScraping=true`) — handles DMM sync and data ingestion +- **N API instances** (`Zilean__Dmm__EnableScraping=false`, `Zilean__Dmm__EnableEndpoint=true`) — serve search queries only +- All instances share the same PostgreSQL database +- `PreventOverlapping("SyncJobs")` prevents concurrent scraping within an instance +- PostgreSQL's default `max_connections=100` is sufficient for typical deployments + +## Health Checks + +- `/healthchecks/ping` — lightweight liveness check (always returns 200) +- `/healthchecks/ready` — readiness check that verifies database connectivity (returns 503 if DB is unreachable) + +## Troubleshooting + +### Database not found / "does not exist" + +Common causes: +- PostgreSQL hasn't finished initializing — Zilean now attempts the connection up to 5 times, waiting 5 seconds between attempts, before failing startup +- Wrong credentials — check `POSTGRES_PASSWORD` matches between Zilean and Postgres containers +- Volume permissions — on Unraid/Synology, ensure the Postgres data volume has correct ownership + +### "could not resize shared memory segment" + +Set `shm_size: 256m` on your PostgreSQL container. See the docker-compose example. 
+ +### Search returns 0 results + +- Ensure the initial DMM sync has completed (check logs for "DMM sync complete") +- For filtered searches with short titles, the similarity threshold is automatically adjusted + ## Docker Compose Example ```yaml @@ -83,19 +164,29 @@ services: - zilean-data:/app/data environment: - POSTGRES_HOST=postgres - - POSTGRES_PASSWORD=your_password + - POSTGRES_PASSWORD=your_strong_password_here - GITHUB_TOKEN=ghp_xxxxxxxxxxxx # optional, recommended + - TZ=UTC # optional, set your timezone + depends_on: + postgres: + condition: service_healthy postgres: image: postgres:16-alpine container_name: zilean-postgres restart: unless-stopped shm_size: 256m # required — default 64m causes "No space left on device" during bulk upserts + # Do NOT expose ports unless you need external access — see Security section volumes: - zilean-pg:/var/lib/postgresql/data environment: - POSTGRES_DB=zilean - - POSTGRES_PASSWORD=your_password + - POSTGRES_PASSWORD=your_strong_password_here + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d zilean"] + interval: 10s + timeout: 5s + retries: 5 volumes: zilean-data: diff --git a/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs b/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs index 3756af8..e661a29 100644 --- a/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs +++ b/src/Zilean.ApiService/Features/Authentication/ApiKeyAuthenticationHandler.cs @@ -4,9 +4,8 @@ public class ApiKeyAuthenticationHandler( IOptionsMonitor options, ILoggerFactory logger, UrlEncoder encoder, - ISystemClock clock, ZileanConfiguration configuration) - : AuthenticationHandler(options, logger, encoder, clock) + : AuthenticationHandler(options, logger, encoder) { protected override Task HandleAuthenticateAsync() { diff --git a/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs 
index 803b1c2..2343589 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs @@ -6,6 +6,9 @@ public class StartupService( IServiceProvider serviceProvider, ILoggerFactory loggerFactory) : IHostedLifecycleService { + private const int MaxRetries = 5; + private static readonly TimeSpan RetryDelay = TimeSpan.FromSeconds(5); + public Task StartAsync(CancellationToken cancellationToken) => Task.CompletedTask; public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask; @@ -13,6 +16,29 @@ public class StartupService( public async Task StartingAsync(CancellationToken cancellationToken) { var logger = loggerFactory.CreateLogger(); + + // Security check — warn about insecure Postgres credentials + if (configuration.Database.HasInsecurePassword()) + { + logger.LogWarning("SECURITY WARNING: PostgreSQL password is empty or set to the default 'postgres'. " + + "This is a security risk — if your database port is exposed, attackers can connect and compromise your system. 
" + + "Set a strong password via POSTGRES_PASSWORD or Zilean__Database__ConnectionString."); + } + + // Validate configuration before proceeding + var validationErrors = configuration.Validate(); + if (validationErrors.Count > 0) + { + foreach (var error in validationErrors) + { + logger.LogError("Configuration error: {Error}", error); + } + throw new InvalidOperationException($"Zilean configuration is invalid: {string.Join("; ", validationErrors)}"); + } + + // Wait for database with retry + await WaitForDatabaseAsync(logger, cancellationToken); + logger.LogInformation("Applying Migrations..."); await using var asyncScope = serviceProvider.CreateAsyncScope(); var dbContext = asyncScope.ServiceProvider.GetRequiredService(); @@ -20,6 +46,48 @@ public async Task StartingAsync(CancellationToken cancellationToken) logger.LogInformation("Migrations Applied."); } + private async Task WaitForDatabaseAsync(ILogger logger, CancellationToken cancellationToken) + { + var connectionString = configuration.Database.ConnectionString; + + for (var attempt = 1; attempt <= MaxRetries; attempt++) + { + try + { + await using var connection = new Npgsql.NpgsqlConnection(connectionString); + await connection.OpenAsync(cancellationToken); + logger.LogInformation("Database connection established."); + return; + } + catch (Exception ex) when (attempt < MaxRetries) + { + logger.LogWarning("Database connection attempt {Attempt}/{MaxRetries} failed: {Message}. Retrying in {Delay}s...", + attempt, MaxRetries, ex.Message, RetryDelay.TotalSeconds); + await Task.Delay(RetryDelay, cancellationToken); + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to connect to database after {MaxRetries} attempts. " + + "Connection string: Host={Host}, Database={Database}. 
" + + "Check that PostgreSQL is running, the database exists, and credentials are correct.", + MaxRetries, GetConnectionHost(connectionString), GetConnectionDatabase(connectionString)); + throw; + } + } + } + + private static string GetConnectionHost(string connectionString) + { + try { return new Npgsql.NpgsqlConnectionStringBuilder(connectionString).Host ?? "unknown"; } + catch { return "unknown"; } + } + + private static string GetConnectionDatabase(string connectionString) + { + try { return new Npgsql.NpgsqlConnectionStringBuilder(connectionString).Database ?? "unknown"; } + catch { return "unknown"; } + } + public Task StoppedAsync(CancellationToken cancellationToken) => Task.CompletedTask; public Task StoppingAsync(CancellationToken cancellationToken) => Task.CompletedTask; diff --git a/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs b/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs index f9873c3..c2ee051 100644 --- a/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs +++ b/src/Zilean.ApiService/Features/HealthChecks/HealthCheckEndpoints.cs @@ -4,6 +4,7 @@ public static class HealthCheckEndpoints { private const string GroupName = "healthchecks"; private const string Ping = "/ping"; + private const string Ready = "/ready"; public static WebApplication MapHealthCheckEndpoints(this WebApplication app) { @@ -19,9 +20,30 @@ public static WebApplication MapHealthCheckEndpoints(this WebApplication app) private static RouteGroupBuilder HealthChecks(this RouteGroupBuilder group) { group.MapGet(Ping, RespondPong); + group.MapGet(Ready, CheckReadiness); return group; } private static string RespondPong(HttpContext context) => $"[{DateTime.UtcNow.ToString(CultureInfo.InvariantCulture)}]: Pong!"; + + private static async Task CheckReadiness(ZileanConfiguration configuration, ILogger logger) + { + try + { + await using var connection = new Npgsql.NpgsqlConnection(configuration.Database.ConnectionString); + await 
connection.OpenAsync(); + await using var cmd = connection.CreateCommand(); + cmd.CommandText = "SELECT 1"; + await cmd.ExecuteScalarAsync(); + return Results.Ok(new { status = "healthy", timestamp = DateTime.UtcNow }); + } + catch (Exception ex) + { + logger.LogWarning(ex, "Readiness check failed — database is not reachable"); + return Results.Json(new { status = "unhealthy", error = ex.Message, timestamp = DateTime.UtcNow }, statusCode: 503); + } + } + + private abstract class ReadinessCheck; } diff --git a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs index 6dcdf24..1b2dd80 100644 --- a/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs +++ b/src/Zilean.ApiService/Features/Search/SearchEndpoints.cs @@ -13,14 +13,14 @@ public static WebApplication MapDmmEndpoints(this WebApplication app, ZileanConf { app.MapGroup(GroupName) .WithTags(GroupName) - .Dmm() + .Dmm(configuration) .DisableAntiforgery(); } return app; } - private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) + private static RouteGroupBuilder Dmm(this RouteGroupBuilder group, ZileanConfiguration configuration) { group.MapPost(Search, PerformSearch) .Produces() @@ -30,9 +30,12 @@ private static RouteGroupBuilder Dmm(this RouteGroupBuilder group) .Produces() .AllowAnonymous(); - group.MapGet(Ingest, PerformOnDemandScrape) - .RequireAuthorization(ApiKeyAuthentication.Policy) - .WithMetadata(new OpenApiSecurityMetadata(ApiKeyAuthentication.Scheme)); + if (configuration.Dmm.EnableScraping) + { + group.MapGet(Ingest, PerformOnDemandScrape) + .RequireAuthorization(ApiKeyAuthentication.Policy) + .WithMetadata(new OpenApiSecurityMetadata(ApiKeyAuthentication.Scheme)); + } return group; } @@ -88,15 +91,15 @@ private static async Task> PerformSearch(HttpContext context, ? 
TypedResults.Ok(Array.Empty()) : TypedResults.Ok(results); } - catch + catch (Exception ex) { + logger.LogError(ex, "Unfiltered search failed for query: {QueryText}", queryRequest.QueryText); return TypedResults.Ok(Array.Empty()); } } private static async Task> PerformFilteredSearch(HttpContext context, ITorrentInfoService torrentInfoService, ZileanConfiguration configuration, ILogger logger, [AsParameters] SearchFilteredRequest request) { - try { logger.LogInformation("Performing filtered search for {@Request}", request); @@ -119,8 +122,10 @@ private static async Task> PerformFilteredSearch(HttpContext c ? TypedResults.Ok(Array.Empty()) : TypedResults.Ok(results); } - catch + catch (Exception ex) { + logger.LogError(ex, "Filtered search failed for query: {Query}, Season: {Season}, Episode: {Episode}", + request.Query, request.Season, request.Episode); return TypedResults.Ok(Array.Empty()); } } diff --git a/src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs b/src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs new file mode 100644 index 0000000..df3704e --- /dev/null +++ b/src/Zilean.Database/Functions/SearchTorrentsMetaV6.cs @@ -0,0 +1,178 @@ +namespace Zilean.Database.Functions; + +public class SearchTorrentsMetaV6 +{ + internal const string Create = + """ + CREATE OR REPLACE FUNCTION search_torrents_meta( + query TEXT DEFAULT NULL, + season INT DEFAULT NULL, + episode INT DEFAULT NULL, + year INT DEFAULT NULL, + language TEXT DEFAULT NULL, + resolution TEXT DEFAULT NULL, + imdbId TEXT DEFAULT NULL, + limit_param INT DEFAULT 20, + category TEXT DEFAULT NULL, + similarity_threshold REAL DEFAULT 0.85 + ) + RETURNS TABLE( + "InfoHash" TEXT, + "Resolution" TEXT, + "Year" INT, + "Remastered" BOOLEAN, + "Codec" TEXT, + "Audio" TEXT[], + "Quality" TEXT, + "Episodes" INT[], + "Seasons" INT[], + "Languages" TEXT[], + "ParsedTitle" TEXT, + "NormalizedTitle" TEXT, + "RawTitle" TEXT, + "Size" TEXT, + "Category" TEXT, + "Complete" BOOLEAN, + "Volumes" INT[], + "Hdr" 
TEXT[], + "Channels" TEXT[], + "Dubbed" BOOLEAN, + "Subbed" BOOLEAN, + "Edition" TEXT, + "BitDepth" TEXT, + "Bitrate" TEXT, + "Network" TEXT, + "Extended" BOOLEAN, + "Converted" BOOLEAN, + "Hardcoded" BOOLEAN, + "Region" TEXT, + "Ppv" BOOLEAN, + "Is3d" BOOLEAN, + "Site" TEXT, + "Proper" BOOLEAN, + "Repack" BOOLEAN, + "Retail" BOOLEAN, + "Upscaled" BOOLEAN, + "Unrated" BOOLEAN, + "Documentary" BOOLEAN, + "EpisodeCode" TEXT, + "Country" TEXT, + "Container" TEXT, + "Extension" TEXT, + "Torrent" BOOLEAN, + "Score" REAL, + "ImdbId" TEXT, + "ImdbCategory" TEXT, + "ImdbTitle" TEXT, + "ImdbYear" INT, + "ImdbAdult" BOOLEAN, + "IngestedAt" TIMESTAMPTZ + ) AS $$ + DECLARE + effective_threshold REAL; + has_filters BOOLEAN; + BEGIN + -- When structured filters are provided (season, episode, year, imdbId), + -- lower the similarity threshold since the filters themselves provide precision. + -- This fixes short query strings (e.g. "1923") returning 0 results when combined + -- with season/episode filters, because trigram similarity is unreliable for short strings. 
+ has_filters := (season IS NOT NULL OR episode IS NOT NULL OR year IS NOT NULL OR imdbId IS NOT NULL); + + IF has_filters AND query IS NOT NULL AND length(query) <= 6 THEN + effective_threshold := similarity_threshold * 0.3; + ELSIF has_filters THEN + effective_threshold := similarity_threshold * 0.5; + ELSE + effective_threshold := similarity_threshold; + END IF; + + EXECUTE format('SET pg_trgm.similarity_threshold = %L', effective_threshold); + + RETURN QUERY + SELECT + t."InfoHash", + t."Resolution", + t."Year", + t."Remastered", + t."Codec", + t."Audio", + t."Quality", + t."Episodes", + t."Seasons", + t."Languages", + t."ParsedTitle", + t."NormalizedTitle", + t."RawTitle", + t."Size", + t."Category", + t."Complete", + t."Volumes", + t."Hdr", + t."Channels", + t."Dubbed", + t."Subbed", + t."Edition", + t."BitDepth", + t."Bitrate", + t."Network", + t."Extended", + t."Converted", + t."Hardcoded", + t."Region", + t."Ppv", + t."Is3d", + t."Site", + t."Proper", + t."Repack", + t."Retail", + t."Upscaled", + t."Unrated", + t."Documentary", + t."EpisodeCode", + t."Country", + t."Container", + t."Extension", + t."Torrent", + similarity(t."CleanedParsedTitle", query) AS "Score", + t."ImdbId", + i."Category" AS "ImdbCategory", + i."Title" AS "ImdbTitle", + i."Year" AS "ImdbYear", + i."Adult" AS "ImdbAdult", + t."IngestedAt" + FROM + public."Torrents" t + LEFT JOIN + public."ImdbFiles" i ON t."ImdbId" = i."ImdbId" + WHERE + Length(t."InfoHash") = 40 + AND + (category IS NULL OR t."Category" = category) + AND + (query IS NULL OR t."CleanedParsedTitle" % query) + AND (imdbId IS NULL OR t."ImdbId" = imdbId) + AND (season IS NULL OR season = ANY(t."Seasons")) + AND ( + (episode IS NULL AND season IS NOT NULL) + OR + ( + episode IS NOT NULL AND + season IS NOT NULL AND + (episode = ANY(t."Episodes") OR t."Episodes" IS NULL OR t."Episodes" = '{}') + ) + OR (season IS NULL AND episode IS NULL) + ) + AND (year IS NULL OR t."Year" BETWEEN year - 1 AND year + 1) + AND (language IS 
NULL OR language = ANY(t."Languages")) + AND (resolution IS NULL OR resolution = t."Resolution") + ORDER BY + "Score" DESC, + "IngestedAt" DESC + LIMIT + limit_param; + END; + $$ LANGUAGE plpgsql; + """; + + internal const string Remove = "DROP FUNCTION IF EXISTS search_torrents_meta(TEXT, INT, INT, INT, TEXT, TEXT, TEXT, INT, TEXT, REAL);"; +} diff --git a/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs new file mode 100644 index 0000000..34c4742 --- /dev/null +++ b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.Designer.cs @@ -0,0 +1,385 @@ +// +using System; +using System.Text.Json; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; +using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata; +using Zilean.Database; + +#nullable disable + +namespace Zilean.Database.Migrations +{ + [DbContext(typeof(ZileanDbContext))] + [Migration("20260325000000_SearchV6FilteredThreshold")] + partial class SearchV6FilteredThreshold + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder + .HasAnnotation("ProductVersion", "9.0.0") + .HasAnnotation("Relational:MaxIdentifierLength", 63); + + NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder); + + modelBuilder.Entity("Zilean.Shared.Features.Blacklist.BlacklistedItem", b => + { + b.Property("InfoHash") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "info_hash"); + + b.Property("BlacklistedAt") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasDefaultValueSql("now() at time zone 'utc'") + .HasAnnotation("Relational:JsonPropertyName", "blacklisted_at"); + + b.Property("Reason") + .IsRequired() + 
.HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "reason"); + + b.HasKey("InfoHash"); + + b.HasIndex("InfoHash") + .IsUnique(); + + b.ToTable("BlacklistedItems", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Dmm.ParsedPages", b => + { + b.Property("Page") + .HasColumnType("text"); + + b.Property("EntryCount") + .HasColumnType("integer"); + + b.HasKey("Page"); + + b.ToTable("ParsedPages", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Dmm.TorrentInfo", b => + { + b.Property("InfoHash") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "info_hash"); + + b.PrimitiveCollection("Audio") + .IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "audio"); + + b.Property("BitDepth") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "bit_depth"); + + b.Property("Bitrate") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "bitrate"); + + b.Property("Category") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "category"); + + b.PrimitiveCollection("Channels") + .IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "channels"); + + b.Property("CleanedParsedTitle") + .IsRequired() + .ValueGeneratedOnAdd() + .HasColumnType("text") + .HasDefaultValue("") + .HasAnnotation("Relational:JsonPropertyName", "cleaned_parsed_title"); + + b.Property("Codec") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "codec"); + + b.Property("Complete") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "complete"); + + b.Property("Container") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "container"); + + b.Property("Converted") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "converted"); + + b.Property("Country") + .HasColumnType("text") + 
.HasAnnotation("Relational:JsonPropertyName", "country"); + + b.Property("Date") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "date"); + + b.Property("Documentary") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "documentary"); + + b.Property("Dubbed") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "dubbed"); + + b.Property("Edition") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "edition"); + + b.Property("EpisodeCode") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "episode_code"); + + b.PrimitiveCollection("Episodes") + .IsRequired() + .HasColumnType("integer[]") + .HasAnnotation("Relational:JsonPropertyName", "episodes"); + + b.Property("Extended") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "extended"); + + b.Property("Extension") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "extension"); + + b.Property("Group") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "group"); + + b.Property("Hardcoded") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "hardcoded"); + + b.PrimitiveCollection("Hdr") + .IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "hdr"); + + b.Property("ImdbId") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "imdb_id"); + + b.Property("IngestedAt") + .ValueGeneratedOnAdd() + .HasColumnType("timestamp with time zone") + .HasDefaultValueSql("now() at time zone 'utc'") + .HasAnnotation("Relational:JsonPropertyName", "ingested_at"); + + b.Property("Is3d") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "_3d"); + + b.Property("IsAdult") + .ValueGeneratedOnAdd() + .HasColumnType("boolean") + .HasDefaultValue(false) + .HasAnnotation("Relational:JsonPropertyName", "adult"); + + b.PrimitiveCollection("Languages") + 
.IsRequired() + .HasColumnType("text[]") + .HasAnnotation("Relational:JsonPropertyName", "languages"); + + b.Property("Network") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "network"); + + b.Property("NormalizedTitle") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "normalized_title"); + + b.Property("ParsedTitle") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "parsed_title"); + + b.Property("Ppv") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "ppv"); + + b.Property("Proper") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "proper"); + + b.Property("Quality") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "quality"); + + b.Property("RawTitle") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "raw_title"); + + b.Property("Region") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "region"); + + b.Property("Remastered") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "remastered"); + + b.Property("Repack") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "repack"); + + b.Property("Resolution") + .IsRequired() + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "resolution"); + + b.Property("Retail") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "retail"); + + b.PrimitiveCollection("Seasons") + .IsRequired() + .HasColumnType("integer[]") + .HasAnnotation("Relational:JsonPropertyName", "seasons"); + + b.Property("Site") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "site"); + + b.Property("Size") + .HasColumnType("text") + .HasAnnotation("Relational:JsonPropertyName", "size"); + + b.Property("Subbed") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", 
"subbed"); + + b.Property("Torrent") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "torrent"); + + b.Property("Trash") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "trash"); + + b.Property("Unrated") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "unrated"); + + b.Property("Upscaled") + .HasColumnType("boolean") + .HasAnnotation("Relational:JsonPropertyName", "upscaled"); + + b.PrimitiveCollection("Volumes") + .IsRequired() + .HasColumnType("integer[]") + .HasAnnotation("Relational:JsonPropertyName", "volumes"); + + b.Property("Year") + .HasColumnType("integer") + .HasAnnotation("Relational:JsonPropertyName", "year"); + + b.HasKey("InfoHash"); + + b.HasIndex("CleanedParsedTitle") + .HasDatabaseName("idx_cleaned_parsed_title_trgm"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("CleanedParsedTitle"), "GIN"); + NpgsqlIndexBuilderExtensions.HasOperators(b.HasIndex("CleanedParsedTitle"), new[] { "gin_trgm_ops" }); + + b.HasIndex("Episodes") + .HasDatabaseName("idx_episodes_gin"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("Episodes"), "GIN"); + + b.HasIndex("ImdbId") + .HasDatabaseName("idx_torrents_imdbid"); + + b.HasIndex("InfoHash") + .IsUnique(); + + b.HasIndex("IngestedAt") + .IsDescending() + .HasDatabaseName("idx_ingested_at"); + + b.HasIndex("IsAdult") + .HasDatabaseName("idx_torrents_isadult"); + + b.HasIndex("Languages") + .HasDatabaseName("idx_languages_gin"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("Languages"), "GIN"); + + b.HasIndex("Seasons") + .HasDatabaseName("idx_seasons_gin"); + + NpgsqlIndexBuilderExtensions.HasMethod(b.HasIndex("Seasons"), "GIN"); + + b.HasIndex("Trash") + .HasDatabaseName("idx_torrents_trash"); + + b.HasIndex("Year") + .HasDatabaseName("idx_year"); + + b.ToTable("Torrents", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Imdb.ImdbFile", b => + { + b.Property("ImdbId") + 
.HasColumnType("text"); + + b.Property("Adult") + .HasColumnType("boolean"); + + b.Property("Category") + .HasColumnType("text"); + + b.Property("Title") + .HasColumnType("text"); + + b.Property("Year") + .HasColumnType("integer"); + + b.HasKey("ImdbId"); + + b.HasIndex("ImdbId") + .IsUnique(); + + b.ToTable("ImdbFiles", (string)null); + + b.HasAnnotation("Relational:JsonPropertyName", "imdb"); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Statistics.ImportMetadata", b => + { + b.Property("Key") + .HasColumnType("text"); + + b.Property("Value") + .IsRequired() + .HasColumnType("jsonb"); + + b.HasKey("Key"); + + b.ToTable("ImportMetadata", (string)null); + }); + + modelBuilder.Entity("Zilean.Shared.Features.Dmm.TorrentInfo", b => + { + b.HasOne("Zilean.Shared.Features.Imdb.ImdbFile", "Imdb") + .WithMany() + .HasForeignKey("ImdbId"); + + b.Navigation("Imdb"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs new file mode 100644 index 0000000..f35c837 --- /dev/null +++ b/src/Zilean.Database/Migrations/20260325000000_SearchV6FilteredThreshold.cs @@ -0,0 +1,24 @@ +using Microsoft.EntityFrameworkCore.Migrations; +using Zilean.Database.Functions; + +#nullable disable + +namespace Zilean.Database.Migrations; + +/// +public partial class SearchV6FilteredThreshold : Migration +{ + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + migrationBuilder.Sql(SearchTorrentsMetaV5.Remove); + migrationBuilder.Sql(SearchTorrentsMetaV6.Create); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + migrationBuilder.Sql(SearchTorrentsMetaV6.Remove); + migrationBuilder.Sql(SearchTorrentsMetaV5.Create); + } +} diff --git a/src/Zilean.Database/Services/BaseDapperService.cs b/src/Zilean.Database/Services/BaseDapperService.cs index f916600..b62e58f 100644 --- 
a/src/Zilean.Database/Services/BaseDapperService.cs +++ b/src/Zilean.Database/Services/BaseDapperService.cs @@ -16,7 +16,7 @@ protected async Task ExecuteCommandAsync(Func operation, catch (Exception ex) { logger.LogError(ex, "An error occurred while executing a command."); - Process.GetCurrentProcess().Kill(); + throw; } } diff --git a/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs index e7efff5..b70c656 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Processing/DmmFileEntryProcessor.cs @@ -28,6 +28,12 @@ public async Task ProcessFilesAsync(List files, CancellationToken cancel private async Task ProduceEntriesAsync(ChannelWriter> writer, CancellationToken cancellationToken) { + var totalFiles = _filesToProcess.Count; + var processedFiles = 0; + var skippedFiles = 0; + var newTorrentsFound = 0; + var lastProgressLog = Stopwatch.StartNew(); + foreach (var file in _filesToProcess) { if (cancellationToken.IsCancellationRequested) @@ -39,14 +45,15 @@ private async Task ProduceEntriesAsync(ChannelWriter> wr var fileName = Path.GetFileName(file); if (ExistingPages.TryGetValue(fileName, out _) || NewPages.TryGetValue(fileName, out _)) { + skippedFiles++; + processedFiles++; continue; } - _logger.LogInformation("Processing file: {FileName}", fileName); - try { var torrents = await ProcessPageAsync(file, fileName, cancellationToken); + newTorrentsFound += torrents.Count; foreach (var torrent in torrents) { await writer.WriteAsync(Task.FromResult(torrent), cancellationToken); @@ -56,6 +63,24 @@ private async Task ProduceEntriesAsync(ChannelWriter> wr { _logger.LogError(ex, "Error processing file: {FileName}", fileName); } + + processedFiles++; + + // Log progress every 60 seconds + if (lastProgressLog.Elapsed.TotalSeconds >= 60) + { + var percentage = totalFiles > 0 ? 
(double)processedFiles / totalFiles * 100 : 0; + _logger.LogInformation("DMM sync progress: {Processed}/{Total} files ({Percentage:F1}%), {Skipped} skipped, {NewTorrents} new torrents found", + processedFiles, totalFiles, percentage, skippedFiles, newTorrentsFound); + lastProgressLog.Restart(); + } + } + + // Final progress log + if (processedFiles > 0) + { + _logger.LogInformation("DMM sync complete: {Processed}/{Total} files processed, {Skipped} skipped, {NewTorrents} new torrents found", + processedFiles, totalFiles, skippedFiles, newTorrentsFound); } writer.Complete(); diff --git a/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs b/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs index 28ea7e4..b5be3f9 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Processing/GenericProcessor.cs @@ -13,9 +13,11 @@ public abstract class GenericProcessor( protected readonly ZileanConfiguration _configuration = configuration; private HashSet _blacklistedHashes = []; private readonly ObjectPool> _torrentsListPool = new DefaultObjectPoolProvider().Create>(); + private int _batchNumber; protected async Task ProcessAsync(Func>, CancellationToken, Task> producerAction, CancellationToken cancellationToken) { + _batchNumber = 0; _blacklistedHashes = await torrentInfoService.GetBlacklistedItems(); var channel = Channel.CreateBounded>(new BoundedChannelOptions(_configuration.Parsing.BatchSize * 2) @@ -78,6 +80,7 @@ private async Task ConsumeAsync(ChannelReader> reader, Cancellation private async Task OnProcessTorrentsAsync(List> batch, CancellationToken cancellationToken) { + var currentBatch = Interlocked.Increment(ref _batchNumber); var torrents = _torrentsListPool.Get(); try @@ -116,6 +119,9 @@ private async Task OnProcessTorrentsAsync(List> batch, Cancellation await torrentInfoService.StoreTorrentInfo(finalizedTorrents); 
_processedCounts.AddProcessed(finalizedTorrents.Count); + + _logger.LogInformation("Batch {BatchNumber} complete: {NewCount} new torrents stored, {TotalProcessed} total processed so far", + currentBatch, finalizedTorrents.Count, _processedCounts.TotalProcessed); } } catch (OperationCanceledException) diff --git a/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs b/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs index 6d4a247..627567d 100644 --- a/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs +++ b/src/Zilean.Scraper/Features/Ingestion/Processing/ProcessedCounts.cs @@ -15,6 +15,7 @@ public void Reset() Interlocked.Exchange(ref _blacklistedRemoved, 0); } + public int TotalProcessed => _totalProcessed; public void AddProcessed(int count) => Interlocked.Add(ref _totalProcessed, count); public void AddAdultRemoved(int count) => Interlocked.Add(ref _adultRemoved, count); public void AddTrashRemoved(int count) => Interlocked.Add(ref _trashRemoved, count); diff --git a/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs b/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs index f6e74b6..5a715cf 100644 --- a/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/DatabaseConfiguration.cs @@ -37,4 +37,21 @@ public DatabaseConfiguration() ConnectionString = builder.ConnectionString; } + + /// + /// Returns true if the configured password is empty or a known insecure default. 
+ /// + public bool HasInsecurePassword() + { + try + { + var parsed = new NpgsqlConnectionStringBuilder(ConnectionString); + return string.IsNullOrEmpty(parsed.Password) || + string.Equals(parsed.Password, "postgres", StringComparison.OrdinalIgnoreCase); + } + catch + { + return false; + } + } } diff --git a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs index c062ab9..4ab04f9 100644 --- a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs @@ -28,6 +28,46 @@ public static void EnsureExists() } } + /// + /// Validates the configuration and returns a list of error messages. Empty list means valid. + /// + public List Validate() + { + var errors = new List(); + + if (Dmm.MaxFilteredResults <= 0) + errors.Add("Dmm.MaxFilteredResults must be greater than 0"); + + if (Dmm.MinimumScoreMatch is < 0 or > 1) + errors.Add("Dmm.MinimumScoreMatch must be between 0 and 1"); + + if (Dmm.MinimumReDownloadIntervalMinutes < 0) + errors.Add("Dmm.MinimumReDownloadIntervalMinutes must be non-negative"); + + if (!IsValidCronExpression(Dmm.ScrapeSchedule)) + errors.Add($"Dmm.ScrapeSchedule '{Dmm.ScrapeSchedule}' is not a valid cron expression"); + + if (!IsValidCronExpression(Ingestion.ScrapeSchedule)) + errors.Add($"Ingestion.ScrapeSchedule '{Ingestion.ScrapeSchedule}' is not a valid cron expression"); + + if (Parsing.BatchSize <= 0) + errors.Add("Parsing.BatchSize must be greater than 0"); + + if (string.IsNullOrWhiteSpace(Database.ConnectionString)) + errors.Add("Database.ConnectionString is empty — check POSTGRES_* or Zilean__Database__ConnectionString env vars"); + + return errors; + } + + private static bool IsValidCronExpression(string? 
cron) + { + if (string.IsNullOrWhiteSpace(cron)) + return false; + + var parts = cron.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries); + return parts.Length == 5; + } + private static string DefaultConfigurationContents() { var mainSettings = new Dictionary From 3f194a1551cf57732470d19ff95c63d863877bba Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 05:54:21 +0000 Subject: [PATCH 12/13] fix: resolve code style violations (naming convention, braces) - Rename RetryDelay to _retryDelay (instance field prefix rule) - Add braces to all if statements in Validate() (IDE0011) - Add braces to IsValidCronExpression (IDE0011) --- .../Features/Bootstrapping/StartupService.cs | 6 +++--- .../Configuration/ZileanConfiguration.cs | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs index 2343589..213d9e9 100644 --- a/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs +++ b/src/Zilean.ApiService/Features/Bootstrapping/StartupService.cs @@ -7,7 +7,7 @@ public class StartupService( ILoggerFactory loggerFactory) : IHostedLifecycleService { private const int MaxRetries = 5; - private static readonly TimeSpan RetryDelay = TimeSpan.FromSeconds(5); + private static readonly TimeSpan _retryDelay = TimeSpan.FromSeconds(5); public Task StartAsync(CancellationToken cancellationToken) => Task.CompletedTask; @@ -62,8 +62,8 @@ private async Task WaitForDatabaseAsync(ILogger logger, CancellationToken cancel catch (Exception ex) when (attempt < MaxRetries) { logger.LogWarning("Database connection attempt {Attempt}/{MaxRetries} failed: {Message}. 
Retrying in {Delay}s...", - attempt, MaxRetries, ex.Message, RetryDelay.TotalSeconds); - await Task.Delay(RetryDelay, cancellationToken); + attempt, MaxRetries, ex.Message, _retryDelay.TotalSeconds); + await Task.Delay(_retryDelay, cancellationToken); } catch (Exception ex) { diff --git a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs index 4ab04f9..479f99e 100644 --- a/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs +++ b/src/Zilean.Shared/Features/Configuration/ZileanConfiguration.cs @@ -36,25 +36,39 @@ public List Validate() var errors = new List(); if (Dmm.MaxFilteredResults <= 0) + { errors.Add("Dmm.MaxFilteredResults must be greater than 0"); + } if (Dmm.MinimumScoreMatch is < 0 or > 1) + { errors.Add("Dmm.MinimumScoreMatch must be between 0 and 1"); + } if (Dmm.MinimumReDownloadIntervalMinutes < 0) + { errors.Add("Dmm.MinimumReDownloadIntervalMinutes must be non-negative"); + } if (!IsValidCronExpression(Dmm.ScrapeSchedule)) + { errors.Add($"Dmm.ScrapeSchedule '{Dmm.ScrapeSchedule}' is not a valid cron expression"); + } if (!IsValidCronExpression(Ingestion.ScrapeSchedule)) + { errors.Add($"Ingestion.ScrapeSchedule '{Ingestion.ScrapeSchedule}' is not a valid cron expression"); + } if (Parsing.BatchSize <= 0) + { errors.Add("Parsing.BatchSize must be greater than 0"); + } if (string.IsNullOrWhiteSpace(Database.ConnectionString)) + { errors.Add("Database.ConnectionString is empty — check POSTGRES_* or Zilean__Database__ConnectionString env vars"); + } return errors; } @@ -62,7 +76,9 @@ public List Validate() private static bool IsValidCronExpression(string? 
cron) { if (string.IsNullOrWhiteSpace(cron)) + { return false; + } var parts = cron.Trim().Split(' ', StringSplitOptions.RemoveEmptyEntries); return parts.Length == 5; From e522ac3114ece049abe9e2ad3ad6754d7e47eea5 Mon Sep 17 00:00:00 2001 From: Thoroslives Date: Wed, 25 Mar 2026 07:20:44 +0000 Subject: [PATCH 13/13] docs: clean up formatting in README --- README.md | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index d7671e8..311a81c 100644 --- a/README.md +++ b/README.md @@ -25,22 +25,22 @@ ghcr.io/thoroslives/zilean:latest All changes beyond upstream v3.5.0: ### v3.6.0 -- **Flexible database configuration** — supports `Zilean__Database__ConnectionString` env var (backwards compat), individual `POSTGRES_*` env vars, or sensible defaults. Uses `NpgsqlConnectionStringBuilder` for proper escaping of special characters in passwords. -- **Incremental DMM sync** — replaces the 1.2GB zip download with `git clone --depth 1` on first run and `git pull` on subsequent runs. Supports `GITHUB_TOKEN` for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry. -- **Logging config preservation** — `logging.json` is only written if it doesn't exist, preserving user customizations across restarts. +- **Flexible database configuration** - supports `Zilean__Database__ConnectionString` env var (backwards compat), individual `POSTGRES_*` env vars, or sensible defaults. Uses `NpgsqlConnectionStringBuilder` for proper escaping of special characters in passwords. +- **Incremental DMM sync** - replaces the 1.2GB zip download with `git clone --depth 1` on first run and `git pull` on subsequent runs. Supports `GITHUB_TOKEN` for authenticated requests (5,000 req/hr vs 60). Includes exponential backoff retry. +- **Logging config preservation** - `logging.json` is only written if it doesn't exist, preserving user customizations across restarts. 
### v3.7.0 -- **Security hardening** — warns at startup if PostgreSQL password is empty or set to default "postgres". Docker-compose example no longer exposes Postgres ports. -- **Database startup resilience** — retries database connection up to 5 times with 5-second delays before running migrations. Clear error messages on failure including host and database name. -- **Filtered search fix** — `/dmm/filtered` with short query strings (e.g., "1923") combined with season/episode filters no longer returns 0 results. Similarity threshold is automatically lowered when structured filters provide precision. -- **Scraping toggle fix** — setting `EnableScraping=false` now correctly hides the on-demand-scrape endpoint while keeping search endpoints functional. -- **Timezone support** — set `TZ` env var (e.g., `TZ=Australia/Sydney`) to display log timestamps in your local timezone. `tzdata` package included in the image. -- **Readiness health check** — new `/healthchecks/ready` endpoint that verifies database connectivity. Used by the Dockerfile HEALTHCHECK for orchestrator integration. -- **HEALTHCHECK instruction** — Docker image includes a built-in health check (30s interval, 60s start period) so orchestrators can detect readiness. -- **Graceful error handling** — database errors no longer kill the process immediately (`Process.Kill()` replaced with proper exception propagation). Search errors are logged instead of silently swallowed. -- **Startup config validation** — validates configuration values (cron syntax, numeric ranges, required fields) at startup with clear error messages. -- **DMM sync progress reporting** — periodic progress logs during sync showing files processed, percentage complete, and new torrents found. -- **ISystemClock deprecation fix** — removed deprecated `ISystemClock` usage in authentication handler. +- **Security hardening** - warns at startup if PostgreSQL password is empty or set to default "postgres". 
Docker-compose example no longer exposes Postgres ports. +- **Database startup resilience** - retries database connection up to 5 times with 5-second delays before running migrations. Clear error messages on failure including host and database name. +- **Filtered search fix** - `/dmm/filtered` with short query strings (e.g., "1923") combined with season/episode filters no longer returns 0 results. Similarity threshold is automatically lowered when structured filters provide precision. +- **Scraping toggle fix** - setting `EnableScraping=false` now correctly hides the on-demand-scrape endpoint while keeping search endpoints functional. +- **Timezone support** - set `TZ` env var (e.g., `TZ=Australia/Sydney`) to display log timestamps in your local timezone. `tzdata` package included in the image. +- **Readiness health check** - new `/healthchecks/ready` endpoint that verifies database connectivity. Used by the Dockerfile HEALTHCHECK for orchestrator integration. +- **HEALTHCHECK instruction** - Docker image includes a built-in health check (30s interval, 60s start period) so orchestrators can detect readiness. +- **Graceful error handling** - database errors no longer kill the process immediately (`Process.Kill()` replaced with proper exception propagation). Search errors are logged instead of silently swallowed. +- **Startup config validation** - validates configuration values (cron syntax, numeric ranges, required fields) at startup with clear error messages. +- **DMM sync progress reporting** - periodic progress logs during sync showing files processed, percentage complete, and new torrents found. +- **ISystemClock deprecation fix** - removed deprecated `ISystemClock` usage in authentication handler. ## Configuration @@ -79,7 +79,7 @@ environment: - GITHUB_TOKEN=ghp_xxxxxxxxxxxx ``` -The initial DMM sync is **resumable** — if interrupted, it picks up where it left off on next startup. 
Expected initial sync duration varies by hardware (typically 30min-2hrs for parsing, longer for IMDB matching). +The initial DMM sync is **resumable** - if interrupted, it picks up where it left off on next startup. Expected initial sync duration varies by hardware (typically 30min-2hrs for parsing, longer for IMDB matching). ### Timezone @@ -108,11 +108,11 @@ Best practices: - Always set a strong `POSTGRES_PASSWORD` - Do NOT add `ports:` to your Postgres container unless you need external access - If you must expose Postgres, use a firewall to restrict access to trusted IPs -- Use Docker's internal networking — Zilean connects to Postgres by container name +- Use Docker's internal networking - Zilean connects to Postgres by container name ## Resource Usage -- **Initial sync:** Expect high CPU for 10-30 minutes during the first DMM sync. This is normal — Zilean is parsing ~1.2M HTML files and performing bulk database upserts. Progress is logged periodically. +- **Initial sync:** Expect high CPU for 10-30 minutes during the first DMM sync. This is normal - Zilean is parsing ~1.2M HTML files and performing bulk database upserts. Progress is logged periodically. - **Subsequent syncs:** Lightweight. Only pulls new/changed files via `git pull` and processes the diff. - **If high usage persists** after the initial sync completes: check for security compromise (see Security section above). Persistent high CPU with unfamiliar processes is a red flag. - PostgreSQL requires `shm_size: 256m` for bulk operations (see PostgreSQL Shared Memory section). 
@@ -121,25 +121,25 @@ Best practices: For high-availability or high-traffic setups, you can run multiple Zilean instances: -- **1 scraper instance** (`Zilean__Dmm__EnableScraping=true`) — handles DMM sync and data ingestion -- **N API instances** (`Zilean__Dmm__EnableScraping=false`, `Zilean__Dmm__EnableEndpoint=true`) — serve search queries only +- **1 scraper instance** (`Zilean__Dmm__EnableScraping=true`) - handles DMM sync and data ingestion +- **N API instances** (`Zilean__Dmm__EnableScraping=false`, `Zilean__Dmm__EnableEndpoint=true`) - serve search queries only - All instances share the same PostgreSQL database - `PreventOverlapping("SyncJobs")` prevents concurrent scraping within an instance - PostgreSQL's default `max_connections=100` is sufficient for typical deployments ## Health Checks -- `/healthchecks/ping` — lightweight liveness check (always returns 200) -- `/healthchecks/ready` — readiness check that verifies database connectivity (returns 503 if DB is unreachable) +- `/healthchecks/ping` - lightweight liveness check (always returns 200) +- `/healthchecks/ready` - readiness check that verifies database connectivity (returns 503 if DB is unreachable) ## Troubleshooting ### Database not found / "does not exist" Common causes: -- PostgreSQL hasn't finished initializing — Zilean now retries 5 times with 5-second delays -- Wrong credentials — check `POSTGRES_PASSWORD` matches between Zilean and Postgres containers -- Volume permissions — on Unraid/Synology, ensure the Postgres data volume has correct ownership +- PostgreSQL hasn't finished initializing - Zilean now retries 5 times with 5-second delays +- Wrong credentials - check `POSTGRES_PASSWORD` matches between Zilean and Postgres containers +- Volume permissions - on Unraid/Synology, ensure the Postgres data volume has correct ownership ### "could not resize shared memory segment" @@ -175,8 +175,8 @@ services: image: postgres:16-alpine container_name: zilean-postgres restart: unless-stopped - 
shm_size: 256m # required — default 64m causes "No space left on device" during bulk upserts - # Do NOT expose ports unless you need external access — see Security section + shm_size: 256m # required - default 64m causes "No space left on device" during bulk upserts + # Do NOT expose ports unless you need external access - see Security section volumes: - zilean-pg:/var/lib/postgresql/data environment: