diff --git a/.gitignore b/.gitignore index 2edf3e7..f463e50 100644 --- a/.gitignore +++ b/.gitignore @@ -42,12 +42,6 @@ client_secrets.json credentials.json token.json -# Google Drive API key (copy config.example.json → config.json and fill in your key) -config.json - -# Generated by build_exe.py — contains the XOR-obfuscated API key; never commit -src/_bundled_key.py - # Generated by build_exe.py — contains build-time flags; never commit gui/_build_flags.py diff --git a/CLAUDE.md b/CLAUDE.md index 7dc2937..0f3927a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -93,11 +93,7 @@ MPCFillToPDF/ ## Key implementation notes ### Image download -- **Primary (API key configured):** `GET https://www.googleapis.com/drive/v3/files/{id}?alt=media&key={KEY}` via `requests`. Works for public Drive files; avoids anonymous rate limiting (HTTP 429). The key is read from `config.json` in dev and from the XOR-obfuscated `src/_bundled_key.py` module in the .exe. -- **Fallback (no API key):** `gdown.download(f"https://drive.google.com/uc?id={drive_id}", ...)` — the original behaviour; may hit rate limits on large batches. -- `src/config.py` → `get_drive_api_key()` handles resolution order (bundled → config.json → None). -- `src/_bundled_key.py` is generated by `build_exe.py` at build time and deleted afterwards; it is gitignored and never committed. -- `config.json` (gitignored) is the dev-time key store; `config.example.json` is the committed template. +- `GET https://lh4.googleusercontent.com/d/{id}=d` via `requests` with `stream=True`. Works for both "Anyone with the link" and "Public on the web" Drive sharing modes; returns the original file unchanged (same approach as mpc-autofill). - Download with 5 parallel threads (matches mpc-autofill behaviour) ### Image cropping @@ -168,4 +164,4 @@ MPCFillToPDF/ - Mock only at module boundaries (`patch("src.pipeline.download_all")`), never inside `src/`. - Use `tmp_path` (pytest built-in) for all temporary files. - Group related tests in a class named `TestFeatureName`; keep each test focused on one behaviour. -- Run `python -m pytest tests/ --ignore=tests/test_downloader.py` to run the suite without network-dependent tests. All 170 tests must pass before committing. +- Run `python -m pytest tests/` to run the full suite. All tests must pass before committing. diff --git a/build_exe.py b/build_exe.py index fae7325..2de9220 100644 --- a/build_exe.py +++ b/build_exe.py @@ -9,15 +9,6 @@ folder and it will create `MPCFillToPDF/archivos generados/` and `MPCFillToPDF/procesamiento/` next to itself. -API key embedding ------------------ -If `config.json` contains a valid `google_drive_api_key`, it is XOR-encoded -with a random mask and baked into `src/_bundled_key.py` before packaging. -PyInstaller bundles that module; at runtime the app decodes and uses the key -in memory. The key never appears as a plain string inside the binary. - -`src/_bundled_key.py` is listed in .gitignore and deleted after the build. - Debug logging ------------- Pass `--debug-logging` to bake `gui/_build_flags.py` with `DEBUG_LOGGING = True`. @@ -60,8 +51,6 @@ """ import argparse -import json -import os import shutil import subprocess import sys @@ -74,63 +63,9 @@ ICONS = ROOT / "icons" RESOURCES = ROOT / "resources" VERSION_FILE = ROOT / "version_file.txt" -BUNDLED_KEY_PATH = ROOT / "src" / "_bundled_key.py" BUILD_FLAGS_PATH = ROOT / "gui" / "_build_flags.py" -def _embed_api_key() -> bool: - """Read API key from config.json or DRIVE_API_KEY env var, XOR-encode it, write src/_bundled_key.py. - - Returns True if a key was successfully embedded, False otherwise. - The file must be deleted after the build regardless. - """ - key = "" - - config_path = ROOT / "config.json" - if config_path.exists(): - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - key = str(data.get("google_drive_api_key", "")).strip() - if key.startswith("YOUR_"): - key = "" - except Exception as exc: - print(f"WARNING: Could not parse config.json ({exc}).") - - if not key: - key = os.environ.get("DRIVE_API_KEY", "").strip() - - if not key: - print( - "WARNING: No API key found (config.json or DRIVE_API_KEY env var) — " - "API key will NOT be embedded." - ) - return False - - key_bytes = key.encode("utf-8") - mask = os.urandom(len(key_bytes)) - encoded = bytes(a ^ b for a, b in zip(key_bytes, mask)) - - BUNDLED_KEY_PATH.write_text( - "# Auto-generated by build_exe.py — do not commit.\n" - f"_E = {encoded!r}\n" - f"_M = {mask!r}\n" - "\n" - "def _get_key() -> str:\n" - " return bytes(a ^ b for a, b in zip(_E, _M)).decode('utf-8')\n", - encoding="utf-8", - ) - print(f"API key embedded (XOR-obfuscated) -> {BUNDLED_KEY_PATH.name}") - return True - - -def _remove_bundled_key() -> None: - try: - BUNDLED_KEY_PATH.unlink() - print(f"Cleaned up {BUNDLED_KEY_PATH.name}") - except FileNotFoundError: - pass - - def _write_build_flags(debug_logging: bool) -> None: BUILD_FLAGS_PATH.write_text( f"# Auto-generated by build_exe.py — do not commit.\nDEBUG_LOGGING = {debug_logging}\n", @@ -172,7 +107,6 @@ def main() -> None: sys.exit(1) _check_paths() - _embed_api_key() _write_build_flags(cli_args.debug_logging) sep = ";" if sys.platform == "win32" else ":" @@ -191,7 +125,6 @@ def main() -> None: f"--add-data={RESOURCES}{sep}resources", "--hidden-import=PIL.Image", "--hidden-import=reportlab.pdfgen", - "--hidden-import=gdown", # Strip stdlib modules that are never imported at runtime. # Reduces archive surface scanned by AV heuristics. "--exclude-module=unittest", @@ -215,7 +148,6 @@ def main() -> None: try: subprocess.run(args, check=True, cwd=ROOT) finally: - _remove_bundled_key() _remove_build_flags() suffix = ".exe" if sys.platform == "win32" else "" diff --git a/cli/main.py b/cli/main.py index 933e0b7..363bf51 100644 --- a/cli/main.py +++ b/cli/main.py @@ -66,7 +66,7 @@ def _progress(stage: str, done: int, total: int) -> None: def _cleanup(workdir: Path) -> None: - for sub in ("raw", "bled"): + for sub in ("raw", "bled", "scryfall"): target = workdir / sub if target.exists(): shutil.rmtree(target) @@ -80,9 +80,11 @@ def _print_permission_error(e: DownloadPermissionError) -> None: if e.position: print(f" Posición en el PDF: {e.position}", file=sys.stderr) print(file=sys.stderr) - print("Esto no es un fallo del programa.", file=sys.stderr) - print("La imagen ha perdido los permisos de acceso público en Google Drive.", file=sys.stderr) - print("Pide al creador del proxy que restaure los permisos.", file=sys.stderr) + print("Esto no es un fallo del programa. Posibles causas:", file=sys.stderr) + print(" • El archivo solo permite descarga con cuenta de Google", file=sys.stderr) + print(" («Cualquiera con el enlace» no basta para descarga anónima).", file=sys.stderr) + print(" • Le han quitado los permisos de acceso público.", file=sys.stderr) + print("Pide al creador que comparta las imágenes como «Público en Internet».", file=sys.stderr) def _setup_logging(log_path: Path, verbose: bool) -> None: diff --git a/config.example.json b/config.example.json deleted file mode 100644 index 4a870e0..0000000 --- a/config.example.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "google_drive_api_key": "YOUR_GOOGLE_DRIVE_API_KEY_HERE" -} diff --git a/gui/main.py b/gui/main.py index 28c5769..0f1882b 100644 --- a/gui/main.py +++ b/gui/main.py @@ -1247,8 +1247,13 @@ def _handle(self, ev: tuple) -> None: names = ", ".join(f"«{name}»" for _, name in perm_errors[:3]) more = f" y {len(perm_errors) - 3} más" if len(perm_errors) > 3 else "" parts.append( - f"{len(perm_errors)} imagen(es) sin permiso de descarga: {names}{more}.\n" - "Pide al creador del proxy que restaure los permisos de Google Drive." + f"{len(perm_errors)} imagen(es) no se pudieron descargar: {names}{more}.\n" + "Posibles causas:\n" + " • El archivo en Google Drive solo permite descarga con cuenta de Google " + "(«Cualquiera con el enlace» no basta para descarga anónima).\n" + " • Le han quitado los permisos de acceso público.\n" + "Pide al creador del proxy que comparta las imágenes como " + "«Público en Internet» en Google Drive." ) if timeout_errors: names = ", ".join(f"«{name}»" for _, name in timeout_errors[:3]) @@ -1325,9 +1330,12 @@ def _handle(self, ev: tuple) -> None: else: parts[0] += "." parts.append( - "Esto no es un fallo del programa: le han quitado los permisos " - "de acceso público a la imagen en Google Drive.\n" - "Pide al creador del proxy que restaure los permisos." + "Esto no es un fallo del programa. Posibles causas:\n" + " • El archivo en Google Drive solo permite descarga con cuenta de Google " + "(«Cualquiera con el enlace» no basta para descarga anónima).\n" + " • Le han quitado los permisos de acceso público.\n" + "Pide al creador del proxy que comparta las imágenes como " + "«Público en Internet» en Google Drive." ) self.status_var.set("Error de descarga.") self._finish_running() diff --git a/requirements.txt b/requirements.txt index c2076b3..938a0f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ Pillow>=10.0,<12 reportlab>=4.0,<5 -gdown>=5.0,<6 requests>=2.28,<3 windnd>=1.0,<2 plyer>=2.0,<3 diff --git a/src/app_settings.py b/src/app_settings.py index 4cac72e..52ca695 100644 --- a/src/app_settings.py +++ b/src/app_settings.py @@ -83,6 +83,8 @@ def save_settings(settings: AppSettings, base_dir: Path) -> None: "cut_line_over_fronts": settings.cut_line_over_fronts, "cut_line_over_backs": settings.cut_line_over_backs, } - path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8") + tmp = path.with_suffix(".tmp") + tmp.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8") + tmp.replace(path) except Exception as exc: _log.warning("Could not save settings.json: %s", exc) diff --git a/src/config.py b/src/config.py deleted file mode 100644 index 0c3306e..0000000 --- a/src/config.py +++ /dev/null @@ -1,37 +0,0 @@ -import json -import logging -from pathlib import Path - -_log = logging.getLogger(__name__) -_PROJECT_ROOT = Path(__file__).resolve().parent.parent - - -def get_drive_api_key() -> str | None: - """Return the Google Drive API key, or None if not configured. - - Resolution order: - 1. src._bundled_key — generated by build_exe.py and bundled into the .exe - (XOR-obfuscated so the key is not a plain string in the binary). - 2. config.json in the project root — used when running from source. - This file must NOT be committed; copy config.example.json to get started. - """ - try: - from src._bundled_key import _get_key # noqa: PLC0415 - - key = _get_key() - if key: - return key - except ImportError: - pass - - config_path = _PROJECT_ROOT / "config.json" - if config_path.exists(): - try: - data = json.loads(config_path.read_text(encoding="utf-8")) - key = str(data.get("google_drive_api_key", "")).strip() - if key and not key.startswith("YOUR_"): - return key - except Exception as exc: - _log.warning("Could not read config.json: %s", exc) - - return None diff --git a/src/constants.py b/src/constants.py index 27a8837..7ec6f27 100644 --- a/src/constants.py +++ b/src/constants.py @@ -17,5 +17,6 @@ class Stage(str, Enum): ProgressCallback = Callable[[int, int], None] | None StageCallback = Callable[[str, int, int], None] | None +JobPdfStartCallback = Callable[[int, int, str], None] | None SpeedCallback = Callable[[float, float], None] | None ImageDoneCallback = Callable[[str], None] | None diff --git a/src/cropper.py b/src/cropper.py index 6566e17..a1fbda3 100644 --- a/src/cropper.py +++ b/src/cropper.py @@ -1,5 +1,4 @@ import logging -import math from pathlib import Path from PIL import Image, ImageOps @@ -21,8 +20,6 @@ _CORNER_RADIUS_FRAC = 0.04 # Minimum luminance difference to consider a corner pixel "anomalous" _CORNER_LUMA_THRESHOLD = 60 -# Offset past the corner zone where border color is sampled -_CORNER_SAMPLE_OFFSET = 1.0 def _luminance(r: int, g: int, b: int) -> float: @@ -83,45 +80,44 @@ def _fill_rounded_corners(img: Image.Image) -> Image.Image: """ w, h = img.size radius = max(1, round(min(w, h) * _CORNER_RADIUS_FRAC)) - - # corner_name → (origin_x, origin_y, dx_sign, dy_sign) - corners = { - "tl": (0, 0, 1, 1), - "tr": (w - 1, 0, -1, 1), - "bl": (0, h - 1, 1, -1), - "br": (w - 1, h - 1, -1, -1), - } + radius_sq = radius * radius + + # Precompute quarter-circle offsets once (integer comparison, no sqrt) + offsets = [ + (dx, dy) + for dy in range(radius + 1) + for dx in range(radius + 1) + if dx * dx + dy * dy <= radius_sq + ] + + corner_defs = ( + ("tl", 0, 0, 1, 1), + ("tr", w - 1, 0, -1, 1), + ("bl", 0, h - 1, 1, -1), + ("br", w - 1, h - 1, -1, -1), + ) any_filled = False pixels = img.load() - for name, (ox, oy, sx, sy) in corners.items(): + for name, ox, oy, sx, sy in corner_defs: border_color = _sample_border_color(img, name, radius) border_luma = _luminance(*border_color) - filled_this = False - for dy in range(radius): - for dx in range(radius): - # Distance from the corner vertex - dist = math.sqrt(dx * dx + dy * dy) - if dist > radius * _CORNER_SAMPLE_OFFSET: - continue - px = ox + dx * sx - py = oy + dy * sy - if not (0 <= px < w and 0 <= py < h): - continue - r, g, b = pixels[px, py][:3] - luma = _luminance(r, g, b) - if abs(luma - border_luma) > _CORNER_LUMA_THRESHOLD: - pixels[px, py] = border_color - filled_this = True - + for dx, dy in offsets: + px = ox + dx * sx + py = oy + dy * sy + if not (0 <= px < w and 0 <= py < h): + continue + r, g, b = pixels[px, py][:3] + if abs(_luminance(r, g, b) - border_luma) > _CORNER_LUMA_THRESHOLD: + pixels[px, py] = border_color + filled_this = True if filled_this: any_filled = True if any_filled: _log.debug("Filled rounded corners") - return img diff --git a/src/downloader.py b/src/downloader.py index b6cbb11..4677b84 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1,4 +1,3 @@ -import functools import logging import os import threading @@ -7,20 +6,13 @@ from pathlib import Path from threading import Event -import gdown import requests from src.cancellation import Cancelled -from src.config import get_drive_api_key from src.constants import ImageDoneCallback, ProgressCallback, SpeedCallback _log = logging.getLogger(__name__) -try: - from gdown.exceptions import FileURLRetrievalError as _GdownPermissionError -except ImportError: - _GdownPermissionError = None - THREADS = 5 _MAX_RETRIES = 4 _INITIAL_BACKOFF = 1.0 # seconds; doubles on each retry (1 → 2 → 4 → 8) @@ -32,30 +24,6 @@ _CONNECT_TIMEOUT = 10 # seconds to establish the TCP connection _READ_TIMEOUT = 30 # seconds without receiving any data -# Loaded once at import time; None means fall back to gdown. -_DRIVE_API_KEY: str | None = get_drive_api_key() - -if _DRIVE_API_KEY: - _log.info("Google Drive API key loaded — using Drive API v3 for downloads.") -else: - _log.info("No Google Drive API key found — falling back to gdown.") - - -def _install_download_timeout() -> None: - """Patch requests.Session so every request gdown makes has a timeout. - Without this, gdown can hang indefinitely when Drive stops responding.""" - orig = requests.Session.request - - @functools.wraps(orig) - def _with_timeout(self, method, url, **kwargs): - kwargs.setdefault("timeout", (_CONNECT_TIMEOUT, _READ_TIMEOUT)) - return orig(self, method, url, **kwargs) - - requests.Session.request = _with_timeout - - -_install_download_timeout() - class DownloadRateLimitError(Exception): """Raised when Google Drive rate-limits us and all retries are exhausted.""" @@ -111,12 +79,8 @@ def __init__(self, drive_id: str, card_name: str) -> None: super().__init__(f"Tiempo de espera agotado para '{card_name}' (ID: {drive_id})") -def _gdown_url(drive_id: str) -> str: - return f"https://drive.google.com/uc?id={drive_id}" - - -def _drive_api_url(drive_id: str, api_key: str) -> str: - return f"https://www.googleapis.com/drive/v3/files/{drive_id}?alt=media&key={api_key}" +def _lh4_url(drive_id: str) -> str: + return f"https://lh4.googleusercontent.com/d/{drive_id}=d" def _is_rate_limit_error(exc: Exception) -> bool: @@ -139,18 +103,6 @@ def _is_rate_limit_error(exc: Exception) -> bool: return False -def _is_permission_error(exc: Exception) -> bool: - if _GdownPermissionError is not None and isinstance(exc, _GdownPermissionError): - return True - if "FileURLRetrievalError" in type(exc).__name__: - return True - if isinstance(exc, requests.HTTPError): - status = getattr(getattr(exc, "response", None), "status_code", None) - if status in (403, 404): - return True - return False - - def _safe_unlink(path: Path) -> None: try: path.unlink() @@ -158,16 +110,6 @@ def _safe_unlink(path: Path) -> None: pass -def _download_with_api(drive_id: str, output_path: Path, api_key: str) -> None: - """Download a Drive file via the v3 API (requires a valid API key).""" - url = _drive_api_url(drive_id, api_key) - resp = requests.get(url, stream=True, timeout=(_CONNECT_TIMEOUT, _READ_TIMEOUT)) - resp.raise_for_status() - with output_path.open("wb") as fh: - for chunk in resp.iter_content(chunk_size=65536): - fh.write(chunk) - - def download_image(drive_id: str, dest_dir: Path, filename: str) -> Path: dest_dir.mkdir(parents=True, exist_ok=True) suffix = Path(filename).suffix or ".jpg" @@ -182,10 +124,15 @@ def download_image(drive_id: str, dest_dir: Path, filename: str) -> Path: delay = _INITIAL_BACKOFF for attempt in range(_MAX_RETRIES + 1): try: - if _DRIVE_API_KEY: - _download_with_api(drive_id, tmp_path, _DRIVE_API_KEY) - else: - gdown.download(_gdown_url(drive_id), str(tmp_path), quiet=True) + resp = requests.get( + _lh4_url(drive_id), + stream=True, + timeout=(_CONNECT_TIMEOUT, _READ_TIMEOUT), + ) + resp.raise_for_status() + with tmp_path.open("wb") as fh: + for chunk in resp.iter_content(chunk_size=65536): + fh.write(chunk) tmp_path.replace(output_path) _log.debug("Downloaded: %s", output_path.name) return output_path @@ -193,31 +140,10 @@ def download_image(drive_id: str, dest_dir: Path, filename: str) -> Path: _safe_unlink(tmp_path) _log.error("Timeout downloading %s (%s)", filename, drive_id) raise DownloadTimeoutError(drive_id, filename) - except Exception as exc: + except requests.HTTPError as exc: _safe_unlink(tmp_path) - if _is_permission_error(exc) and _DRIVE_API_KEY: - # Drive API v3 with an API key only works for "Public on the web" files. - # Files shared as "Anyone with the link" return 403 via the API but are - # downloadable via gdown (which follows Google's web redirect flow). - # Fall back to gdown before declaring a permission failure. - _log.info("API 403 for %s — retrying via gdown fallback", filename) - try: - gdown.download(_gdown_url(drive_id), str(tmp_path), quiet=True) - tmp_path.replace(output_path) - _log.debug("Downloaded via gdown fallback: %s", output_path.name) - return output_path - except Exception as gdown_exc: - _safe_unlink(tmp_path) - if _is_permission_error(gdown_exc): - _log.error( - "Permission denied (gdown also failed): %s (%s)", filename, drive_id - ) - raise DownloadPermissionError(drive_id, filename) from gdown_exc - if isinstance(gdown_exc, requests.exceptions.Timeout | TimeoutError): - _log.error("Timeout via gdown fallback: %s (%s)", filename, drive_id) - raise DownloadTimeoutError(drive_id, filename) from gdown_exc - raise gdown_exc - if _is_permission_error(exc): + status = getattr(getattr(exc, "response", None), "status_code", None) + if status in (403, 404): _log.error("Permission denied: %s (%s)", filename, drive_id) raise DownloadPermissionError(drive_id, filename) from exc if _is_rate_limit_error(exc): @@ -235,6 +161,23 @@ def download_image(drive_id: str, dest_dir: Path, filename: str) -> Path: _log.error("Rate limit exhausted for %s", drive_id) raise DownloadRateLimitError() from exc raise + except Exception as exc: + _safe_unlink(tmp_path) + if _is_rate_limit_error(exc): + if attempt < _MAX_RETRIES: + _log.warning( + "Rate limited on %s, retry %d/%d in %.0fs", + drive_id, + attempt + 1, + _MAX_RETRIES, + delay, + ) + time.sleep(delay) + delay *= 2 + continue + _log.error("Rate limit exhausted for %s", drive_id) + raise DownloadRateLimitError() from exc + raise raise DownloadRateLimitError() @@ -255,7 +198,7 @@ def download_all( on_speed_update(speed_mbps, eta_sec) is called after each download with running speed and estimated remaining time (both floats); only called after ≥0.1 s elapsed. If `cancel_event` is provided and gets set mid-run, pending downloads are - cancelled, in-flight ones are awaited (gdown is uninterruptible), and the + cancelled, in-flight ones are awaited before the executor shuts down, and the function raises `Cancelled` once the executor has joined. """ dest_dir = Path(dest_dir) diff --git a/src/pdf_generator.py b/src/pdf_generator.py index 004bb98..1dfff0b 100644 --- a/src/pdf_generator.py +++ b/src/pdf_generator.py @@ -268,7 +268,7 @@ def id_bytes(drive_id: str) -> int: if cancel_event is not None and cancel_event.is_set(): raise Cancelled() suffix = f"_{idx}" if multiple else "" - path = output_dir / f"out_{base_name}{suffix}.pdf" + path = output_dir / f"{base_name}{suffix}.pdf" c = canvas.Canvas(str(path), pagesize=A4) for page_slots in chunk: if cancel_event is not None and cancel_event.is_set(): diff --git a/src/pipeline.py b/src/pipeline.py index 5b3deaf..35ba730 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -14,7 +14,7 @@ DEFAULT_CUT_LINE_WIDTH, ) from src.cancellation import Cancelled -from src.constants import Stage, StageCallback +from src.constants import JobPdfStartCallback, Stage, StageCallback from src.cropper import process_for_pdf from src.deck_importer import fetch_deck from src.downloader import ( @@ -390,7 +390,7 @@ def _inner(done, total): def _on_crop(drive_id: str, done: int, total: int) -> None: if progress_callback: - progress_callback("crop", done, total) + progress_callback(Stage.CROP, done, total) if progress_callback and crop_tasks: progress_callback(Stage.CROP, 0, len(crop_tasks)) @@ -443,11 +443,11 @@ def _build_job_data( next_slot: int, extra_fronts: list[Path], extra_backs: list[Path | None], - fallback_cardback_id: str, ) -> tuple[_JobData, int]: """Parse XMLs and build all slot maps for one job. Returns (job_data, updated next_slot). Does not download or crop anything.""" orders = [parse(p) for p in xml_paths] + fallback_cardback_id = orders[0].cardback_id if orders else "" local_id_to_path: dict[str, Path] = {} front_slot_to_id, back_slot_to_id, id_name_map, drive_id_context, xml_needed_ids, next_slot = ( @@ -489,7 +489,7 @@ def run_plan( extra_fronts: list[str | Path] | None = None, extra_backs: list[str | Path | None] | None = None, local_crop_map: dict[Path, bool] | None = None, - on_job_pdf_start: StageCallback = None, + on_job_pdf_start: JobPdfStartCallback = None, on_xml_download_progress: StageCallback = None, on_xml_crop_progress: StageCallback = None, fronts_only: bool = False, @@ -539,7 +539,6 @@ def _inner(done, total): is_last = i == last_idx ef = extra_fronts_p if is_last else [] eb = extra_backs_p if is_last else [] - fallback = parse(Path(job.xml_paths[0])).cardback_id if job.xml_paths else "" jd, next_slot = _build_job_data( xml_paths=[Path(p) for p in job.xml_paths], @@ -547,7 +546,6 @@ def _inner(done, total): next_slot=next_slot, extra_fronts=ef, extra_backs=eb, - fallback_cardback_id=fallback, ) job_data_list.append(jd) combined_id_name.update(jd.id_name_map) @@ -628,7 +626,7 @@ def _on_image_done(drive_id: str) -> None: def _on_crop_plan(drive_id: str, done: int, total: int) -> None: if progress_callback: - progress_callback("crop", done, total) + progress_callback(Stage.CROP, done, total) if on_xml_crop_progress: for xml_name in _crop_id_to_xml.get(drive_id, []): _xml_crop_done[xml_name] += 1 diff --git a/src/precheck.py b/src/precheck.py index 95eec0e..caf9bf6 100644 --- a/src/precheck.py +++ b/src/precheck.py @@ -14,8 +14,8 @@ from src.constants import CARDS_PER_PAGE from src.parser import parse -_THUMB_CHECK_URL = "https://drive.google.com/thumbnail?id={}&sz=w1" -_CHECK_THREADS = 20 +_THUMB_CHECK_URL = "https://lh4.googleusercontent.com/d/{}=d" +_CHECK_THREADS = 5 _CHECK_TIMEOUT = (5, 10) diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index f9767b2..0000000 --- a/tests/test_config.py +++ /dev/null @@ -1,71 +0,0 @@ -"""Tests for src/config.py — Google Drive API key resolution cascade.""" - -from __future__ import annotations - -import json -import sys -import types -from unittest.mock import patch - -from src.config import get_drive_api_key - - -class TestGetDriveApiKey: - def test_returns_none_when_no_config_and_no_bundled(self, tmp_path): - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() is None - - def test_reads_key_from_config_json(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"google_drive_api_key": "AIzaTestKey123"}), encoding="utf-8" - ) - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() == "AIzaTestKey123" - - def test_ignores_placeholder_key(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"google_drive_api_key": "YOUR_API_KEY_HERE"}), encoding="utf-8" - ) - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() is None - - def test_ignores_empty_key(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"google_drive_api_key": ""}), encoding="utf-8" - ) - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() is None - - def test_missing_key_field_in_json(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"other_field": "value"}), encoding="utf-8" - ) - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() is None - - def test_bundled_key_takes_priority_over_config_json(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"google_drive_api_key": "from_config_json"}), encoding="utf-8" - ) - fake_module = types.ModuleType("src._bundled_key") - fake_module._get_key = lambda: "bundled_value" - with patch.dict(sys.modules, {"src._bundled_key": fake_module}): - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() == "bundled_value" - - def test_empty_bundled_key_falls_through_to_config_json(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"google_drive_api_key": "from_config"}), encoding="utf-8" - ) - fake_module = types.ModuleType("src._bundled_key") - fake_module._get_key = lambda: "" - with patch.dict(sys.modules, {"src._bundled_key": fake_module}): - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() == "from_config" - - def test_strips_whitespace_from_config_key(self, tmp_path): - (tmp_path / "config.json").write_text( - json.dumps({"google_drive_api_key": " AIzaSpaced "}), encoding="utf-8" - ) - with patch("src.config._PROJECT_ROOT", tmp_path): - assert get_drive_api_key() == "AIzaSpaced" diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 770f9bf..0bcd592 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,8 +1,7 @@ """Tests for src/downloader.py — single-image and batch download logic.""" import threading -from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest import requests @@ -21,13 +20,21 @@ # ─── helpers ──────────────────────────────────────────────────────────────── -def _fake_gdown(url: str, path: str, quiet: bool) -> None: - """Simulate a successful gdown download by writing a tiny file.""" - Path(path).write_bytes(b"\xff\xd8\xff\xe0" + b"\x00" * 8) +def _fake_requests_get(url: str, **kwargs) -> MagicMock: + """Simulate a successful requests.get that returns a tiny JPEG.""" + mock = MagicMock() + mock.status_code = 200 + mock.raise_for_status.return_value = None + mock.iter_content.side_effect = lambda chunk_size=None: iter( + [b"\xff\xd8\xff\xe0" + b"\x00" * 8] + ) + return mock -class _FakeFileURLRetrievalError(Exception): - """Name detected by the 'FileURLRetrievalError' string check in downloader.""" +def _fake_403(url: str, **kwargs) -> None: + resp = MagicMock() + resp.status_code = 403 + raise requests.HTTPError(response=resp) # ─── _is_rate_limit_error ──────────────────────────────────────────────────── @@ -55,37 +62,34 @@ def test_rate_limit_not_detected_for_generic_error(): # ─── download_image ────────────────────────────────────────────────────────── -def test_download_cache_hit_skips_gdown(tmp_path): +def test_download_cache_hit_skips_request(tmp_path): dest = tmp_path / "raw" dest.mkdir() cached = dest / "DRIVEID.jpg" cached.write_bytes(b"cached content") - with patch("gdown.download") as mock_dl: + with patch("requests.get") as mock_get: result = download_image("DRIVEID", dest, "card.jpg") - mock_dl.assert_not_called() + mock_get.assert_not_called() assert result == cached def test_download_creates_file(tmp_path): - with patch("gdown.download", side_effect=_fake_gdown): + with patch("requests.get", side_effect=_fake_requests_get): result = download_image("ID001", tmp_path / "raw", "card.jpg") assert result.exists() assert result.stat().st_size > 0 def test_download_output_named_by_drive_id(tmp_path): - with patch("gdown.download", side_effect=_fake_gdown): + with patch("requests.get", side_effect=_fake_requests_get): result = download_image("MYID", tmp_path / "raw", "something.jpg") assert result.name.startswith("MYID") def test_download_permission_error(tmp_path): - def _raise(*a, **kw): - raise _FakeFileURLRetrievalError("no access") - - with patch("gdown.download", side_effect=_raise): + with patch("requests.get", side_effect=_fake_403): with pytest.raises(DownloadPermissionError) as exc_info: download_image("ID001", tmp_path / "raw", "card.jpg") assert exc_info.value.drive_id == "ID001" @@ -93,18 +97,18 @@ def _raise(*a, **kw): def test_download_timeout_error(tmp_path): - def _raise(*a, **kw): + def _raise(*args, **kwargs): raise requests.exceptions.Timeout("timed out") - with patch("gdown.download", side_effect=_raise): + with patch("requests.get", side_effect=_raise): with pytest.raises(DownloadTimeoutError) as exc_info: download_image("ID001", tmp_path / "raw", "card.jpg") assert exc_info.value.drive_id == "ID001" def test_download_rate_limit_exhausted(tmp_path): - with patch("gdown.download", side_effect=Exception("429 Too Many Requests")): - with patch("time.sleep"): # skip backoff delays + with patch("requests.get", side_effect=Exception("429 Too Many Requests")): + with patch("time.sleep"): with pytest.raises(DownloadRateLimitError): download_image("ID001", tmp_path / "raw", "card.jpg") @@ -112,13 +116,13 @@ def test_download_rate_limit_exhausted(tmp_path): def test_download_rate_limit_retries_then_succeeds(tmp_path): attempts = {"n": 0} - def _flaky(*a, **kw): + def _flaky(url, **kwargs): attempts["n"] += 1 if attempts["n"] < 3: raise Exception("503 Service Unavailable") - _fake_gdown(*a, **kw) + return _fake_requests_get(url, **kwargs) - with patch("gdown.download", side_effect=_flaky): + with patch("requests.get", side_effect=_flaky): with patch("time.sleep"): result = download_image("ID001", tmp_path / "raw", "card.jpg") assert result.exists() @@ -129,7 +133,7 @@ def _flaky(*a, **kw): def test_download_all_returns_all_results(tmp_path): - with patch("gdown.download", side_effect=_fake_gdown): + with patch("requests.get", side_effect=_fake_requests_get): results = download_all( [("ID1", "a.jpg"), ("ID2", "b.jpg")], tmp_path / "raw", @@ -150,7 +154,7 @@ def test_download_all_progress_callback_fires_per_image(tmp_path): def _cb(done, total): calls.append((done, total)) - with patch("gdown.download", side_effect=_fake_gdown): + with patch("requests.get", side_effect=_fake_requests_get): download_all( [("ID1", "a.jpg"), ("ID2", "b.jpg"), ("ID3", "c.jpg")], tmp_path / "raw", @@ -165,7 +169,7 @@ def _cb(done, total): def test_download_all_on_image_done_callback(tmp_path): done_ids = [] - with patch("gdown.download", side_effect=_fake_gdown): + with patch("requests.get", side_effect=_fake_requests_get): download_all( [("ID1", "a.jpg"), ("ID2", "b.jpg")], tmp_path / "raw", @@ -179,7 +183,7 @@ def test_download_all_cancel_event_raises_cancelled(tmp_path): event = threading.Event() event.set() - with patch("gdown.download", side_effect=_fake_gdown): + with patch("requests.get", side_effect=_fake_requests_get): with pytest.raises(Cancelled): download_all( [("ID1", "a.jpg")], @@ -189,10 +193,7 @@ def test_download_all_cancel_event_raises_cancelled(tmp_path): def test_download_all_propagates_permission_error_as_partial(tmp_path): - def _raise(*a, **kw): - raise _FakeFileURLRetrievalError("no access") - - with patch("gdown.download", side_effect=_raise): + with patch("requests.get", side_effect=_fake_403): with pytest.raises(DownloadPartialError) as exc_info: download_all([("ID1", "a.jpg")], tmp_path / "raw") @@ -203,11 +204,11 @@ def _raise(*a, **kw): def test_download_all_propagates_timeout_error_as_partial(tmp_path): - def _raise(*a, **kw): + def _raise(*args, **kwargs): raise requests.exceptions.Timeout("timeout") - with patch("gdown.download", side_effect=_raise): - with patch("time.sleep"): # skip the retry delay + with patch("requests.get", side_effect=_raise): + with patch("time.sleep"): with pytest.raises(DownloadPartialError) as exc_info: download_all([("ID1", "a.jpg")], tmp_path / "raw") @@ -220,12 +221,14 @@ def _raise(*a, **kw): def test_download_all_continues_after_partial_failure(tmp_path): """Successful downloads complete even when some images fail.""" - def _mixed(url: str, path: str, quiet: bool) -> None: + def _mixed(url: str, **kwargs) -> MagicMock: if "BAD" in url: - raise _FakeFileURLRetrievalError("no access") - Path(path).write_bytes(b"img") + resp = MagicMock() + resp.status_code = 403 + raise requests.HTTPError(response=resp) + return _fake_requests_get(url, **kwargs) - with patch("gdown.download", side_effect=_mixed): + with patch("requests.get", side_effect=_mixed): with pytest.raises(DownloadPartialError) as exc_info: download_all( [("GOOD1", "good1.jpg"), ("BAD", "bad.jpg"), ("GOOD2", "good2.jpg")], @@ -235,7 +238,6 @@ def _mixed(url: str, path: str, quiet: bool) -> None: err = exc_info.value assert len(err.permission_errors) == 1 assert err.permission_errors[0][0] == "BAD" - # Good images still downloaded assert (tmp_path / "raw" / "GOOD1.jpg").exists() assert (tmp_path / "raw" / "GOOD2.jpg").exists() @@ -244,16 +246,16 @@ def test_download_all_timeout_retry_succeeds(tmp_path): """Timed-out images that succeed on retry are NOT included in partial errors.""" attempts: dict[str, int] = {} - def _flaky(url: str, path: str, quiet: bool) -> None: - drive_id = url.split("id=")[1] + def _flaky(url: str, **kwargs) -> MagicMock: + drive_id = url.split("/d/")[1].removesuffix("=d") attempts[drive_id] = attempts.get(drive_id, 0) + 1 if attempts[drive_id] == 1: raise requests.exceptions.Timeout("timeout first time") - Path(path).write_bytes(b"img") + return _fake_requests_get(url, **kwargs) - with patch("gdown.download", side_effect=_flaky): - with patch("time.sleep"): # skip retry delay + with patch("requests.get", side_effect=_flaky): + with patch("time.sleep"): results = download_all([("ID1", "a.jpg")], tmp_path / "raw") assert "ID1" in results - assert attempts["ID1"] == 2 # first attempt timed out, retry succeeded + assert attempts["ID1"] == 2 diff --git a/tests/test_pdf_generator.py b/tests/test_pdf_generator.py index 3c5dba3..c614ea5 100644 --- a/tests/test_pdf_generator.py +++ b/tests/test_pdf_generator.py @@ -98,7 +98,7 @@ def test_generate_pdf_named_correctly(tmp_path): results = generate( tmp_path / "out", "mydeck", slots, front, back, _id_to_path(front, back, img) ) - assert results[0].name == "out_mydeck.pdf" + assert results[0].name == "mydeck.pdf" def test_generate_creates_output_dir(tmp_path): @@ -231,8 +231,8 @@ def test_generate_split_files_named_with_index(tmp_path): results = generate(tmp_path / "out", "deck", slots, front, back, id_map, max_bytes=1) names = {r.name for r in results} - assert "out_deck_1.pdf" in names - assert "out_deck_2.pdf" in names + assert "deck_1.pdf" in names + assert "deck_2.pdf" in names def test_generate_no_split_when_under_cap(tmp_path):