From be6a515bf381ce0d022ae095bdf27ba2ca8c9515 Mon Sep 17 00:00:00 2001 From: Cbass50 Date: Wed, 10 Jun 2026 19:41:48 -0400 Subject: [PATCH] fix: apply --url must never select a different job than the one targeted acquire_job's target-url fallback stripped the query string to build its LIKE pattern -- but on indeed/linkedin the query string IS the job identity (?jk=, currentJobId=). The pattern '%indeed.com/viewjob%' matched every indeed job in the DB, and LIMIT 1 then applied to an arbitrary one: clicking apply for one posting could fill out a completely different company's application (observed in the wild: targeting one indeed job ran another company's job twice). Now: exact match on url/application_url first; the only fallback is a LIKE for scheme/trailing-slash variants of the SAME url with the query string preserved; no match returns None instead of someone else's job. Three regression tests included. Co-Authored-By: Claude Fable 5 --- src/applypilot/apply/launcher.py | 19 ++++++++++--- tests/test_target_url_match.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 4 deletions(-) create mode 100644 tests/test_target_url_match.py diff --git a/src/applypilot/apply/launcher.py b/src/applypilot/apply/launcher.py index 341a11a3..5b55fc1e 100644 --- a/src/applypilot/apply/launcher.py +++ b/src/applypilot/apply/launcher.py @@ -104,16 +104,27 @@ def acquire_job(target_url: str | None = None, min_score: int = 7, conn.execute("BEGIN IMMEDIATE") if target_url: - like = f"%{target_url.split('?')[0].rstrip('/')}%" - row = conn.execute(""" + # Exact match FIRST. The query string is often the entire job + # identity (indeed ?jk=, linkedin currentJobId=) -- stripping it + # for a LIKE pattern matches every job on that board, and LIMIT 1 + # then applies to an arbitrary one (a different company's job). 
+ _sel = """ SELECT url, title, site, application_url, tailored_resume_path, fit_score, location, full_description, cover_letter_path FROM jobs - WHERE (url = ? OR application_url = ? OR application_url LIKE ? OR url LIKE ?) + WHERE ({match}) AND tailored_resume_path IS NOT NULL AND apply_status != 'in_progress' LIMIT 1 - """, (target_url, target_url, like, like)).fetchone() + """ + row = conn.execute(_sel.format(match="url = ? OR application_url = ?"), + (target_url, target_url)).fetchone() + if row is None: + # Tolerant fallback for scheme / trailing-slash variants of + # the SAME url -- the query string stays in the pattern. + like = "%" + target_url.split("://", 1)[-1].rstrip("/") + "%" + row = conn.execute(_sel.format(match="url LIKE ? OR application_url LIKE ?"), + (like, like)).fetchone() else: blocked_sites, blocked_patterns = _load_blocked() # Build parameterized filters to avoid SQL injection diff --git a/tests/test_target_url_match.py b/tests/test_target_url_match.py new file mode 100644 index 00000000..4b6292d5 --- /dev/null +++ b/tests/test_target_url_match.py @@ -0,0 +1,46 @@ +"""apply --url must select exactly the targeted job, never a different one. + +Regression: the old target-url fallback stripped the query string to build +its LIKE pattern, but on indeed/linkedin the query string IS the job +identity (?jk=, currentJobId=). '%indeed.com/viewjob%' matched every indeed +job in the DB and LIMIT 1 applied to an arbitrary one. 
+"""
+import applypilot.database as db
+from applypilot.apply.launcher import acquire_job
+
+
+def _seed(conn, url):  # insert one job row for `url`, then commit
+    conn.execute(
+        "INSERT INTO jobs (url, title, site, tailored_resume_path, fit_score, "
+        "apply_status) VALUES (?,?,?,?,?,?)",
+        (url, "Engineer", "indeed", "/tmp/r.txt", 8, "pending"),  # site is "indeed" for every url
+    )
+    conn.commit()  # row is committed before the test calls acquire_job
+
+
+def _setup(tmp_path, monkeypatch):  # per-test database file under tmp_path
+    monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db")  # redirect the module-level path
+    return db.init_db(db_path=tmp_path / "test.db")  # NOTE(review): assumed to create `jobs` and return a connection -- confirm
+
+
+def test_target_url_query_string_is_job_identity(tmp_path, monkeypatch):
+    conn = _setup(tmp_path, monkeypatch)
+    _seed(conn, "https://www.indeed.com/viewjob?jk=aaaa1111")  # decoy: same path, other jk
+    _seed(conn, "https://www.indeed.com/viewjob?jk=bbbb2222")  # the targeted job
+    job = acquire_job(target_url="https://www.indeed.com/viewjob?jk=bbbb2222")
+    assert job is not None
+    assert job["url"].endswith("jk=bbbb2222")  # must not be the decoy row
+
+
+def test_target_url_unknown_job_returns_none(tmp_path, monkeypatch):
+    conn = _setup(tmp_path, monkeypatch)
+    _seed(conn, "https://www.indeed.com/viewjob?jk=aaaa1111")  # only row; its jk differs from the target
+    assert acquire_job(target_url="https://www.indeed.com/viewjob?jk=zzzz9999") is None
+
+
+def test_target_url_tolerates_scheme_and_slash_variants(tmp_path, monkeypatch):
+    conn = _setup(tmp_path, monkeypatch)
+    _seed(conn, "https://boards.greenhouse.io/acme/jobs/123")  # stored: https, no trailing slash
+    job = acquire_job(target_url="http://boards.greenhouse.io/acme/jobs/123/")  # target: http + slash
+    assert job is not None
+    assert job["url"] == "https://boards.greenhouse.io/acme/jobs/123"  # the stored form is returned