Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions src/applypilot/apply/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,27 @@ def acquire_job(target_url: str | None = None, min_score: int = 7,
conn.execute("BEGIN IMMEDIATE")

if target_url:
like = f"%{target_url.split('?')[0].rstrip('/')}%"
row = conn.execute("""
# Exact match FIRST. The query string is often the entire job
# identity (indeed ?jk=, linkedin currentJobId=) -- stripping it
# for a LIKE pattern matches every job on that board, and LIMIT 1
# then applies to an arbitrary one (a different company's job).
_sel = """
SELECT url, title, site, application_url, tailored_resume_path,
fit_score, location, full_description, cover_letter_path
FROM jobs
WHERE (url = ? OR application_url = ? OR application_url LIKE ? OR url LIKE ?)
WHERE ({match})
AND tailored_resume_path IS NOT NULL
AND apply_status != 'in_progress'
LIMIT 1
""", (target_url, target_url, like, like)).fetchone()
"""
row = conn.execute(_sel.format(match="url = ? OR application_url = ?"),
(target_url, target_url)).fetchone()
if row is None:
# Tolerant fallback for scheme / trailing-slash variants of
# the SAME url -- the query string stays in the pattern.
like = "%" + target_url.split("://", 1)[-1].rstrip("/") + "%"
row = conn.execute(_sel.format(match="url LIKE ? OR application_url LIKE ?"),
(like, like)).fetchone()
else:
blocked_sites, blocked_patterns = _load_blocked()
# Build parameterized filters to avoid SQL injection
Expand Down
46 changes: 46 additions & 0 deletions tests/test_target_url_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""apply --url must select exactly the targeted job, never a different one.

Regression: the old target-url fallback stripped the query string to build
its LIKE pattern, but on indeed/linkedin the query string IS the job
identity (?jk=, currentJobId=). The resulting '%indeed.com/viewjob%' pattern
matched every indeed job in the DB, and LIMIT 1 then picked an arbitrary
one of them.
"""
import applypilot.database as db
from applypilot.apply.launcher import acquire_job


def _seed(conn, url):
    """Insert a single job row for *url* into ``jobs`` and commit.

    Only ``url`` varies; every other column gets a fixed placeholder:
    a title, the site ``"indeed"``, a non-NULL tailored resume path, a
    fit score of 8 and the status ``"pending"``.
    """
    insert_sql = (
        "INSERT INTO jobs (url, title, site, tailored_resume_path, fit_score, "
        "apply_status) VALUES (?,?,?,?,?,?)"
    )
    values = (url, "Engineer", "indeed", "/tmp/r.txt", 8, "pending")
    conn.execute(insert_sql, values)
    conn.commit()


def _setup(tmp_path, monkeypatch):
    """Redirect the database module to a throwaway file and initialise it.

    ``db.DB_PATH`` is patched to ``tmp_path / "test.db"`` for the duration of
    the test, and the result of ``db.init_db`` for that same path is returned
    (the tests in this file use it as a connection for seeding rows).
    """
    db_file = tmp_path / "test.db"
    monkeypatch.setattr(db, "DB_PATH", db_file)
    conn = db.init_db(db_path=db_file)
    return conn


def test_target_url_query_string_is_job_identity(tmp_path, monkeypatch):
    """Two jobs differing only in ``?jk=`` must not be confused with each other."""
    other_url = "https://www.indeed.com/viewjob?jk=aaaa1111"
    wanted_url = "https://www.indeed.com/viewjob?jk=bbbb2222"

    conn = _setup(tmp_path, monkeypatch)
    # Seed the non-targeted job first, then the one we will ask for.
    for seeded_url in (other_url, wanted_url):
        _seed(conn, seeded_url)

    picked = acquire_job(target_url=wanted_url)

    assert picked is not None
    assert picked["url"].endswith("jk=bbbb2222")


def test_target_url_unknown_job_returns_none(tmp_path, monkeypatch):
    """A target whose ``?jk=`` id is not in the DB must yield no job at all."""
    conn = _setup(tmp_path, monkeypatch)
    _seed(conn, "https://www.indeed.com/viewjob?jk=aaaa1111")

    # Same board and path as the seeded row; only the job id differs.
    result = acquire_job(target_url="https://www.indeed.com/viewjob?jk=zzzz9999")

    assert result is None


def test_target_url_tolerates_scheme_and_slash_variants(tmp_path, monkeypatch):
    """An http / trailing-slash variant of a stored https url still matches it."""
    stored_url = "https://boards.greenhouse.io/acme/jobs/123"
    requested_url = "http://boards.greenhouse.io/acme/jobs/123/"

    conn = _setup(tmp_path, monkeypatch)
    _seed(conn, stored_url)

    matched = acquire_job(target_url=requested_url)

    assert matched is not None
    assert matched["url"] == stored_url