diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5682b270..249ae2b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,20 @@ All notable changes to ApplyPilot will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+
+### Fixed
+- Tailored resumes and cover letters get collision-free filenames, and parallel
+ apply workers no longer share one upload path (was: one job's resume could be
+ sent to another employer).
+- The fabrication watchlist is word-boundary matched and respects the candidate's
+ real skills (no more false hits on "scalable"/"guardrails"; legitimate C++/C#
+ skills allowed).
+- Cover-letter PDFs render the actual letter body (were near-empty).
+- Sequential `run` no longer silently caps tailoring/cover letters at 20 jobs.
+- Jobs stranded `in_progress` by a crashed run are recovered at apply startup.
+- One failing site no longer aborts the whole smart-extract stage.
+
## [0.2.0] - 2026-02-17
### Added
diff --git a/src/applypilot/apply/launcher.py b/src/applypilot/apply/launcher.py
index 341a11a3..d684d223 100644
--- a/src/applypilot/apply/launcher.py
+++ b/src/applypilot/apply/launcher.py
@@ -228,7 +228,7 @@ def gen_prompt(target_url: str, min_score: int = 7,
if txt_path and txt_path.exists():
resume_text = txt_path.read_text(encoding="utf-8")
- prompt = prompt_mod.build_prompt(job=job, tailored_resume=resume_text)
+ prompt = prompt_mod.build_prompt(job=job, tailored_resume=resume_text, worker_id=worker_id)
# Release the lock so the job stays available
release_lock(job["url"])
@@ -272,6 +272,25 @@ def mark_job(url: str, status: str, reason: str | None = None) -> None:
conn.commit()
+def reset_stale_locks() -> int:
+    """Release jobs left 'in_progress' by a run that died before finishing.
+
+    The launcher treats every worker as living in this process, so a job still
+    marked 'in_progress' at startup has no live owner. Its apply_status and
+    agent_id are set back to NULL, which makes the job eligible again.
+
+    Returns:
+        Number of stale locks cleared.
+    """
+    conn = get_connection()
+    cleared = conn.execute(
+        "UPDATE jobs SET apply_status = NULL, agent_id = NULL "
+        "WHERE apply_status = 'in_progress'"
+    ).rowcount
+    conn.commit()
+    return cleared
+
+
def reset_failed() -> int:
"""Reset all failed jobs so they can be retried.
@@ -310,11 +329,16 @@ def run_job(job: dict, port: int, worker_id: int = 0,
if txt_path and txt_path.exists():
resume_text = txt_path.read_text(encoding="utf-8")
+ # Reset the worker dir FIRST: build_prompt copies the resume/cover PDFs into
+ # APPLY_WORKER_DIR/worker-{id}/current, which reset_worker_dir would wipe.
+ worker_dir = reset_worker_dir(worker_id)
+
# Build the prompt
agent_prompt = prompt_mod.build_prompt(
job=job,
tailored_resume=resume_text,
dry_run=dry_run,
+ worker_id=worker_id,
)
# Write per-worker MCP config
@@ -347,8 +371,6 @@ def run_job(job: dict, port: int, worker_id: int = 0,
env.pop("CLAUDECODE", None)
env.pop("CLAUDE_CODE_ENTRYPOINT", None)
- worker_dir = reset_worker_dir(worker_id)
-
update_state(worker_id, status="applying", job_title=job["title"],
company=job.get("site", ""), score=job.get("fit_score", 0),
start_time=time.time(), actions=0, last_action="starting")
@@ -674,6 +696,11 @@ def main(limit: int = 1, target_url: str | None = None,
config.ensure_dirs()
console = Console()
+ # Recover jobs stranded 'in_progress' by a previous crashed run.
+ recovered = reset_stale_locks()
+ if recovered:
+ console.print(f"[yellow]Recovered {recovered} stale in-progress job(s)[/yellow]")
+
if continuous:
effective_limit = 0
mode_label = "continuous"
diff --git a/src/applypilot/apply/prompt.py b/src/applypilot/apply/prompt.py
index 37c3790a..04852c94 100644
--- a/src/applypilot/apply/prompt.py
+++ b/src/applypilot/apply/prompt.py
@@ -419,7 +419,8 @@ def _build_captcha_section() -> str:
def build_prompt(job: dict, tailored_resume: str,
cover_letter: str | None = None,
- dry_run: bool = False) -> str:
+ dry_run: bool = False,
+ worker_id: int = 0) -> str:
"""Build the full instruction prompt for the apply agent.
Loads the user profile and search config internally. All personal data
@@ -451,7 +452,8 @@ def build_prompt(job: dict, tailored_resume: str,
# Copy to a clean filename for upload (recruiters see the filename)
full_name = personal["full_name"]
name_slug = full_name.replace(" ", "_")
- dest_dir = config.APPLY_WORKER_DIR / "current"
+ # Per-worker upload dir so parallel workers don't race on one shared path.
+ dest_dir = config.APPLY_WORKER_DIR / f"worker-{worker_id}" / "current"
dest_dir.mkdir(parents=True, exist_ok=True)
upload_pdf = dest_dir / f"{name_slug}_Resume.pdf"
shutil.copy(str(src_pdf), str(upload_pdf))
diff --git a/src/applypilot/discovery/smartextract.py b/src/applypilot/discovery/smartextract.py
index cf49a9a2..7691f2eb 100644
--- a/src/applypilot/discovery/smartextract.py
+++ b/src/applypilot/discovery/smartextract.py
@@ -1031,6 +1031,7 @@ def _run_all(
results: list[dict] = []
total_new = 0
total_existing = 0
+ errors = 0
def _process_result(r: dict, target: dict) -> None:
nonlocal total_new, total_existing
@@ -1052,7 +1053,13 @@ def _process_result(r: dict, target: dict) -> None:
}
for future in as_completed(future_to_target):
target = future_to_target[future]
- r = future.result()
+ # One flaky site must not abort the whole stage.
+ try:
+ r = future.result()
+ except Exception as e:
+ log.warning("Site %s failed: %s -- continuing", target["name"], e)
+ errors += 1
+ continue
results.append(r)
_process_result(r, target)
else:
@@ -1063,7 +1070,12 @@ def _process_result(r: dict, target: dict) -> None:
label = f"{target['name']} [{target['query']}]"
log.info("[%d/%d] %s", i + 1, len(targets), label)
- r = _run_one_site(target["name"], target["url"])
+ try:
+ r = _run_one_site(target["name"], target["url"])
+ except Exception as e:
+ log.warning("Site %s failed: %s -- continuing", target["name"], e)
+ errors += 1
+ continue
results.append(r)
_process_result(r, target)
@@ -1080,7 +1092,7 @@ def _process_result(r: dict, target: dict) -> None:
log.info("%d/%d PASS", passed, len(results))
return {"total_new": total_new, "total_existing": total_existing,
- "passed": passed, "total": len(results)}
+ "passed": passed, "total": len(results), "errors": errors}
# -- Public entry point ------------------------------------------------------
diff --git a/src/applypilot/pipeline.py b/src/applypilot/pipeline.py
index 29881c5f..a3e1ed32 100644
--- a/src/applypilot/pipeline.py
+++ b/src/applypilot/pipeline.py
@@ -125,7 +125,7 @@ def _run_tailor(min_score: int = 7, validation_mode: str = "normal") -> dict:
"""Stage: Resume tailoring — generate tailored resumes for high-fit jobs."""
try:
from applypilot.scoring.tailor import run_tailoring
- run_tailoring(min_score=min_score, validation_mode=validation_mode)
+ run_tailoring(min_score=min_score, limit=0, validation_mode=validation_mode)
return {"status": "ok"}
except Exception as e:
log.error("Tailoring failed: %s", e)
@@ -136,7 +136,7 @@ def _run_cover(min_score: int = 7, validation_mode: str = "normal") -> dict:
"""Stage: Cover letter generation."""
try:
from applypilot.scoring.cover_letter import run_cover_letters
- run_cover_letters(min_score=min_score, validation_mode=validation_mode)
+ run_cover_letters(min_score=min_score, limit=0, validation_mode=validation_mode)
return {"status": "ok"}
except Exception as e:
log.error("Cover letter generation failed: %s", e)
diff --git a/src/applypilot/scoring/cover_letter.py b/src/applypilot/scoring/cover_letter.py
index c16cdd5f..199f7cd8 100644
--- a/src/applypilot/scoring/cover_letter.py
+++ b/src/applypilot/scoring/cover_letter.py
@@ -14,6 +14,7 @@
from applypilot.config import COVER_LETTER_DIR, RESUME_PATH, load_profile
from applypilot.database import get_connection, get_jobs_by_stage
from applypilot.llm import get_client
+from applypilot.scoring.tailor import make_filename_prefix
from applypilot.scoring.validator import (
BANNED_WORDS,
LLM_LEAK_PHRASES,
@@ -201,16 +202,21 @@ def run_cover_letters(min_score: int = 7, limit: int = 20,
resume_text = RESUME_PATH.read_text(encoding="utf-8")
conn = get_connection()
- # Fetch jobs that have tailored resumes but no cover letter yet
- jobs = conn.execute(
+ # Fetch jobs that have tailored resumes but no cover letter yet.
+ # limit <= 0 means "all": a literal LIMIT 0 would return zero rows.
+ sql = (
"SELECT * FROM jobs "
"WHERE fit_score >= ? AND tailored_resume_path IS NOT NULL "
"AND full_description IS NOT NULL "
"AND (cover_letter_path IS NULL OR cover_letter_path = '') "
"AND COALESCE(cover_attempts, 0) < ? "
- "ORDER BY fit_score DESC LIMIT ?",
- (min_score, MAX_ATTEMPTS, limit),
- ).fetchall()
+ "ORDER BY fit_score DESC"
+ )
+ params: list = [min_score, MAX_ATTEMPTS]
+ if limit > 0:
+ sql += " LIMIT ?"
+ params.append(limit)
+ jobs = conn.execute(sql, params).fetchall()
if not jobs:
log.info("No jobs needing cover letters (score >= %d).", min_score)
@@ -237,19 +243,19 @@ def run_cover_letters(min_score: int = 7, limit: int = 20,
letter = generate_cover_letter(resume_text, job, profile,
validation_mode=validation_mode)
- # Build safe filename prefix
- safe_title = re.sub(r"[^\w\s-]", "", job["title"])[:50].strip().replace(" ", "_")
- safe_site = re.sub(r"[^\w\s-]", "", job["site"])[:20].strip().replace(" ", "_")
- prefix = f"{safe_site}_{safe_title}"
+ # Build safe, collision-free filename prefix
+ prefix = make_filename_prefix(job)
cl_path = COVER_LETTER_DIR / f"{prefix}_CL.txt"
cl_path.write_text(letter, encoding="utf-8")
- # Generate PDF (best-effort)
+ # Generate PDF (best-effort). Use the letter renderer, NOT the resume
+ # converter, which drops a cover letter's body.
pdf_path = None
try:
- from applypilot.scoring.pdf import convert_to_pdf
- pdf_path = str(convert_to_pdf(cl_path))
+ from applypilot.scoring.pdf import convert_letter_to_pdf
+ applicant_name = profile.get("personal", {}).get("full_name", "")
+ pdf_path = str(convert_letter_to_pdf(cl_path, applicant_name=applicant_name))
except Exception:
log.debug("PDF generation failed for %s", cl_path, exc_info=True)
diff --git a/src/applypilot/scoring/pdf.py b/src/applypilot/scoring/pdf.py
index 2b87b673..ae4c9520 100644
--- a/src/applypilot/scoring/pdf.py
+++ b/src/applypilot/scoring/pdf.py
@@ -5,6 +5,7 @@
"""
import logging
+import re
from pathlib import Path
from applypilot.config import TAILORED_DIR
@@ -390,6 +391,43 @@ def convert_to_pdf(
return out
+def _letter_html(text: str, applicant_name: str) -> str:
+    """Build a simple, correctly-structured HTML letter.
+
+    Cover letters have no resume structure (no SUMMARY line, no ALL-CAPS section
+    headers), so parse_resume() drops their body. This renders the prose as
+    paragraphs under a modest name header, escaping all content.
+
+    Args:
+        text: Plain-text letter; blank lines separate paragraphs.
+        applicant_name: Name shown once in the header element.
+
+    Returns:
+        A complete HTML document as a string.
+    """
+    import html as _html
+
+    # Blank-line-separated chunks become paragraphs; empty chunks are dropped.
+    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", text.strip()) if p.strip()]
+    # Escape first, then turn the remaining single newlines into line breaks,
+    # so the only markup in the body is the markup emitted here.
+    body = "\n".join(
+        f"<p>{_html.escape(p).replace(chr(10), '<br>')}</p>" for p in paragraphs
+    )
+    name = _html.escape(applicant_name)
+    # NOTE(review): the tag text of this template was missing from the patch as
+    # received. The structure below is the minimum the accompanying tests need
+    # (one class="name" header followed by the paragraphs); restore any
+    # stylesheet from the upstream commit before merging.
+    return f"""<!DOCTYPE html>
+<html><head><meta charset="utf-8"></head><body>
+<div class="name">{name}</div>
+{body}
+</body></html>
+"""
+
+
+def convert_letter_to_pdf(txt_path: Path, applicant_name: str,
+                          output_path: Path | None = None) -> Path:
+    """Render a cover-letter .txt file to PDF and return the PDF's path."""
+    source = Path(txt_path)
+    markup = _letter_html(source.read_text(encoding="utf-8"), applicant_name)
+    target = Path(output_path) if output_path else source.with_suffix(".pdf")
+    render_pdf(markup, str(target))
+    log.info("Cover letter PDF generated: %s", target)
+    return target
+
+
def batch_convert(limit: int = 50) -> int:
"""Convert .txt files in TAILORED_DIR that don't have corresponding PDFs.
diff --git a/src/applypilot/scoring/tailor.py b/src/applypilot/scoring/tailor.py
index 352fb5ff..1aef051a 100644
--- a/src/applypilot/scoring/tailor.py
+++ b/src/applypilot/scoring/tailor.py
@@ -9,6 +9,7 @@
to avoid apologetic spirals.
"""
+import hashlib
import json
import logging
import re
@@ -32,6 +33,19 @@
MAX_ATTEMPTS = 5 # max cross-run retries before giving up
+def make_filename_prefix(job: dict) -> str:
+    """Build a collision-free filename prefix for a job's generated artifacts.
+
+    Two postings with the same title on one board would otherwise share a path
+    and overwrite each other, so a short hash of the (unique) job URL is
+    appended. A NULL title or site is treated as empty instead of raising.
+    """
+    safe_title = re.sub(r"[^\w\s-]", "", job["title"] or "")[:50].strip().replace(" ", "_")
+    safe_site = re.sub(r"[^\w\s-]", "", job["site"] or "")[:20].strip().replace(" ", "_")
+    url_hash = hashlib.sha1(job["url"].encode()).hexdigest()[:8]
+    return f"{safe_site}_{safe_title}_{url_hash}"
+
+
# ── Prompt Builders (profile-driven) ──────────────────────────────────────
def _build_tailor_prompt(profile: dict) -> str:
@@ -490,10 +504,8 @@ def run_tailoring(min_score: int = 7, limit: int = 20,
tailored, report = tailor_resume(resume_text, job, profile,
validation_mode=validation_mode)
- # Build safe filename prefix
- safe_title = re.sub(r"[^\w\s-]", "", job["title"])[:50].strip().replace(" ", "_")
- safe_site = re.sub(r"[^\w\s-]", "", job["site"])[:20].strip().replace(" ", "_")
- prefix = f"{safe_site}_{safe_title}"
+ # Build safe, collision-free filename prefix
+ prefix = make_filename_prefix(job)
# Save tailored resume text
txt_path = TAILORED_DIR / f"{prefix}.txt"
diff --git a/src/applypilot/scoring/validator.py b/src/applypilot/scoring/validator.py
index abb8f89d..7091d4db 100644
--- a/src/applypilot/scoring/validator.py
+++ b/src/applypilot/scoring/validator.py
@@ -58,15 +58,45 @@
# Known fabrication markers: completely unrelated tools/languages.
# Reasonable stretches (K8s, Terraform, Redis, Kafka etc.) are ALLOWED.
-FABRICATION_WATCHLIST: set[str] = {
+# EXACT_TERMS match on word boundaries; PREFIX_TERMS match any word starting
+# with them (so "certif" catches certified/certification).
+EXACT_TERMS: set[str] = {
# Languages with zero relation to the candidate's stack
"c#", "c++", "golang", "rust", "ruby",
"kotlin", "swift", "scala", "matlab",
# Frameworks for wrong languages
"spring", "django", "rails", "angular", "vue", "svelte",
# Hard lies: certifications can't be stretched
- "certif", "certified", "pmp", "scrum master", "aws certified",
+ "certified", "pmp", "scrum master", "aws certified",
}
+PREFIX_TERMS: set[str] = {"certif"}
+
+# Kept for backwards-compat (tailor.py imports this name).
+FABRICATION_WATCHLIST: set[str] = EXACT_TERMS | PREFIX_TERMS
+
+
+def find_watchlist_hits(text: str, allowed: set[str]) -> list[str]:
+ """Return fabrication-watchlist terms that appear in ``text``.
+
+ Uses word-boundary matching so "scala" does not fire on "scalable" and
+ "rails" does not fire on "guardrails". Terms present in ``allowed`` (the
+ candidate's real skills) are never flagged. ``allowed`` is matched by exact
+ lowercased membership against the watchlist term.
+ """
+ low = text.lower()
+ hits: list[str] = []
+ for term in EXACT_TERMS:
+ if term in allowed:
+ continue
+ # Boundaries that also respect '+' and '#' so c++/c# match correctly.
+ if re.search(rf"(? dic
# Collect all text for bulk checks
all_text_parts: list[str] = [data["summary"]]
- # Skills: check for fabrication (always enforced)
+ # Skills: check for fabrication (always enforced), but never flag a tool the
+ # candidate actually lists in their profile.
+ allowed = _build_skills_set(profile)
if isinstance(data["skills"], dict):
- skills_text = " ".join(str(v) for v in data["skills"].values()).lower()
- for fake in FABRICATION_WATCHLIST:
- if len(fake) <= 2:
- continue
- if fake in skills_text:
- errors.append(f"Fabricated skill: '{fake}'")
+ skills_text = " ".join(str(v) for v in data["skills"].values())
+ for fake in find_watchlist_hits(skills_text, allowed):
+ errors.append(f"Fabricated skill: '{fake}'")
# Experience: preserved companies must be present (always enforced)
resume_facts = profile.get("resume_facts", {})
@@ -243,23 +272,19 @@ def validate_tailored_resume(text: str, profile: dict, original_text: str = "")
warnings.append("Phone missing -- will be injected")
# 7. Scan TECHNICAL SKILLS section for fabricated tools
+ allowed = _build_skills_set(profile)
skills_start = text_lower.find("technical skills")
skills_end = text_lower.find("experience", skills_start) if skills_start != -1 else -1
if skills_start != -1 and skills_end != -1:
skills_block = text_lower[skills_start:skills_end]
- for fake in FABRICATION_WATCHLIST:
- if len(fake) <= 2:
- continue
- if fake in skills_block:
- errors.append(f"FABRICATED SKILL in Technical Skills: '{fake}'")
+ for fake in find_watchlist_hits(skills_block, allowed):
+ errors.append(f"FABRICATED SKILL in Technical Skills: '{fake}'")
# 8. Scan full document for fabrication watchlist items not in original
if original_text:
- original_lower = original_text.lower()
- for fake in FABRICATION_WATCHLIST:
- if len(fake) <= 2:
- continue
- if fake in text_lower and fake not in original_lower:
+ original_hits = set(find_watchlist_hits(original_text, allowed))
+ for fake in find_watchlist_hits(text, allowed):
+ if fake not in original_hits:
warnings.append(f"New tool/skill appeared: '{fake}' (not in original)")
# 9. Em dashes (should be auto-fixed by sanitize_text, but safety net)
diff --git a/tests/test_cover_pdf.py b/tests/test_cover_pdf.py
new file mode 100644
index 00000000..47f1fe00
--- /dev/null
+++ b/tests/test_cover_pdf.py
@@ -0,0 +1,23 @@
+"""F11: cover-letter PDFs render the body (not garbage) and escape content."""
+from applypilot.scoring.pdf import _letter_html
+
+
+def test_letter_html_keeps_all_paragraphs():
+ letter = "Dear Hiring Manager,\n\nI built systems with List.\n\nSincerely,\nJane"
+ html = _letter_html(letter, "Jane Doe")
+ assert "Dear Hiring Manager," in html
+ assert "Sincerely," in html
+ # Middle paragraph survives.
+ assert "I built systems" in html
+
+
+def test_letter_html_escapes_dangerous_content():
+    html = _letter_html("I used List<String> and <script>alert(1)</script>.", "Jane")
+    assert "List&lt;String&gt;" in html
+    assert "<script>" not in html
+    assert "&lt;script&gt;" in html
+
+
+def test_name_appears_once_as_header():
+ html = _letter_html("Body text here.", "Jane Doe")
+ assert html.count('class="name">Jane Doe') == 1
diff --git a/tests/test_filenames.py b/tests/test_filenames.py
new file mode 100644
index 00000000..6d796c4e
--- /dev/null
+++ b/tests/test_filenames.py
@@ -0,0 +1,40 @@
+"""F9: per-job artifact filenames must not collide; uploads are per-worker."""
+import applypilot.config as config
+import applypilot.apply.prompt as prompt_mod
+from applypilot.scoring.tailor import make_filename_prefix
+
+
+def test_same_title_site_different_url_distinct_prefix():
+ a = make_filename_prefix({"title": "Software Engineer", "site": "linkedin",
+ "url": "https://example.com/a"})
+ b = make_filename_prefix({"title": "Software Engineer", "site": "linkedin",
+ "url": "https://example.com/b"})
+ assert a != b
+
+
+def test_same_job_stable_prefix():
+ job = {"title": "Software Engineer", "site": "linkedin", "url": "https://example.com/a"}
+ assert make_filename_prefix(job) == make_filename_prefix(job)
+
+
+def _fixture_profile():
+ return {
+ "personal": {"full_name": "Jane Doe", "email": "j@example.com",
+ "phone": "5551234567", "city": "Austin"},
+ "work_authorization": {},
+ "compensation": {"salary_expectation": "100000"},
+ "experience": {}, "availability": {}, "eeo": {}, "skills_boundary": {},
+ }
+
+
+def test_upload_dir_is_per_worker(tmp_path, monkeypatch):
+ monkeypatch.setattr(config, "load_profile", _fixture_profile)
+ monkeypatch.setattr(config, "load_search_config", lambda: {})
+ monkeypatch.setattr(config, "APPLY_WORKER_DIR", tmp_path)
+ pdf = tmp_path / "x.pdf"
+ pdf.write_bytes(b"%PDF-1.4 dummy")
+ job = {"url": "https://example.com/j", "title": "Engineer", "site": "linkedin",
+ "application_url": None, "fit_score": 8, "tailored_resume_path": str(tmp_path / "x.txt")}
+ out = prompt_mod.build_prompt(job=job, tailored_resume="r", worker_id=2)
+ assert "worker-2" in out
+ assert (tmp_path / "worker-2" / "current" / "Jane_Doe_Resume.pdf").exists()
diff --git a/tests/test_smartextract_isolation.py b/tests/test_smartextract_isolation.py
new file mode 100644
index 00000000..6f1c913e
--- /dev/null
+++ b/tests/test_smartextract_isolation.py
@@ -0,0 +1,25 @@
+"""F14: one flaky site must not abort the whole smart-extract stage."""
+import applypilot.database as db
+import applypilot.discovery.smartextract as se
+
+
+def test_one_failing_site_does_not_abort(tmp_path, monkeypatch):
+ monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db")
+
+ def fake_run_one_site(name, url):
+ if name == "A":
+ raise RuntimeError("network timeout")
+ return {
+ "name": "B", "status": "PASS", "strategy": "test",
+ "total": 1, "titles": 1,
+ "jobs": [{"url": "https://example.com/1", "title": "T",
+ "salary": None, "description": None, "location": "Remote"}],
+ }
+
+ monkeypatch.setattr(se, "_run_one_site", fake_run_one_site)
+ stats = se._run_all(
+ [{"name": "A", "url": "u"}, {"name": "B", "url": "v"}], [], [], workers=1)
+
+ assert stats["errors"] == 1
+ assert stats["passed"] == 1
+ assert stats["total_new"] == 1
diff --git a/tests/test_stage_limits.py b/tests/test_stage_limits.py
new file mode 100644
index 00000000..524c6803
--- /dev/null
+++ b/tests/test_stage_limits.py
@@ -0,0 +1,19 @@
+"""F12: sequential pipeline must not silently cap tailor/cover at 20."""
+import applypilot.database as db
+
+
+def test_get_jobs_by_stage_limit_zero_is_unlimited(tmp_path, monkeypatch):
+ monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db")
+ conn = db.init_db()
+ for i in range(25):
+ conn.execute(
+ "INSERT INTO jobs (url, title, fit_score, full_description, tailored_resume_path) "
+ "VALUES (?,?,?,?,NULL)",
+ (f"https://example.com/{i}", "Engineer", 8, "x"),
+ )
+ conn.commit()
+
+ all_jobs = db.get_jobs_by_stage(conn=conn, stage="pending_tailor", min_score=7, limit=0)
+ assert len(all_jobs) == 25
+ capped = db.get_jobs_by_stage(conn=conn, stage="pending_tailor", min_score=7, limit=10)
+ assert len(capped) == 10
diff --git a/tests/test_stale_locks.py b/tests/test_stale_locks.py
new file mode 100644
index 00000000..28a14e0c
--- /dev/null
+++ b/tests/test_stale_locks.py
@@ -0,0 +1,19 @@
+"""F13: stale in_progress locks are recovered at apply startup."""
+import applypilot.database as db
+from applypilot.apply.launcher import reset_stale_locks
+
+
+def test_reset_stale_locks(tmp_path, monkeypatch):
+ monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db")
+ conn = db.init_db()
+ conn.execute(
+ "INSERT INTO jobs (url, title, apply_status, agent_id) VALUES (?,?,?,?)",
+ ("https://example.com/stuck", "Engineer", "in_progress", "worker-0"),
+ )
+ conn.commit()
+
+ assert reset_stale_locks() == 1
+ row = conn.execute("SELECT apply_status, agent_id FROM jobs WHERE url=?",
+ ("https://example.com/stuck",)).fetchone()
+ assert row["apply_status"] is None
+ assert row["agent_id"] is None
diff --git a/tests/test_validator_watchlist.py b/tests/test_validator_watchlist.py
new file mode 100644
index 00000000..499b1504
--- /dev/null
+++ b/tests/test_validator_watchlist.py
@@ -0,0 +1,44 @@
+"""F10: fabrication watchlist must be word-boundary and profile-aware."""
+from applypilot.scoring.validator import find_watchlist_hits, validate_json_fields
+
+
+def test_scalable_does_not_trip_scala():
+ assert "scala" not in find_watchlist_hits("highly scalable systems", set())
+
+
+def test_guardrails_does_not_trip_rails():
+ assert "rails" not in find_watchlist_hits("implemented guardrails everywhere", set())
+
+
+def test_real_rails_is_flagged():
+ assert "rails" in find_watchlist_hits("Ruby on Rails developer", set())
+
+
+def test_certification_prefix_flagged():
+ assert "certif" in find_watchlist_hits("AWS Certified Solutions Architect", set())
+
+
+def test_cplusplus_flagged_and_whitelistable():
+ assert "c++" in find_watchlist_hits("C++ and Python experience", set())
+ assert "c++" not in find_watchlist_hits("C++ and Python experience", {"c++"})
+
+
+def test_csharp_flagged_and_whitelistable():
+ assert "c#" in find_watchlist_hits("C# backend work", set())
+ assert "c#" not in find_watchlist_hits("C# backend work", {"c#"})
+
+
+def test_end_to_end_validate_json_fields_respects_profile():
+ data = {
+ "title": "Engineer", "summary": "Engineer",
+ "skills": {"languages": ["C++", "Python"]},
+ "experience": [{"company": "Acme", "bullets": ["did things"]}],
+ "education": [{"school": "State U"}], "projects": [{"name": "P"}],
+ }
+ # No skills_boundary -> C++ flagged as fabricated.
+ res_none = validate_json_fields(data, {"resume_facts": {}}, mode="normal")
+ assert any("c++" in e.lower() for e in res_none["errors"])
+ # C++ in profile -> not flagged.
+ res_ok = validate_json_fields(
+ data, {"resume_facts": {}, "skills_boundary": {"languages": ["C++"]}}, mode="normal")
+ assert not any("c++" in e.lower() for e in res_ok["errors"])