diff --git a/CHANGELOG.md b/CHANGELOG.md index 5682b270..249ae2b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to ApplyPilot will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed +- Tailored resumes and cover letters get collision-free filenames, and parallel + apply workers no longer share one upload path (was: one job's resume could be + sent to another employer). +- The fabrication watchlist is word-boundary matched and respects the candidate's + real skills (no more false hits on "scalable"/"guardrails"; legitimate C++/C# + skills allowed). +- Cover-letter PDFs render the actual letter body (were near-empty). +- Sequential `run` no longer silently caps tailoring/cover letters at 20 jobs. +- Jobs stranded `in_progress` by a crashed run are recovered at apply startup. +- One failing site no longer aborts the whole smart-extract stage. + ## [0.2.0] - 2026-02-17 ### Added diff --git a/src/applypilot/apply/launcher.py b/src/applypilot/apply/launcher.py index 341a11a3..d684d223 100644 --- a/src/applypilot/apply/launcher.py +++ b/src/applypilot/apply/launcher.py @@ -228,7 +228,7 @@ def gen_prompt(target_url: str, min_score: int = 7, if txt_path and txt_path.exists(): resume_text = txt_path.read_text(encoding="utf-8") - prompt = prompt_mod.build_prompt(job=job, tailored_resume=resume_text) + prompt = prompt_mod.build_prompt(job=job, tailored_resume=resume_text, worker_id=worker_id) # Release the lock so the job stays available release_lock(job["url"]) @@ -272,6 +272,25 @@ def mark_job(url: str, status: str, reason: str | None = None) -> None: conn.commit() +def reset_stale_locks() -> int: + """Clear jobs stuck in 'in_progress' from a previous crashed run. 
+ + All workers live in this process, so anything still 'in_progress' at startup + is by definition stale (the worker that held it is gone). Each such job's + apply_status and agent_id are reset to NULL so the job is eligible again. + + Returns: + Number of stale locks cleared. + """ + conn = get_connection() + cursor = conn.execute( + "UPDATE jobs SET apply_status = NULL, agent_id = NULL " + "WHERE apply_status = 'in_progress'" + ) + conn.commit() + return cursor.rowcount + + def reset_failed() -> int: """Reset all failed jobs so they can be retried. @@ -310,11 +329,16 @@ def run_job(job: dict, port: int, worker_id: int = 0, if txt_path and txt_path.exists(): resume_text = txt_path.read_text(encoding="utf-8") + # Reset the worker dir FIRST: build_prompt copies the resume/cover PDFs into + # APPLY_WORKER_DIR/worker-{id}/current, which reset_worker_dir would wipe. + worker_dir = reset_worker_dir(worker_id) + # Build the prompt agent_prompt = prompt_mod.build_prompt( job=job, tailored_resume=resume_text, dry_run=dry_run, + worker_id=worker_id, ) # Write per-worker MCP config @@ -347,8 +371,6 @@ def run_job(job: dict, port: int, worker_id: int = 0, env.pop("CLAUDECODE", None) env.pop("CLAUDE_CODE_ENTRYPOINT", None) - worker_dir = reset_worker_dir(worker_id) - update_state(worker_id, status="applying", job_title=job["title"], company=job.get("site", ""), score=job.get("fit_score", 0), start_time=time.time(), actions=0, last_action="starting") @@ -674,6 +696,11 @@ def main(limit: int = 1, target_url: str | None = None, config.ensure_dirs() console = Console() + # Recover jobs stranded 'in_progress' by a previous crashed run. 
+ recovered = reset_stale_locks() + if recovered: + console.print(f"[yellow]Recovered {recovered} stale in-progress job(s)[/yellow]") + if continuous: effective_limit = 0 mode_label = "continuous" diff --git a/src/applypilot/apply/prompt.py b/src/applypilot/apply/prompt.py index 37c3790a..04852c94 100644 --- a/src/applypilot/apply/prompt.py +++ b/src/applypilot/apply/prompt.py @@ -419,7 +419,8 @@ def _build_captcha_section() -> str: def build_prompt(job: dict, tailored_resume: str, cover_letter: str | None = None, - dry_run: bool = False) -> str: + dry_run: bool = False, + worker_id: int = 0) -> str: """Build the full instruction prompt for the apply agent. Loads the user profile and search config internally. All personal data @@ -451,7 +452,8 @@ def build_prompt(job: dict, tailored_resume: str, # Copy to a clean filename for upload (recruiters see the filename) full_name = personal["full_name"] name_slug = full_name.replace(" ", "_") - dest_dir = config.APPLY_WORKER_DIR / "current" + # Per-worker upload dir so parallel workers don't race on one shared path. + dest_dir = config.APPLY_WORKER_DIR / f"worker-{worker_id}" / "current" dest_dir.mkdir(parents=True, exist_ok=True) upload_pdf = dest_dir / f"{name_slug}_Resume.pdf" shutil.copy(str(src_pdf), str(upload_pdf)) diff --git a/src/applypilot/discovery/smartextract.py b/src/applypilot/discovery/smartextract.py index cf49a9a2..7691f2eb 100644 --- a/src/applypilot/discovery/smartextract.py +++ b/src/applypilot/discovery/smartextract.py @@ -1031,6 +1031,7 @@ def _run_all( results: list[dict] = [] total_new = 0 total_existing = 0 + errors = 0 def _process_result(r: dict, target: dict) -> None: nonlocal total_new, total_existing @@ -1052,7 +1053,13 @@ def _process_result(r: dict, target: dict) -> None: } for future in as_completed(future_to_target): target = future_to_target[future] - r = future.result() + # One flaky site must not abort the whole stage. 
+ try: + r = future.result() + except Exception as e: + log.warning("Site %s failed: %s -- continuing", target["name"], e) + errors += 1 + continue results.append(r) _process_result(r, target) else: @@ -1063,7 +1070,12 @@ def _process_result(r: dict, target: dict) -> None: label = f"{target['name']} [{target['query']}]" log.info("[%d/%d] %s", i + 1, len(targets), label) - r = _run_one_site(target["name"], target["url"]) + try: + r = _run_one_site(target["name"], target["url"]) + except Exception as e: + log.warning("Site %s failed: %s -- continuing", target["name"], e) + errors += 1 + continue results.append(r) _process_result(r, target) @@ -1080,7 +1092,7 @@ def _process_result(r: dict, target: dict) -> None: log.info("%d/%d PASS", passed, len(results)) return {"total_new": total_new, "total_existing": total_existing, - "passed": passed, "total": len(results)} + "passed": passed, "total": len(results), "errors": errors} # -- Public entry point ------------------------------------------------------ diff --git a/src/applypilot/pipeline.py b/src/applypilot/pipeline.py index 29881c5f..a3e1ed32 100644 --- a/src/applypilot/pipeline.py +++ b/src/applypilot/pipeline.py @@ -125,7 +125,7 @@ def _run_tailor(min_score: int = 7, validation_mode: str = "normal") -> dict: """Stage: Resume tailoring — generate tailored resumes for high-fit jobs.""" try: from applypilot.scoring.tailor import run_tailoring - run_tailoring(min_score=min_score, validation_mode=validation_mode) + run_tailoring(min_score=min_score, limit=0, validation_mode=validation_mode) return {"status": "ok"} except Exception as e: log.error("Tailoring failed: %s", e) @@ -136,7 +136,7 @@ def _run_cover(min_score: int = 7, validation_mode: str = "normal") -> dict: """Stage: Cover letter generation.""" try: from applypilot.scoring.cover_letter import run_cover_letters - run_cover_letters(min_score=min_score, validation_mode=validation_mode) + run_cover_letters(min_score=min_score, limit=0, 
validation_mode=validation_mode) return {"status": "ok"} except Exception as e: log.error("Cover letter generation failed: %s", e) diff --git a/src/applypilot/scoring/cover_letter.py b/src/applypilot/scoring/cover_letter.py index c16cdd5f..199f7cd8 100644 --- a/src/applypilot/scoring/cover_letter.py +++ b/src/applypilot/scoring/cover_letter.py @@ -14,6 +14,7 @@ from applypilot.config import COVER_LETTER_DIR, RESUME_PATH, load_profile from applypilot.database import get_connection, get_jobs_by_stage from applypilot.llm import get_client +from applypilot.scoring.tailor import make_filename_prefix from applypilot.scoring.validator import ( BANNED_WORDS, LLM_LEAK_PHRASES, @@ -201,16 +202,21 @@ def run_cover_letters(min_score: int = 7, limit: int = 20, resume_text = RESUME_PATH.read_text(encoding="utf-8") conn = get_connection() - # Fetch jobs that have tailored resumes but no cover letter yet - jobs = conn.execute( + # Fetch jobs that have tailored resumes but no cover letter yet. + # limit <= 0 means "all": a literal LIMIT 0 would return zero rows. + sql = ( "SELECT * FROM jobs " "WHERE fit_score >= ? AND tailored_resume_path IS NOT NULL " "AND full_description IS NOT NULL " "AND (cover_letter_path IS NULL OR cover_letter_path = '') " "AND COALESCE(cover_attempts, 0) < ? " - "ORDER BY fit_score DESC LIMIT ?", - (min_score, MAX_ATTEMPTS, limit), - ).fetchall() + "ORDER BY fit_score DESC" + ) + params: list = [min_score, MAX_ATTEMPTS] + if limit > 0: + sql += " LIMIT ?" 
+ params.append(limit) + jobs = conn.execute(sql, params).fetchall() if not jobs: log.info("No jobs needing cover letters (score >= %d).", min_score) @@ -237,19 +243,19 @@ def run_cover_letters(min_score: int = 7, limit: int = 20, letter = generate_cover_letter(resume_text, job, profile, validation_mode=validation_mode) - # Build safe filename prefix - safe_title = re.sub(r"[^\w\s-]", "", job["title"])[:50].strip().replace(" ", "_") - safe_site = re.sub(r"[^\w\s-]", "", job["site"])[:20].strip().replace(" ", "_") - prefix = f"{safe_site}_{safe_title}" + # Build safe, collision-free filename prefix + prefix = make_filename_prefix(job) cl_path = COVER_LETTER_DIR / f"{prefix}_CL.txt" cl_path.write_text(letter, encoding="utf-8") - # Generate PDF (best-effort) + # Generate PDF (best-effort). Use the letter renderer, NOT the resume + # converter, which drops a cover letter's body. pdf_path = None try: - from applypilot.scoring.pdf import convert_to_pdf - pdf_path = str(convert_to_pdf(cl_path)) + from applypilot.scoring.pdf import convert_letter_to_pdf + applicant_name = profile.get("personal", {}).get("full_name", "") + pdf_path = str(convert_letter_to_pdf(cl_path, applicant_name=applicant_name)) except Exception: log.debug("PDF generation failed for %s", cl_path, exc_info=True) diff --git a/src/applypilot/scoring/pdf.py b/src/applypilot/scoring/pdf.py index 2b87b673..ae4c9520 100644 --- a/src/applypilot/scoring/pdf.py +++ b/src/applypilot/scoring/pdf.py @@ -5,6 +5,7 @@ """ import logging +import re from pathlib import Path from applypilot.config import TAILORED_DIR @@ -390,6 +391,43 @@ def convert_to_pdf( return out +def _letter_html(text: str, applicant_name: str) -> str: + """Build a simple, correctly-structured HTML letter. + + Cover letters have no resume structure (no SUMMARY line, no ALL-CAPS section + headers), so parse_resume() drops their body. This renders the prose as + paragraphs under a modest name header, escaping all content. 
+ """ + import html as _html + + paragraphs = [p.strip() for p in re.split(r"\n\s*\n", text.strip()) if p.strip()] + body = "\n".join( + f"

{_html.escape(p).replace(chr(10), '
')}

" for p in paragraphs + ) + name = _html.escape(applicant_name) + return f""" + +
{name}
+{body} +""" + + +def convert_letter_to_pdf(txt_path: Path, applicant_name: str, + output_path: Path | None = None) -> Path: + """Render a cover-letter .txt to a properly formatted PDF.""" + txt_path = Path(txt_path) + html = _letter_html(txt_path.read_text(encoding="utf-8"), applicant_name) + out = Path(output_path or txt_path.with_suffix(".pdf")) + render_pdf(html, str(out)) + log.info("Cover letter PDF generated: %s", out) + return out + + def batch_convert(limit: int = 50) -> int: """Convert .txt files in TAILORED_DIR that don't have corresponding PDFs. diff --git a/src/applypilot/scoring/tailor.py b/src/applypilot/scoring/tailor.py index 352fb5ff..1aef051a 100644 --- a/src/applypilot/scoring/tailor.py +++ b/src/applypilot/scoring/tailor.py @@ -9,6 +9,7 @@ to avoid apologetic spirals. """ +import hashlib import json import logging import re @@ -32,6 +33,19 @@ MAX_ATTEMPTS = 5 # max cross-run retries before giving up +def make_filename_prefix(job: dict) -> str: + """Build a collision-free filename prefix for a job's generated artifacts. + + Two postings with the same title from the same board would otherwise share + a path and overwrite each other, sending one employer the resume tailored + for another. A short hash of the (unique) job URL disambiguates them. 
+ """ + safe_title = re.sub(r"[^\w\s-]", "", job["title"])[:50].strip().replace(" ", "_") + safe_site = re.sub(r"[^\w\s-]", "", job["site"])[:20].strip().replace(" ", "_") + url_hash = hashlib.sha1(job["url"].encode()).hexdigest()[:8] + return f"{safe_site}_{safe_title}_{url_hash}" + + # ── Prompt Builders (profile-driven) ────────────────────────────────────── def _build_tailor_prompt(profile: dict) -> str: @@ -490,10 +504,8 @@ def run_tailoring(min_score: int = 7, limit: int = 20, tailored, report = tailor_resume(resume_text, job, profile, validation_mode=validation_mode) - # Build safe filename prefix - safe_title = re.sub(r"[^\w\s-]", "", job["title"])[:50].strip().replace(" ", "_") - safe_site = re.sub(r"[^\w\s-]", "", job["site"])[:20].strip().replace(" ", "_") - prefix = f"{safe_site}_{safe_title}" + # Build safe, collision-free filename prefix + prefix = make_filename_prefix(job) # Save tailored resume text txt_path = TAILORED_DIR / f"{prefix}.txt" diff --git a/src/applypilot/scoring/validator.py b/src/applypilot/scoring/validator.py index abb8f89d..7091d4db 100644 --- a/src/applypilot/scoring/validator.py +++ b/src/applypilot/scoring/validator.py @@ -58,15 +58,45 @@ # Known fabrication markers: completely unrelated tools/languages. # Reasonable stretches (K8s, Terraform, Redis, Kafka etc.) are ALLOWED. -FABRICATION_WATCHLIST: set[str] = { +# EXACT_TERMS match on word boundaries; PREFIX_TERMS match any word starting +# with them (so "certif" catches certified/certification). 
+EXACT_TERMS: set[str] = { # Languages with zero relation to the candidate's stack "c#", "c++", "golang", "rust", "ruby", "kotlin", "swift", "scala", "matlab", # Frameworks for wrong languages "spring", "django", "rails", "angular", "vue", "svelte", # Hard lies: certifications can't be stretched - "certif", "certified", "pmp", "scrum master", "aws certified", + "certified", "pmp", "scrum master", "aws certified", } +PREFIX_TERMS: set[str] = {"certif"} + +# Kept for backwards-compat (tailor.py imports this name). +FABRICATION_WATCHLIST: set[str] = EXACT_TERMS | PREFIX_TERMS + + +def find_watchlist_hits(text: str, allowed: set[str]) -> list[str]: + """Return fabrication-watchlist terms that appear in ``text``. + + Uses word-boundary matching so "scala" does not fire on "scalable" and + "rails" does not fire on "guardrails". Terms present in ``allowed`` (the + candidate's real skills) are never flagged. ``allowed`` is matched by exact + lowercased membership against the watchlist term. + """ + low = text.lower() + hits: list[str] = [] + for term in EXACT_TERMS: + if term in allowed: + continue + # Boundaries that also respect '+' and '#' so c++/c# match correctly. + if re.search(rf"(? dic # Collect all text for bulk checks all_text_parts: list[str] = [data["summary"]] - # Skills: check for fabrication (always enforced) + # Skills: check for fabrication (always enforced), but never flag a tool the + # candidate actually lists in their profile. 
+ allowed = _build_skills_set(profile) if isinstance(data["skills"], dict): - skills_text = " ".join(str(v) for v in data["skills"].values()).lower() - for fake in FABRICATION_WATCHLIST: - if len(fake) <= 2: - continue - if fake in skills_text: - errors.append(f"Fabricated skill: '{fake}'") + skills_text = " ".join(str(v) for v in data["skills"].values()) + for fake in find_watchlist_hits(skills_text, allowed): + errors.append(f"Fabricated skill: '{fake}'") # Experience: preserved companies must be present (always enforced) resume_facts = profile.get("resume_facts", {}) @@ -243,23 +272,19 @@ def validate_tailored_resume(text: str, profile: dict, original_text: str = "") warnings.append("Phone missing -- will be injected") # 7. Scan TECHNICAL SKILLS section for fabricated tools + allowed = _build_skills_set(profile) skills_start = text_lower.find("technical skills") skills_end = text_lower.find("experience", skills_start) if skills_start != -1 else -1 if skills_start != -1 and skills_end != -1: skills_block = text_lower[skills_start:skills_end] - for fake in FABRICATION_WATCHLIST: - if len(fake) <= 2: - continue - if fake in skills_block: - errors.append(f"FABRICATED SKILL in Technical Skills: '{fake}'") + for fake in find_watchlist_hits(skills_block, allowed): + errors.append(f"FABRICATED SKILL in Technical Skills: '{fake}'") # 8. Scan full document for fabrication watchlist items not in original if original_text: - original_lower = original_text.lower() - for fake in FABRICATION_WATCHLIST: - if len(fake) <= 2: - continue - if fake in text_lower and fake not in original_lower: + original_hits = set(find_watchlist_hits(original_text, allowed)) + for fake in find_watchlist_hits(text, allowed): + if fake not in original_hits: warnings.append(f"New tool/skill appeared: '{fake}' (not in original)") # 9. 
Em dashes (should be auto-fixed by sanitize_text, but safety net) diff --git a/tests/test_cover_pdf.py b/tests/test_cover_pdf.py new file mode 100644 index 00000000..47f1fe00 --- /dev/null +++ b/tests/test_cover_pdf.py @@ -0,0 +1,23 @@ +"""F11: cover-letter PDFs render the body (not garbage) and escape content.""" +from applypilot.scoring.pdf import _letter_html + + +def test_letter_html_keeps_all_paragraphs(): + letter = "Dear Hiring Manager,\n\nI built systems with List.\n\nSincerely,\nJane" + html = _letter_html(letter, "Jane Doe") + assert "Dear Hiring Manager," in html + assert "Sincerely," in html + # Middle paragraph survives. + assert "I built systems" in html + + +def test_letter_html_escapes_dangerous_content(): + html = _letter_html("I used List and .", "Jane") + assert "List<String>" in html + assert "" not in html + assert "<script>" in html + + +def test_name_appears_once_as_header(): + html = _letter_html("Body text here.", "Jane Doe") + assert html.count('class="name">Jane Doe') == 1 diff --git a/tests/test_filenames.py b/tests/test_filenames.py new file mode 100644 index 00000000..6d796c4e --- /dev/null +++ b/tests/test_filenames.py @@ -0,0 +1,40 @@ +"""F9: per-job artifact filenames must not collide; uploads are per-worker.""" +import applypilot.config as config +import applypilot.apply.prompt as prompt_mod +from applypilot.scoring.tailor import make_filename_prefix + + +def test_same_title_site_different_url_distinct_prefix(): + a = make_filename_prefix({"title": "Software Engineer", "site": "linkedin", + "url": "https://example.com/a"}) + b = make_filename_prefix({"title": "Software Engineer", "site": "linkedin", + "url": "https://example.com/b"}) + assert a != b + + +def test_same_job_stable_prefix(): + job = {"title": "Software Engineer", "site": "linkedin", "url": "https://example.com/a"} + assert make_filename_prefix(job) == make_filename_prefix(job) + + +def _fixture_profile(): + return { + "personal": {"full_name": "Jane Doe", "email": 
"j@example.com", + "phone": "5551234567", "city": "Austin"}, + "work_authorization": {}, + "compensation": {"salary_expectation": "100000"}, + "experience": {}, "availability": {}, "eeo": {}, "skills_boundary": {}, + } + + +def test_upload_dir_is_per_worker(tmp_path, monkeypatch): + monkeypatch.setattr(config, "load_profile", _fixture_profile) + monkeypatch.setattr(config, "load_search_config", lambda: {}) + monkeypatch.setattr(config, "APPLY_WORKER_DIR", tmp_path) + pdf = tmp_path / "x.pdf" + pdf.write_bytes(b"%PDF-1.4 dummy") + job = {"url": "https://example.com/j", "title": "Engineer", "site": "linkedin", + "application_url": None, "fit_score": 8, "tailored_resume_path": str(tmp_path / "x.txt")} + out = prompt_mod.build_prompt(job=job, tailored_resume="r", worker_id=2) + assert "worker-2" in out + assert (tmp_path / "worker-2" / "current" / "Jane_Doe_Resume.pdf").exists() diff --git a/tests/test_smartextract_isolation.py b/tests/test_smartextract_isolation.py new file mode 100644 index 00000000..6f1c913e --- /dev/null +++ b/tests/test_smartextract_isolation.py @@ -0,0 +1,25 @@ +"""F14: one flaky site must not abort the whole smart-extract stage.""" +import applypilot.database as db +import applypilot.discovery.smartextract as se + + +def test_one_failing_site_does_not_abort(tmp_path, monkeypatch): + monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db") + + def fake_run_one_site(name, url): + if name == "A": + raise RuntimeError("network timeout") + return { + "name": "B", "status": "PASS", "strategy": "test", + "total": 1, "titles": 1, + "jobs": [{"url": "https://example.com/1", "title": "T", + "salary": None, "description": None, "location": "Remote"}], + } + + monkeypatch.setattr(se, "_run_one_site", fake_run_one_site) + stats = se._run_all( + [{"name": "A", "url": "u"}, {"name": "B", "url": "v"}], [], [], workers=1) + + assert stats["errors"] == 1 + assert stats["passed"] == 1 + assert stats["total_new"] == 1 diff --git a/tests/test_stage_limits.py 
b/tests/test_stage_limits.py new file mode 100644 index 00000000..524c6803 --- /dev/null +++ b/tests/test_stage_limits.py @@ -0,0 +1,19 @@ +"""F12: sequential pipeline must not silently cap tailor/cover at 20.""" +import applypilot.database as db + + +def test_get_jobs_by_stage_limit_zero_is_unlimited(tmp_path, monkeypatch): + monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db") + conn = db.init_db() + for i in range(25): + conn.execute( + "INSERT INTO jobs (url, title, fit_score, full_description, tailored_resume_path) " + "VALUES (?,?,?,?,NULL)", + (f"https://example.com/{i}", "Engineer", 8, "x"), + ) + conn.commit() + + all_jobs = db.get_jobs_by_stage(conn=conn, stage="pending_tailor", min_score=7, limit=0) + assert len(all_jobs) == 25 + capped = db.get_jobs_by_stage(conn=conn, stage="pending_tailor", min_score=7, limit=10) + assert len(capped) == 10 diff --git a/tests/test_stale_locks.py b/tests/test_stale_locks.py new file mode 100644 index 00000000..28a14e0c --- /dev/null +++ b/tests/test_stale_locks.py @@ -0,0 +1,19 @@ +"""F13: stale in_progress locks are recovered at apply startup.""" +import applypilot.database as db +from applypilot.apply.launcher import reset_stale_locks + + +def test_reset_stale_locks(tmp_path, monkeypatch): + monkeypatch.setattr(db, "DB_PATH", tmp_path / "test.db") + conn = db.init_db() + conn.execute( + "INSERT INTO jobs (url, title, apply_status, agent_id) VALUES (?,?,?,?)", + ("https://example.com/stuck", "Engineer", "in_progress", "worker-0"), + ) + conn.commit() + + assert reset_stale_locks() == 1 + row = conn.execute("SELECT apply_status, agent_id FROM jobs WHERE url=?", + ("https://example.com/stuck",)).fetchone() + assert row["apply_status"] is None + assert row["agent_id"] is None diff --git a/tests/test_validator_watchlist.py b/tests/test_validator_watchlist.py new file mode 100644 index 00000000..499b1504 --- /dev/null +++ b/tests/test_validator_watchlist.py @@ -0,0 +1,44 @@ +"""F10: fabrication watchlist must be 
word-boundary and profile-aware.""" +from applypilot.scoring.validator import find_watchlist_hits, validate_json_fields + + +def test_scalable_does_not_trip_scala(): + assert "scala" not in find_watchlist_hits("highly scalable systems", set()) + + +def test_guardrails_does_not_trip_rails(): + assert "rails" not in find_watchlist_hits("implemented guardrails everywhere", set()) + + +def test_real_rails_is_flagged(): + assert "rails" in find_watchlist_hits("Ruby on Rails developer", set()) + + +def test_certification_prefix_flagged(): + assert "certif" in find_watchlist_hits("AWS Certified Solutions Architect", set()) + + +def test_cplusplus_flagged_and_whitelistable(): + assert "c++" in find_watchlist_hits("C++ and Python experience", set()) + assert "c++" not in find_watchlist_hits("C++ and Python experience", {"c++"}) + + +def test_csharp_flagged_and_whitelistable(): + assert "c#" in find_watchlist_hits("C# backend work", set()) + assert "c#" not in find_watchlist_hits("C# backend work", {"c#"}) + + +def test_end_to_end_validate_json_fields_respects_profile(): + data = { + "title": "Engineer", "summary": "Engineer", + "skills": {"languages": ["C++", "Python"]}, + "experience": [{"company": "Acme", "bullets": ["did things"]}], + "education": [{"school": "State U"}], "projects": [{"name": "P"}], + } + # No skills_boundary -> C++ flagged as fabricated. + res_none = validate_json_fields(data, {"resume_facts": {}}, mode="normal") + assert any("c++" in e.lower() for e in res_none["errors"]) + # C++ in profile -> not flagged. + res_ok = validate_json_fields( + data, {"resume_facts": {}, "skills_boundary": {"languages": ["C++"]}}, mode="normal") + assert not any("c++" in e.lower() for e in res_ok["errors"])