Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Normalize line endings to LF in the repository and on checkout
* text=auto eol=lf

# Binary files — never touch line endings
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.pdf binary
*.db binary
20 changes: 18 additions & 2 deletions src/applypilot/apply/chrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
worker profile setup/cloning, and cross-platform process cleanup.
"""

import hashlib
import json
import logging
import os
import platform
import shutil
import subprocess
Expand All @@ -17,8 +19,22 @@

logger = logging.getLogger(__name__)

# CDP port base — each worker uses BASE_CDP_PORT + worker_id
BASE_CDP_PORT = 9222

def _instance_base_port() -> int:
"""Derive a stable CDP base port from the instance name so concurrent
apply runs on different instances don't clobber each other's Chrome.
Maps each instance into a 10-port window in 9200-9399.
"""
instance_dir = os.environ.get("APPLYPILOT_DIR", "")
name = Path(instance_dir).name if instance_dir else "default"
offset = int(hashlib.md5(name.encode()).hexdigest()[:2], 16) % 20 * 10
return 9200 + offset


# CDP port base — each worker uses BASE_CDP_PORT + worker_id.
# Derived from the instance name so concurrent apply runs on different
# instances don't collide on the same port.
BASE_CDP_PORT = _instance_base_port()

# Track Chrome processes per worker for cleanup
_chrome_procs: dict[int, subprocess.Popen] = {}
Expand Down
3 changes: 3 additions & 0 deletions src/applypilot/apply/dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

import logging
import re
import threading
import time
from dataclasses import dataclass, field
Expand Down Expand Up @@ -81,6 +82,8 @@ def add_event(msg: str) -> None:
Args:
msg: Rich markup string describing the event.
"""
plain = re.sub(r'\[/?[^\]]*\]', '', msg)
logger.info("%s", plain)
ts = datetime.now().strftime("%H:%M:%S")
with _lock:
_events.append(f"[dim]{ts}[/dim] {msg}")
Expand Down
179 changes: 102 additions & 77 deletions src/applypilot/apply/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@
from datetime import datetime, timezone
from pathlib import Path

from rich.console import Console
from rich.live import Live

from applypilot import config
from applypilot.database import get_connection
from applypilot.database import get_connection, is_already_applied, record_applied, init_applied_db
from applypilot.apply import chrome, dashboard, prompt as prompt_mod
from applypilot.apply.chrome import (
launch_chrome, cleanup_worker, kill_all_chrome,
Expand Down Expand Up @@ -71,7 +68,7 @@ def _make_mcp_config(cdp_port: int) -> dict:
"command": "npx",
"args": [
"@playwright/mcp@latest",
f"--cdp-endpoint=http://localhost:{cdp_port}",
f"--cdp-endpoint=http://127.0.0.1:{cdp_port}",
f"--viewport-size={config.DEFAULTS['viewport']}",
],
},
Expand Down Expand Up @@ -147,7 +144,8 @@ def acquire_job(target_url: str | None = None, min_score: int = 7,

# Skip manual ATS sites (unsolvable CAPTCHAs)
from applypilot.config import is_manual_ats
apply_url = row["application_url"] or row["url"]
_raw_app_url = row["application_url"]
apply_url = (_raw_app_url if _raw_app_url and _raw_app_url != "None" else None) or row["url"]
if is_manual_ats(apply_url):
conn.execute(
"UPDATE jobs SET apply_status = 'manual', apply_error = 'manual ATS' WHERE url = ?",
Expand All @@ -157,6 +155,17 @@ def acquire_job(target_url: str | None = None, min_score: int = 7,
logger.info("Skipping manual ATS: %s", row["url"][:80])
return None

# Cross-instance dedup — skip if already applied via another instance
if is_already_applied(row["url"], row["application_url"]):
conn.execute(
"UPDATE jobs SET apply_status = 'skip_dedup', "
"apply_error = 'applied via another instance' WHERE url = ?",
(row["url"],),
)
conn.commit()
logger.info("Skipping (cross-instance dedup): %s", row["url"][:80])
return None

now = datetime.now(timezone.utc).isoformat()
conn.execute("""
UPDATE jobs SET apply_status = 'in_progress',
Expand Down Expand Up @@ -403,6 +412,10 @@ def run_job(job: dict, port: int, worker_id: int = 0,
if bt == "text":
text_parts.append(block["text"])
lf.write(block["text"] + "\n")
# Emit agent text to main log (truncated to avoid flooding)
for tline in block["text"].splitlines():
if tline.strip():
logger.info("[W%d] %s", worker_id, tline[:200])
elif bt == "tool_use":
name = (
block.get("name", "")
Expand All @@ -422,6 +435,7 @@ def run_job(job: dict, port: int, worker_id: int = 0,
desc = name

lf.write(f" >> {desc}\n")
logger.info("[W%d] >> %s", worker_id, desc)
ws = get_state(worker_id)
cur_actions = ws.actions if ws else 0
update_state(worker_id,
Expand Down Expand Up @@ -610,6 +624,8 @@ def worker_loop(worker_id: int = 0, limit: int = 1,
continue
elif result == "applied":
mark_result(job["url"], "applied", duration_ms=duration_ms)
instance_name = os.path.basename(os.environ.get("APPLYPILOT_DIR", "default"))
record_applied(job, instance=instance_name)
applied += 1
update_state(worker_id, jobs_applied=applied,
jobs_done=applied + failed)
Expand Down Expand Up @@ -672,7 +688,7 @@ def main(limit: int = 1, target_url: str | None = None,
_stop_event.clear()

config.ensure_dirs()
console = Console()
init_applied_db()

if continuous:
effective_limit = 0
Expand All @@ -686,8 +702,8 @@ def main(limit: int = 1, target_url: str | None = None,
init_worker(i)

worker_label = f"{workers} worker{'s' if workers > 1 else ''}"
console.print(f"Launching apply pipeline ({mode_label}, {worker_label}, poll every {POLL_INTERVAL}s)...")
console.print("[dim]Ctrl+C = skip current job(s) | Ctrl+C x2 = stop[/dim]")
logger.info("Launching apply pipeline (%s, %s, poll every %ds)...",
mode_label, worker_label, POLL_INTERVAL)

# Double Ctrl+C handler
_ctrl_c_count = 0
Expand All @@ -696,14 +712,13 @@ def _sigint_handler(sig, frame):
nonlocal _ctrl_c_count
_ctrl_c_count += 1
if _ctrl_c_count == 1:
console.print("\n[yellow]Skipping current job(s)... (Ctrl+C again to STOP)[/yellow]")
# Kill all active Claude processes to skip current jobs
logger.warning("Skipping current job(s)... (Ctrl+C again to STOP)")
with _claude_lock:
for wid, cproc in list(_claude_procs.items()):
if cproc.poll() is None:
_kill_process_tree(cproc.pid)
else:
console.print("\n[red bold]STOPPING[/red bold]")
logger.warning("STOPPING")
_stop_event.set()
with _claude_lock:
for wid, cproc in list(_claude_procs.items()):
Expand All @@ -714,78 +729,88 @@ def _sigint_handler(sig, frame):

signal.signal(signal.SIGINT, _sigint_handler)

def _run_workers() -> tuple[int, int]:
if workers == 1:
return worker_loop(
worker_id=0,
limit=effective_limit,
target_url=target_url,
min_score=min_score,
headless=headless,
model=model,
dry_run=dry_run,
)

if effective_limit:
base = effective_limit // workers
extra = effective_limit % workers
limits = [base + (1 if i < extra else 0) for i in range(workers)]
else:
limits = [0] * workers

with ThreadPoolExecutor(max_workers=workers,
thread_name_prefix="apply-worker") as executor:
futures = {
executor.submit(
worker_loop,
worker_id=i,
limit=limits[i],
target_url=target_url,
min_score=min_score,
headless=headless,
model=model,
dry_run=dry_run,
): i
for i in range(workers)
}
results: list[tuple[int, int]] = []
for future in as_completed(futures):
wid = futures[future]
try:
results.append(future.result())
except Exception:
logger.exception("Worker %d crashed", wid)
results.append((0, 0))

return sum(r[0] for r in results), sum(r[1] for r in results)

is_tty = sys.stdout.isatty()

try:
with Live(render_full(), console=console, refresh_per_second=2) as live:
# Daemon thread for display refresh only (no business logic)
if is_tty:
from rich.console import Console as _Console
_console = _Console()
_console.print("[dim]Ctrl+C = skip current job(s) | Ctrl+C x2 = stop[/dim]")

from rich.live import Live
_dashboard_running = True

def _refresh():
def _refresh(live):
while _dashboard_running:
live.update(render_full())
time.sleep(0.5)

refresh_thread = threading.Thread(target=_refresh, daemon=True)
refresh_thread.start()

if workers == 1:
# Single worker — run directly in main thread
total_applied, total_failed = worker_loop(
worker_id=0,
limit=effective_limit,
target_url=target_url,
min_score=min_score,
headless=headless,
model=model,
dry_run=dry_run,
)
else:
# Multi-worker — distribute limit across workers
if effective_limit:
base = effective_limit // workers
extra = effective_limit % workers
limits = [base + (1 if i < extra else 0)
for i in range(workers)]
else:
limits = [0] * workers # continuous mode

with ThreadPoolExecutor(max_workers=workers,
thread_name_prefix="apply-worker") as executor:
futures = {
executor.submit(
worker_loop,
worker_id=i,
limit=limits[i],
target_url=target_url,
min_score=min_score,
headless=headless,
model=model,
dry_run=dry_run,
): i
for i in range(workers)
}

results: list[tuple[int, int]] = []
for future in as_completed(futures):
wid = futures[future]
try:
results.append(future.result())
except Exception:
logger.exception("Worker %d crashed", wid)
results.append((0, 0))

total_applied = sum(r[0] for r in results)
total_failed = sum(r[1] for r in results)

_dashboard_running = False
refresh_thread.join(timeout=2)
live.update(render_full())

totals = get_totals()
console.print(
f"\n[bold]Done: {total_applied} applied, {total_failed} failed "
f"(${totals['cost']:.3f})[/bold]"
)
console.print(f"Logs: {config.LOG_DIR}")
with Live(render_full(), console=_console, refresh_per_second=2) as live:
refresh_thread = threading.Thread(
target=_refresh, args=(live,), daemon=True)
refresh_thread.start()
total_applied, total_failed = _run_workers()
_dashboard_running = False
refresh_thread.join(timeout=2)
live.update(render_full())

totals = get_totals()
_console.print(
f"\n[bold]Done: {total_applied} applied, {total_failed} failed "
f"(${totals['cost']:.3f})[/bold]"
)
_console.print(f"Logs: {config.LOG_DIR}")
else:
total_applied, total_failed = _run_workers()
totals = get_totals()
logger.info("Done: %d applied, %d failed ($%.3f)",
total_applied, total_failed, totals['cost'])
logger.info("Logs: %s", config.LOG_DIR)

except KeyboardInterrupt:
pass
Expand Down
4 changes: 2 additions & 2 deletions src/applypilot/apply/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ def build_prompt(job: dict, tailored_resume: str,
prompt = f"""You are an autonomous job application agent. Your ONE mission: get this candidate an interview. You have all the information and tools. Think strategically. Act decisively. Submit the application.

== JOB ==
URL: {job.get('application_url') or job['url']}
URL: {job.get('application_url') if job.get('application_url') and job.get('application_url') != 'None' else job['url']}
Title: {job['title']}
Company: {job.get('site', 'Unknown')}
Fit Score: {job.get('fit_score', 'N/A')}/10
Expand Down Expand Up @@ -568,7 +568,7 @@ def build_prompt(job: dict, tailored_resume: str,
5. Login wall?
5a. FIRST: check the URL. If you landed on {', '.join(blocked_sso)}, or any SSO/OAuth page -> STOP. Output RESULT:FAILED:sso_required. Do NOT try to sign in to Google/Microsoft/SSO.
5b. Check for popups. Run browser_tabs action "list". If a new tab/window appeared (login popup), switch to it with browser_tabs action "select". Check the URL there too -- if it's SSO -> RESULT:FAILED:sso_required.
5c. Regular login form (employer's own site)? Try sign in: {personal['email']} / {personal.get('password', '')}
5c. Regular login form (employer's own site)? Try sign in: {personal['email']} / {personal.get('site_passwords', {}).get(job.get('site', ''), personal.get('password', ''))}
5d. After clicking Login/Sign-in: run CAPTCHA DETECT. Login pages frequently have invisible CAPTCHAs that silently block form submissions. If found, solve it then retry login.
5e. Sign in failed? Try sign up with same email and password.
5f. Need email verification? Use search_emails + read_email to get the code.
Expand Down
16 changes: 15 additions & 1 deletion src/applypilot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,26 @@

from applypilot import __version__

import os as _os, sys as _sys, pathlib as _pathlib

_log_handlers: list[logging.Handler] = [logging.StreamHandler(_sys.stderr)]
_log_file_path = _os.environ.get("APPLYPILOT_LOG_FILE")
if _log_file_path:
_pathlib.Path(_log_file_path).parent.mkdir(parents=True, exist_ok=True)
_log_handlers.append(logging.FileHandler(_log_file_path, mode="a", encoding="utf-8"))

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
format="%(asctime)s %(levelname)s %(message)s",
datefmt="%H:%M:%S",
handlers=_log_handlers,
)

# Third-party HTTP libraries log every request/response at INFO by default.
# Our own WARNING already covers every 429 retry, so suppress their chatter.
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)

app = typer.Typer(
name="applypilot",
help="AI-powered end-to-end job application pipeline.",
Expand Down
Loading