diff --git a/src/roomieorder/purchase.py b/src/roomieorder/purchase.py index 12e794e..2be280f 100644 --- a/src/roomieorder/purchase.py +++ b/src/roomieorder/purchase.py @@ -268,6 +268,13 @@ class BasePurchaser(Generic[SourceT]): # Sold-out / not-carried / not-found markers that drive the Amazon fallback. OUT_OF_STOCK_MARKERS: tuple[str, ...] = () NOT_FOUND_MARKERS: tuple[str, ...] = () + # Positive order-confirmation signals, checked alongside the order id / total. + # A match means the store rendered a success page, so a placed order is no + # longer misread as `needs_review` when the id/total selectors miss (the + # confirmation can show a success banner with no scrapeable number). Both are + # lowercased substring tests; empty by default. + CONFIRMATION_MARKERS: tuple[str, ...] = () # success-banner body text + CONFIRMATION_URL_MARKERS: tuple[str, ...] = () # thank-you URL fragments ORDER_ID_RE = re.compile(r"\b\d{9,12}\b") # Label-anchored order-id capture, tried *before* the bare ORDER_ID_RE so a # phone number / item number / ZIP+4 elsewhere on the confirmation page can't @@ -648,16 +655,17 @@ def buy( return self._challenge(page, item_key, "confirm") self._settle(page) - order_id, total = self._scrape_confirmation(page) - if order_id is None and total is None: - # Submitted, but nothing confirmable scraped — don't claim a - # clean `placed` we can't evidence. Flag for human review, - # carrying the review-page total so the row still logs a - # dollar amount to split. + order_id, total, confirmed = self._scrape_confirmation(page) + if not confirmed: + # Submitted, but nothing confirmable scraped — no order id, + # no total, and no success banner / thank-you URL. Don't + # claim a clean `placed` we can't evidence. Flag for human + # review, carrying the review-page total so the row still + # logs a dollar amount to split. return self._submitted_unconfirmed( page, item_key, - "no order number or total on the confirmation page", + "no order number, total, or confirmation banner on the page", order_total=review_total, ) # The confirmation page rarely carries a total (Costco's v2 view @@ -1153,23 +1161,50 @@ def _read_total(self, page: "Page") -> Optional[float]: continue return None - def _scrape_confirmation(self, page: "Page") -> tuple[Optional[str], Optional[float]]: + def _scrape_confirmation( + self, page: "Page" + ) -> tuple[Optional[str], Optional[float], bool]: # Defensive read: the confirmation body can paint a beat after the order # POST returns, so a single read can miss it. Retry a few times before # giving up — a missed scrape here is what makes a placed order look # unconfirmed (see _submitted_unconfirmed). + # + # Returns (order_id, total, confirmed). `confirmed` is True when *any* + # positive signal is seen — a success banner / thank-you URL, an order + # id, or a total — so a real order whose number/total can't be scraped + # still reads as `placed` instead of `needs_review`. body = "" + confirmed = False for attempt in range(3): try: body = page.locator("body").inner_text(timeout=5_000) except Exception: # noqa: BLE001 body = "" - if self._find_order_id(body) is not None: + confirmed = self._looks_confirmed(page, body) + if confirmed or self._find_order_id(body) is not None: break if attempt < 2: self._settle(page) order_id = self._find_order_id(body) - return order_id, self._read_total(page) + total = self._read_total(page) + return order_id, total, (confirmed or order_id is not None or total is not None) + + def _looks_confirmed(self, page: "Page", body: str) -> bool: + """True when the page shows a recognized order-confirmation signal. + + A positive banner/URL match is authoritative — the store only renders + these on a successful order — so it confirms a placed order even when the + order id and total can't be scraped (Amazon's confirmation surfaced + 'Order placed, thanks!' with no scrapeable number, the + false-needs_review case this guards against).""" + try: + url = (page.url or "").lower() + except Exception: # noqa: BLE001 + url = "" + if any(m in url for m in self.CONFIRMATION_URL_MARKERS): + return True + text = body.lower() + return any(m in text for m in self.CONFIRMATION_MARKERS) def _find_order_id(self, body: str) -> Optional[str]: """Extract the order id from the confirmation body text. @@ -1435,6 +1470,11 @@ class CostcoPurchaser(BasePurchaser[CostcoSource]): r"(?:order|confirmation)\s*(?:number|no\.?|#)?\s*[:#]?\s*(\d{7,12})", re.I, ) + # Backup confirmation signals for CheckoutConfirmationView_v2 (already + # detected via the order number, so these only matter if the number scrape + # misses). TODO(costco): verify banner wording + URL against live DOM. + CONFIRMATION_MARKERS = ("thank you for your order", "order confirmation") + CONFIRMATION_URL_MARKERS = ("checkoutconfirmation", "orderconfirmation") def _resolve_url(self, source: CostcoSource) -> str: return source.url or self.config.costco_product_url(source.item_number) @@ -1641,7 +1681,9 @@ class AmazonPurchaser(BasePurchaser[AmazonSource]): PROVIDER = "amazon" STORE_NAME = "Amazon" - # TODO(amazon): verify against live DOM — PDP price block. + # Price block — verified against the live PDP dump (2026-06-21): the modern + # corePrice container and the generic a-price span both carry the amount. The + # legacy priceblock_* ids stay as fallbacks for older PDP variants. PRICE_SELECTORS = ( "#corePriceDisplay_desktop_feature_div span.a-offscreen", "#corePrice_feature_div span.a-offscreen", @@ -1649,16 +1691,22 @@ class AmazonPurchaser(BasePurchaser[AmazonSource]): "#priceblock_dealprice", "span.a-price span.a-offscreen", ) + # The modern PDP emits no product/price tags (none in the live dump); + # kept only as a best-effort fallback for pages that still render them. PRICE_META_SELECTORS = ( "meta[property='product:price:amount']", "meta[property='og:price:amount']", "meta[itemprop='price']", ) - # TODO(amazon): verify against live DOM — Buy Now / Add to Cart / proceed. + # Add to Cart verified against the live PDP dump (2026-06-21). The Buy Now + # button is injected by the turbo-checkout widget after load, so it isn't in + # the static DOM — Amazon's own turboState declares its initiate selector as + # [id^=buy-now-button], so lead with that and keep the exact / legacy ids as + # fallbacks. BUY_NOW_SELECTORS = ( + "[id^='buy-now-button']", "#buy-now-button", "input[name='submit.buy-now']", - "#submit\\.buy-now", ) ADD_TO_CART_SELECTORS = ( "#add-to-cart-button", @@ -1722,6 +1770,20 @@ class AmazonPurchaser(BasePurchaser[AmazonSource]): ) # Amazon order numbers look like 123-4567890-1234567. ORDER_ID_RE = re.compile(r"\b\d{3}-\d{7}-\d{7}\b") + # Order-confirmation success signals — the thank-you page renders a banner + # and lands on a /thankyou URL even when the dashed order number isn't in the + # scraped body text (the false-needs_review case). TODO(amazon): verify the + # banner wording against live DOM; "Order placed, thanks!" is observed. + CONFIRMATION_MARKERS = ( + "order placed, thank", # "Order placed, thanks!" (observed live) + "thank you, your order", + "your order has been placed", + "placed your order", + ) + CONFIRMATION_URL_MARKERS = ( + "/gp/buy/thankyou", + "thankyou", + ) def _resolve_url(self, source: AmazonSource) -> str: return source.url or self.config.amazon_product_url(source.asin) diff --git a/tests/test_purchase.py b/tests/test_purchase.py index 724bea5..ef26239 100644 --- a/tests/test_purchase.py +++ b/tests/test_purchase.py @@ -696,9 +696,12 @@ class _ConfirmPage: """Models an order-confirmation page: a body blob plus an optional grand-total element. Missing selectors report count=0 like a real locator miss.""" - def __init__(self, *, body: str = "", total_text: str | None = None) -> None: + def __init__( + self, *, body: str = "", total_text: str | None = None, url: str = "" + ) -> None: self._body = body self._total_text = total_text + self.url = url def locator(self, selector: str) -> _ConfirmLocator: if selector == "body": @@ -713,14 +716,35 @@ def wait_for_load_state(self, state: str, timeout: int | None = None) -> None: def test_scrape_confirmation_reads_labelled_id_and_total(config: Config) -> None: page = _ConfirmPage(body="Thanks! Order # 123456789 confirmed", total_text="$24.99") - order_id, total = _purchaser(config)._scrape_confirmation(page) + order_id, total, confirmed = _purchaser(config)._scrape_confirmation(page) assert order_id == "123456789" assert total == 24.99 + assert confirmed is True def test_scrape_confirmation_returns_none_when_blank(config: Config) -> None: - order_id, total = _purchaser(config)._scrape_confirmation(_ConfirmPage(body="loading…")) + # No id, no total, no success banner / thank-you URL → unconfirmed. + order_id, total, confirmed = _purchaser(config)._scrape_confirmation( + _ConfirmPage(body="loading…") + ) assert order_id is None and total is None + assert confirmed is False + + +def test_scrape_confirmation_confirms_on_success_banner(config: Config) -> None: + # The exact false-needs_review case: a placed order whose number/total can't + # be scraped, but the page shows Amazon's success banner. → confirmed. + page = _ConfirmPage(body="Order placed, thanks!") + order_id, total, confirmed = _amazon(config)._scrape_confirmation(page) + assert order_id is None and total is None + assert confirmed is True + + +def test_scrape_confirmation_confirms_on_thankyou_url(config: Config) -> None: + # Blank body but the thank-you URL alone confirms the order. + page = _ConfirmPage(url="https://www.amazon.com/gp/buy/thankyou/handlers/display.html") + _, _, confirmed = _amazon(config)._scrape_confirmation(page) + assert confirmed is True def test_submitted_unconfirmed_flags_needs_review(config: Config) -> None: