diff --git a/scripts/collision_allowlist.json b/scripts/collision_allowlist.json new file mode 100644 index 00000000..fbd93108 --- /dev/null +++ b/scripts/collision_allowlist.json @@ -0,0 +1,215 @@ +{ + "_comment": "Snapshot of bare type names defined in more than one non-bundled generated_poc module that are knowingly resolved by first-seen / stem-preference order in scripts/consolidate_exports.py. Each name is a class an adopter cannot import unambiguously from adcp.types. Regenerate with: python scripts/consolidate_exports.py --update-allowlist. A growing list is a signal to add disambiguated aliases in src/adcp/types/aliases.py (issue #911).", + "allowlist": [ + "Accessibility", + "Account", + "Action", + "Address", + "AgeRestriction", + "AiActRiskClass", + "Art9Basis", + "Artifact", + "AssetSource", + "Assets", + "AttributionWindow", + "AudioCodec", + "AudioSampleRate", + "Authentication", + "BrandContext", + "Budget", + "BuildCreativeInputRequired", + "BuildCreativeSubmitted", + "BuildCreativeWorking", + "BuyerAssetAcceptance", + "BuyingMode", + "ByPackageItem", + "CacheScope", + "CalibrationExemplars", + "Cancellation", + "Catalog", + "Category", + "ChangeSummary", + "Channel", + "ClaimType", + "Code", + "Collection", + "ColorSpace", + "Colors", + "Condition", + "Contact", + "Container", + "ContentStandards", + "ConversionTracking", + "Country", + "CoverageRate", + "CreateMediaBuyInputRequired", + "CreateMediaBuySubmitted", + "CreateMediaBuyWorking", + "Creative", + "CreativeAgent", + "CreditLimit", + "DaastVersion", + "DataSource", + "DataSubjectRights", + "DelegationType", + "DeliveryMetrics", + "Destination", + "Details", + "DevicePlatform", + "DeviceType", + "Dimensions", + "Direction", + "Disclosure", + "DiscoveryMode", + "Domain", + "DomainBreakdown", + "DurationMsRange", + "DurationMsRangeItem", + "EntityType", + "Event", + "EventSource", + "EventType", + "ExcludedCountry", + "Exclusivity", + "Exemplars", + "Fail", + "Feature", + "Field1", + "Filters", + "Finding", + "Format", + "Geo", + "GeoProximityItem", + "Geometry", + "GetProductsInputRequired", + "GetProductsSubmitted", + "GetProductsWorking", + "GetSignalsSubmitted", + "GetSignalsWorking", + "GovernanceAgent", + "HistoryItem", + "House", + "HumanOversight", + "Identifier", + "Identity", + "ImageFormat", + "IncompleteItem", + "Initiator", + "Input", + "Input2", + "Jurisdiction", + "Keyword", + "Kind", + "Location", + "Logo", + "MatchKey", + "MediaBuy", + "MediaBuyDelivery", + "Metadata", + "Method", + "Methodology", + "Metric", + "Metrics", + "Metro", + "Mode", + "Model", + "Modeling", + "MraidVersion", + "Multiplicity", + "NotificationType", + "Offering", + "Onboarder", + "Orientation", + "Pacing", + "Pagination", + "Parameters", + "ParentMatchBehavior", + "Pass", + "Payload", + "PaymentTerms", + "Period", + "Placement", + "Plan", + "Portfolio", + "PostalArea", + "PreOnboardingPrecisionLevel", + "Preview", + "Preview2", + "Preview3", + "Price", + "ProductCard", + "Progress", + "Property", + "PropertyFeature", + "PropertyType", + "Protocol", + "Provider", + "PublisherDomain", + "PurgeKind", + "Qualifier", + "QuerySummary", + "Radius", + "Range", + "ReachWindow", + "Reason", + "Recovery", + "RefreshCadence", + "Region", + "ReportingFrequency", + "ReportingPeriod", + "Response", + "Result", + "Right", + "Rights", + "Role", + "Scope", + "SeedSource", + "SelectionMode", + "Setup", + "Signal", + "SignalId", + "SignalTag", + "Signals", + "Size", + "Snapshot", + "Sort", + "SortApplied", + "Source", + "Status", + "StatusFilter", + "Summary", + "SupportedTarget", + "SupportedVersion", + "SyncCatalogsInputRequired", + "SyncCatalogsSubmitted", + "SyncCatalogsWorking", + "SyncCreativesInputRequired", + "SyncCreativesSubmitted", + "SyncCreativesWorking", + "Tag", + "Target", + "TargetingMode", + "Task", + "Taxonomy", + "TotalBudget", + "Totals", + "TrainingDataJurisdiction", + "Transition", + "TravelTime", + "TrustedMatch", + "Type", + "Uid", + "Unit", + "UpdateMediaBuyInputRequired", + "UpdateMediaBuySubmitted", + "UpdateMediaBuyWorking", + "Value", + "ValueMapping", + "Variant", + "VariantDimension", + "VastVersion", + "VendorMetricOptimization", + "Viewability", + "Violation" + ] +} diff --git a/scripts/consolidate_exports.py b/scripts/consolidate_exports.py index bfdeb0f2..6a64d6fa 100644 --- a/scripts/consolidate_exports.py +++ b/scripts/consolidate_exports.py @@ -4,11 +4,20 @@ This script analyzes all modules in generated_poc/ and creates a single generated.py that imports and re-exports all public types, handling naming conflicts appropriately. + +A build guard fails the consolidate step for any name collision that is neither +handled via qualified imports (KNOWN_COLLISIONS) nor recorded in the checked-in +allowlist (collision_allowlist.json). See issue #911. + +Usage: + python scripts/consolidate_exports.py # consolidate + guard + python scripts/consolidate_exports.py --update-allowlist # refresh the snapshot """ from __future__ import annotations import ast +import json import subprocess import sys from datetime import datetime, timezone @@ -17,6 +26,83 @@ GENERATED_POC_DIR = Path(__file__).parent.parent / "src" / "adcp" / "types" / "generated_poc" OUTPUT_FILE = Path(__file__).parent.parent / "src" / "adcp" / "types" / "_generated.py" +# Checked-in snapshot of every bare type name that is defined in more than one +# non-bundled generated module today and is knowingly resolved by first-seen / +# stem-preference order (see the build guard in generate_consolidated_exports). +# Regenerate with: python scripts/consolidate_exports.py --update-allowlist +COLLISION_ALLOWLIST_FILE = Path(__file__).parent / "collision_allowlist.json" + +# Names handled explicitly via qualified imports (see KNOWN_COLLISIONS below). +# These are NOT in the allowlist file — the qualified-import machinery already +# exports every variant, so neither version silently wins. +# +# We need BOTH versions of these types available, so import them with qualified +# names. +KNOWN_COLLISIONS: dict[str, set[str]] = { + "Package": {"package", "create_media_buy_response", "get_media_buys_response"}, + # DeliveryStatus appears in get_media_buy_delivery_response (5 values) and + # get_media_buys_response (6 values, adds not_delivering). Export both with + # qualified names so aliases.py can re-export the superset as the canonical one. + "DeliveryStatus": {"get_media_buy_delivery_response", "get_media_buys_response"}, + # Note: "Catalog" also collides between core.catalog and media_buy.sync_catalogs_response. + # We intentionally let core.catalog win (first-seen, since core/ sorts before media_buy/). + # The response-level Catalog is imported directly in aliases.py as SyncCatalogResult. + # Audience collides between get_media_buy_delivery_request (breakdown config) and + # sync_audiences_request (audience payload). aliases.py imports the request one directly. + "Audience": { + "get_media_buy_delivery_request", + "sync_audiences_request", + "sync_audiences_response", + }, + # Error collides between core.error (Pydantic model used everywhere) and + # compliance.comply_test_controller_response (test-only enum). Export both + # with qualified names; aliases/init re-export core Error as the canonical one. + "Error": {"error", "comply_test_controller_response"}, + # FormatId: AdCP 3.0.1 renamed core/format-id.json title from "Format ID" + # to "Format Reference (Structured Object)". The canonical class in + # core/format_id.py is now FormatReferenceStructuredObject, but every + # bundled-message file inlines a per-message duplicate still named + # FormatId. Without this entry, the bundled stale duplicate would win + # the bare-name slot in _generated.py and shadow the canonical class. + # aliases.py re-exports the canonical FormatReferenceStructuredObject as + # the public FormatId. + "FormatId": { + "build_creative_request", + "build_creative_response", + "calibrate_content_request", + "create_content_standards_request", + "create_media_buy_request", + "create_media_buy_response", + "get_content_standards_response", + "get_creative_delivery_response", + "get_creative_features_request", + "get_media_buy_artifacts_response", + "get_products_request", + "get_products_response", + "list_content_standards_response", + "list_creative_formats_request", + "list_creative_formats_response", + "list_creatives_request", + "list_creatives_response", + "package_request", + "preview_creative_request", + "preview_creative_response", + "sync_creatives_request", + "update_content_standards_request", + "update_media_buy_request", + "update_media_buy_response", + "validate_content_delivery_request", + }, +} + + +def load_collision_allowlist() -> set[str]: + """Load the checked-in snapshot of knowingly-resolved collision names.""" + if not COLLISION_ALLOWLIST_FILE.exists(): + return set() + data = json.loads(COLLISION_ALLOWLIST_FILE.read_text()) + return set(data["allowlist"]) + def extract_exports_from_module(module_path: Path) -> set[str]: """Extract all public class and type alias names from a Python module.""" @@ -52,6 +138,107 @@ def _add_public_type_name(name: str) -> None: return exports +def _collisions_from_accounting( + name_to_modules: dict[str, set[str]], known_names: set[str] +) -> set[str]: + """Names defined in >1 module, excluding the qualified-import known set.""" + return {n for n, mods in name_to_modules.items() if len(mods) > 1} - known_names + + +def _enforce_collision_allowlist( + name_to_modules: dict[str, set[str]], known_names: set[str] +) -> None: + """Fail the build for any collision not in the checked-in allowlist. + + A collision is a bare type name defined in more than one non-bundled + generated module. ``KNOWN_COLLISIONS`` handles its set via qualified + imports; every other collision is silently resolved by first-seen / + stem-preference order, which means one class shadows the others for + adopters importing from ``adcp.types``. The allowlist is a snapshot of the + collisions we knowingly tolerate today. Anything not in it is a NEW or + CHANGED collision and must be triaged before it ships. + """ + collisions = _collisions_from_accounting(name_to_modules, known_names) + allowlist = load_collision_allowlist() + + unexpected = sorted(collisions - allowlist) + if not unexpected: + return + + lines = [] + for name in unexpected: + mods = sorted(name_to_modules[name]) + lines.append(f" {name}: defined in {mods}") + details = "\n".join(lines) + raise ValueError( + f"{len(unexpected)} new name collision(s) in generated_poc are not in " + f"the checked-in allowlist:\n{details}\n\n" + "A collision means the same bare type name is defined in more than one " + "generated module, so 'from adcp.types import ' silently resolves " + "to whichever module wins the sort order — adopters get an unpredictable " + "class shape.\n\n" + "To fix, pick ONE:\n" + " 1. Add a disambiguated alias in src/adcp/types/aliases.py so adopters " + "can import each variant by an unambiguous name (preferred).\n" + f" 2. If this name genuinely belongs in {COLLISION_ALLOWLIST_FILE.name} " + "(first-seen resolution is acceptable), regenerate the allowlist with a " + "justification:\n" + " python scripts/consolidate_exports.py --update-allowlist\n" + " and review the diff — every added name is a class an adopter can no " + "longer import unambiguously from adcp.types.\n" + " 3. If the name needs BOTH variants exported under qualified names, add " + "it to KNOWN_COLLISIONS in scripts/consolidate_exports.py.\n" + ) + + +def update_collision_allowlist() -> int: + """Regenerate the checked-in collision allowlist from the current tree.""" + name_to_modules = _scan_name_to_modules() + collisions = _collisions_from_accounting(name_to_modules, set(KNOWN_COLLISIONS)) + payload = { + "_comment": ( + "Snapshot of bare type names defined in more than one non-bundled " + "generated_poc module that are knowingly resolved by first-seen / " + "stem-preference order in scripts/consolidate_exports.py. Each name " + "is a class an adopter cannot import unambiguously from adcp.types. " + "Regenerate with: python scripts/consolidate_exports.py " + "--update-allowlist. A growing list is a signal to add disambiguated " + "aliases in src/adcp/types/aliases.py (issue #911)." + ), + "allowlist": sorted(collisions), + } + COLLISION_ALLOWLIST_FILE.write_text(json.dumps(payload, indent=2) + "\n") + print(f"Wrote {len(collisions)} collision names to {COLLISION_ALLOWLIST_FILE}") + return 0 + + +def _scan_name_to_modules() -> dict[str, set[str]]: + """Map every public name to the set of non-bundled modules that define it.""" + + def _module_sort_key(p: Path) -> tuple[int, int, str]: + rel = p.relative_to(GENERATED_POC_DIR) + is_enum = rel.parts[0] == "enums" if len(rel.parts) > 1 else False + is_bundled = rel.parts[0] == "bundled" if len(rel.parts) > 1 else False + return (0 if is_enum else 1, 1 if is_bundled else 0, str(p)) + + modules = sorted(GENERATED_POC_DIR.rglob("*.py"), key=_module_sort_key) + modules = [ + m + for m in modules + if m.stem != "__init__" + and not m.stem.startswith(".") + and m.relative_to(GENERATED_POC_DIR).parts[0] != "bundled" + ] + + name_to_modules: dict[str, set[str]] = {} + for module_path in modules: + rel_path = module_path.relative_to(GENERATED_POC_DIR) + module_name = ".".join(list(rel_path.parts[:-1]) + [rel_path.stem]) + for export_name in extract_exports_from_module(module_path): + name_to_modules.setdefault(export_name, set()).add(module_name) + return name_to_modules + + def generate_consolidated_exports() -> str: """Generate the consolidated exports file content.""" @@ -90,62 +277,13 @@ def _module_sort_key(p: Path) -> tuple[int, int, str]: # Special handling for known collisions # We need BOTH versions of these types available, so import them with qualified names - known_collisions = { - "Package": {"package", "create_media_buy_response", "get_media_buys_response"}, - # DeliveryStatus appears in get_media_buy_delivery_response (5 values) and - # get_media_buys_response (6 values, adds not_delivering). Export both with - # qualified names so aliases.py can re-export the superset as the canonical one. - "DeliveryStatus": {"get_media_buy_delivery_response", "get_media_buys_response"}, - # Note: "Catalog" also collides between core.catalog and media_buy.sync_catalogs_response. - # We intentionally let core.catalog win (first-seen, since core/ sorts before media_buy/). - # The response-level Catalog is imported directly in aliases.py as SyncCatalogResult. - # Audience collides between get_media_buy_delivery_request (breakdown config) and - # sync_audiences_request (audience payload). aliases.py imports the request one directly. - "Audience": { - "get_media_buy_delivery_request", - "sync_audiences_request", - "sync_audiences_response", - }, - # Error collides between core.error (Pydantic model used everywhere) and - # compliance.comply_test_controller_response (test-only enum). Export both - # with qualified names; aliases/init re-export core Error as the canonical one. - "Error": {"error", "comply_test_controller_response"}, - # FormatId: AdCP 3.0.1 renamed core/format-id.json title from "Format ID" - # to "Format Reference (Structured Object)". The canonical class in - # core/format_id.py is now FormatReferenceStructuredObject, but every - # bundled-message file inlines a per-message duplicate still named - # FormatId. Without this entry, the bundled stale duplicate would win - # the bare-name slot in _generated.py and shadow the canonical class. - # aliases.py re-exports the canonical FormatReferenceStructuredObject as - # the public FormatId. - "FormatId": { - "build_creative_request", - "build_creative_response", - "calibrate_content_request", - "create_content_standards_request", - "create_media_buy_request", - "create_media_buy_response", - "get_content_standards_response", - "get_creative_delivery_response", - "get_creative_features_request", - "get_media_buy_artifacts_response", - "get_products_request", - "get_products_response", - "list_content_standards_response", - "list_creative_formats_request", - "list_creative_formats_response", - "list_creatives_request", - "list_creatives_response", - "package_request", - "preview_creative_request", - "preview_creative_response", - "sync_creatives_request", - "update_content_standards_request", - "update_media_buy_request", - "update_media_buy_response", - "validate_content_delivery_request", - }, - } + known_collisions = KNOWN_COLLISIONS + + # Record every module that defines each name so the build guard can detect + # name collisions independently of which module wins the bare-name slot. + # A name in >1 module is a collision regardless of whether it resolves via + # first-seen or stem-preference order. + name_to_modules: dict[str, set[str]] = {} special_imports = [] collision_modules_seen: dict[str, set[str]] = {name: set() for name in known_collisions} @@ -171,6 +309,8 @@ def _stem_matches_export(module_stem: str, export_name: str) -> bool: module_exports[module_name] = exports for export_name in exports: + name_to_modules.setdefault(export_name, set()).add(module_name) + if export_name in known_collisions and display_name in known_collisions[export_name]: collision_modules_seen[export_name].add(module_name) # Sentinel: known collisions are only imported via qualified @@ -197,6 +337,14 @@ def _stem_matches_export(module_stem: str, export_name: str) -> bool: else: export_to_module[export_name] = module_name + # Build guard: every name defined in more than one non-bundled module is a + # collision. KNOWN_COLLISIONS handles its set via qualified imports; the + # rest are knowingly resolved by sort order and must be snapshotted in the + # checked-in allowlist. A NEW or CHANGED collision that is in neither set + # would silently shadow a class for adopters importing from adcp.types — so + # we fail the build instead of logging a warning (issue #911, Step 1). + _enforce_collision_allowlist(name_to_modules, set(known_collisions)) + # Second pass: emit one import line per module with only the exports it owns. for module_name, exports in module_exports.items(): owned = {e for e in exports if export_to_module.get(e) == module_name} @@ -506,6 +654,12 @@ def _stem_matches_export(module_stem: str, export_name: str) -> bool: def main(): """Generate consolidated exports file.""" + if "--update-allowlist" in sys.argv: + if not GENERATED_POC_DIR.exists(): + print(f"Error: {GENERATED_POC_DIR} does not exist") + return 1 + return update_collision_allowlist() + print("Generating consolidated exports from generated_poc modules...") if not GENERATED_POC_DIR.exists(): diff --git a/tests/test_collision_guard.py b/tests/test_collision_guard.py new file mode 100644 index 00000000..3bb4ab1d --- /dev/null +++ b/tests/test_collision_guard.py @@ -0,0 +1,94 @@ +"""Tests for the consolidate-step name-collision build guard (issue #911, Step 1). + +`consolidate_exports.py` flattens every `generated_poc/` module into a single +namespace. When the same bare type name is defined in more than one module, one +class silently shadows the others for adopters importing from `adcp.types`. + +The build guard fails the consolidate step for any collision that is neither +handled via qualified imports (`KNOWN_COLLISIONS`) nor recorded in the +checked-in allowlist snapshot. These tests assert the guard passes on the +current tree and raises for a synthetic new collision. +""" + +from __future__ import annotations + +import pytest + +from scripts.consolidate_exports import ( + KNOWN_COLLISIONS, + _enforce_collision_allowlist, + _scan_name_to_modules, + load_collision_allowlist, +) + + +def test_allowlist_snapshot_is_present_and_nonempty(): + """The checked-in allowlist seeds the guard with today's collision set.""" + allowlist = load_collision_allowlist() + assert allowlist, "collision_allowlist.json is missing or empty" + # Sanity: a few names called out in issue #911 must be snapshotted. + for name in ("Creative", "Account", "Authentication", "Sort", "Unit"): + assert name in allowlist, f"{name} should be in the seeded allowlist" + + +def test_known_collisions_are_not_in_allowlist(): + """Qualified-import collisions are handled separately, not via the allowlist.""" + allowlist = load_collision_allowlist() + overlap = set(KNOWN_COLLISIONS) & allowlist + assert ( + overlap == set() + ), f"KNOWN_COLLISIONS names must not also be in the allowlist: {sorted(overlap)}" + + +def test_current_tree_consolidates_cleanly(): + """Guard passes on the current generated tree against the seeded allowlist.""" + name_to_modules = _scan_name_to_modules() + # Must not raise. + _enforce_collision_allowlist(name_to_modules, set(KNOWN_COLLISIONS)) + + +def test_allowlist_matches_current_collisions_exactly(): + """The snapshot is neither stale nor padded with non-colliding names. + + Every allowlisted name must still collide in the tree; every collision not + handled via qualified imports must be in the allowlist. + """ + name_to_modules = _scan_name_to_modules() + collisions = {name for name, mods in name_to_modules.items() if len(mods) > 1} - set( + KNOWN_COLLISIONS + ) + allowlist = load_collision_allowlist() + assert allowlist == collisions, ( + "Allowlist drifted from the real collision set. Regenerate with " + "`python scripts/consolidate_exports.py --update-allowlist`.\n" + f" In allowlist but no longer colliding: {sorted(allowlist - collisions)}\n" + f" Colliding but missing from allowlist: {sorted(collisions - allowlist)}" + ) + + +def test_new_collision_not_in_allowlist_raises(): + """A new bare name in two modules, absent from the allowlist, fails the build.""" + name_to_modules = _scan_name_to_modules() + # Synthetic collision: a name defined in two modules that is not in the + # allowlist and not a known collision. + synthetic = "WidgetCollisionGuardSentinel" + assert synthetic not in load_collision_allowlist() + name_to_modules[synthetic] = {"core.widget_a", "core.widget_b"} + + with pytest.raises(ValueError) as excinfo: + _enforce_collision_allowlist(name_to_modules, set(KNOWN_COLLISIONS)) + + message = str(excinfo.value) + assert synthetic in message + assert "not in the checked-in allowlist" in message + # The remediation guidance must tell a contributor what to do. + assert "aliases.py" in message + assert "--update-allowlist" in message + assert "KNOWN_COLLISIONS" in message + + +def test_single_definition_name_does_not_trip_guard(): + """A name defined in exactly one module is not a collision.""" + name_to_modules = {"SoloUniqueGuardSentinel": {"core.solo"}} + # Must not raise. + _enforce_collision_allowlist(name_to_modules, set(KNOWN_COLLISIONS))