diff --git a/compose.near-prod.yaml b/compose.near-prod.yaml index a8e2f53087a..63077f51c75 100644 --- a/compose.near-prod.yaml +++ b/compose.near-prod.yaml @@ -11,6 +11,8 @@ ## You probably want to run: ## COMPOSE_FILE="compose.yaml:compose.override.yaml:compose.near-prod.yaml" docker compose up -d ## +## COMPOSE_FILE="compose.yaml:compose.override.yaml:compose.near-prod.yaml" docker compose up -d affiliate-server squid +## services: solr: @@ -45,5 +47,32 @@ services: # Override with own copy of solr data - solr-replica-data:/var/solr + affiliate-server: + image: "${OLIMAGE:-openlibrary/olbase:latest}" + environment: + - AFFILIATE_CONFIG=./conf/openlibrary.yml + command: docker/ol-affiliate-server-start.sh + ports: + - 31337:31337 + networks: + - webnet + volumes: + - ${OL_MOUNT_DIR:-.}:/openlibrary + logging: + options: + max-size: "512m" + max-file: "4" + + squid: + image: ubuntu/squid:5.2-22.04_beta + ports: + - 3128:3128 + networks: + - webnet + logging: + options: + max-size: "512m" + max-file: "4" + volumes: solr-replica-data: diff --git a/conf/openlibrary.yml b/conf/openlibrary.yml index e75fd373088..68d6c5db1ee 100644 --- a/conf/openlibrary.yml +++ b/conf/openlibrary.yml @@ -190,3 +190,18 @@ sentry_cron_jobs: # Observations cache settings: observation_cache_duration: 86400 + +# Proxy configuration. +# http_proxy sets the global default (no auth) via HTTP_PROXY/HTTPS_PROXY env vars. +# http_proxies overrides per service with credentials; each entry has url/user/password. +# Dev/local: leave both unset — no proxy needed. +# http_proxy: http://squid.example.com:3128 +# http_proxies: +# recaptcha: +# url: http://squid.example.com:3128 +# user: '' +# password: '' +# amazon: +# url: http://squid.example.com:3128 +# user: '' +# password: '' diff --git a/openlibrary/core/vendors.py b/openlibrary/core/vendors.py index a038af74625..5c86c323b9f 100644 --- a/openlibrary/core/vendors.py +++ b/openlibrary/core/vendors.py @@ -401,34 +401,13 @@ def __init__( tag=tag, country=getattr(Country, country), throttling=0, + proxy=( + _build_authenticated_proxy_url(proxy_url, proxy_creds) + if proxy_url and proxy_creds + else (proxy_url or None) + ), ) - # Inject proxy into underlying SDK rest client, mirroring the PA-API approach. - # Required for ol-home0 which has no direct internet access. See #10310. - if proxy_url: - try: - from creatorsapi_python_sdk.configuration import ( - Configuration as CreatorsConfig, - ) - from creatorsapi_python_sdk.rest import ( - RESTClientObject as CreatorsRESTClient, - ) - from urllib3 import make_headers - - configuration = CreatorsConfig() - configuration.proxy = proxy_url - configuration.proxy_headers = make_headers(proxy_basic_auth=proxy_creds) - rest_client = CreatorsRESTClient(configuration=configuration) - # _api_client is the ApiClient instance stored directly on - # AmazonCreatorsApi; replace its rest_client to route all - # outbound HTTP through the proxy. - self.api._api_client.rest_client = rest_client - except (ImportError, AttributeError): - logger.warning( - "AmazonCreatorsAPI: could not inject proxy — falling back to environment-level proxy (HTTPS_PROXY)", - exc_info=True, - ) - def get_product(self, asin: str, serialize: bool = False, **kwargs): if products := self.get_products([asin], **kwargs): return next(self.serialize(p) if serialize else p for p in products) @@ -907,3 +886,22 @@ def betterworldbooks_fmt( "price_amt": price, "qlt": qlt, } + + +def _build_authenticated_proxy_url(proxy_url: str, proxy_creds: str) -> str: + """ + Parses proxy URL and credentials, returning a proxy URL with embedded auth. + + :param str proxy_url: HTTP proxy URL (e.g., 'http://proxy.example.com:3128') + :param str proxy_creds: Proxy credentials in 'user:password' format + :return: Proxy URL including credentials + """ + from urllib.parse import quote, urlparse, urlunparse + + user, _, password = proxy_creds.partition(":") + parsed = urlparse(proxy_url) + netloc = f"{quote(user, safe='')}:{quote(password, safe='')}@{parsed.hostname}" + if parsed.port: + netloc += f":{parsed.port}" + auth_proxy_url = urlunparse(parsed._replace(netloc=netloc)) + return auth_proxy_url diff --git a/openlibrary/plugins/recaptcha/recaptcha.py b/openlibrary/plugins/recaptcha/recaptcha.py index 08d09a04a6f..803556d9915 100644 --- a/openlibrary/plugins/recaptcha/recaptcha.py +++ b/openlibrary/plugins/recaptcha/recaptcha.py @@ -6,6 +6,7 @@ import web from infogami import config +from openlibrary.plugins.upstream.utils import get_proxy_params logger = logging.getLogger("openlibrary") @@ -44,7 +45,7 @@ def accept_error(error_codes: list[str]) -> bool: } try: - r = requests.get(url, params=params, timeout=3) + r = requests.get(url, params=params, timeout=3, proxies=get_proxy_params("recaptcha")) except requests.exceptions.RequestException: logger.exception("Recaptcha call failed: letting user through") return True diff --git a/openlibrary/plugins/upstream/tests/test_utils.py b/openlibrary/plugins/upstream/tests/test_utils.py index b9acbfee836..7a36a159346 100644 --- a/openlibrary/plugins/upstream/tests/test_utils.py +++ b/openlibrary/plugins/upstream/tests/test_utils.py @@ -383,3 +383,51 @@ def test_get_language_name(add_languages): # noqa: F811 assert utils.get_language_name("/languages/ger", "en") == "German" # Falls back to name when translation missing for requested language assert utils.get_language_name("/languages/ger", "fr") == "Deutsch" + + +class TestGetProxyParams: + def test_no_http_proxies_config(self): + with patch("openlibrary.plugins.upstream.utils.config") as mock_config: + mock_config.get.return_value = {} + assert utils.get_proxy_params("recaptcha") is None + + def test_unknown_service_tag(self): + with patch("openlibrary.plugins.upstream.utils.config") as mock_config: + mock_config.get.return_value = {"amazon": {"url": "http://proxy:3128"}} + assert utils.get_proxy_params("recaptcha") is None + + def test_url_only_no_auth(self): + with patch("openlibrary.plugins.upstream.utils.config") as mock_config: + mock_config.get.return_value = {"recaptcha": {"url": "http://proxy:3128"}} + result = utils.get_proxy_params("recaptcha") + assert result == {"http": "http://proxy:3128", "https": "http://proxy:3128"} + + def test_url_with_auth(self): + with patch("openlibrary.plugins.upstream.utils.config") as mock_config: + mock_config.get.return_value = { + "recaptcha": { + "url": "http://proxy:3128", + "user": "myuser", + "password": "mypass", + } + } + result = utils.get_proxy_params("recaptcha") + assert result == { + "http": "http://myuser:mypass@proxy:3128", + "https": "http://myuser:mypass@proxy:3128", + } + + def test_special_chars_in_credentials_are_encoded(self): + with patch("openlibrary.plugins.upstream.utils.config") as mock_config: + mock_config.get.return_value = { + "recaptcha": { + "url": "http://proxy:3128", + "user": "u@ser", + "password": "p@ss:word", + } + } + result = utils.get_proxy_params("recaptcha") + assert result == { + "http": "http://u%40ser:p%40ss%3Aword@proxy:3128", + "https": "http://u%40ser:p%40ss%3Aword@proxy:3128", + } diff --git a/openlibrary/plugins/upstream/utils.py b/openlibrary/plugins/upstream/utils.py index 82eb0a96f91..0531540c45b 100644 --- a/openlibrary/plugins/upstream/utils.py +++ b/openlibrary/plugins/upstream/utils.py @@ -1623,6 +1623,37 @@ def setup_requests(config=config) -> None: logger.info("Requests set up") +def get_proxy_params(service_tag: str) -> dict[str, str] | None: + """Return a requests-compatible proxies dict for a service requiring proxy auth. + + Reads from the ``http_proxies`` config section. Each entry may have: + url: proxy base URL + user: proxy username + password: proxy password + + Returns None when no service-specific config exists so that callers can + pass the result directly as ``proxies=`` to requests — None means requests + will fall back to the global HTTP_PROXY/HTTPS_PROXY env vars set by + setup_requests(). + """ + service = config.get("http_proxies", {}).get(service_tag) + if not service: + return None + + proxy_url = service.get("url", "") + user = service.get("user", "") + password = service.get("password", "") + + if user and proxy_url: + parsed = urlparse(proxy_url) + netloc = f"{quote(user, safe='')}:{quote(password, safe='')}@{parsed.hostname}" + if parsed.port: + netloc += f":{parsed.port}" + proxy_url = urlunparse(parsed._replace(netloc=netloc)) + + return {"http": proxy_url, "https": proxy_url} if proxy_url else None + + def setup() -> None: """Do required initialization""" # monkey-patch get_markdown to use OL Flavored Markdown diff --git a/requirements.txt b/requirements.txt index 7e6f63dde76..1749c3e8729 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,7 +24,7 @@ prometheus-fastapi-instrumentator==7.1.0 psycopg2==2.9.12 pydantic==2.13.4 pymarc==5.3.1 -python-amazon-paapi==6.2.0 +git+https://github.com/mekarpeles/python-amazon-paapi.git@proxy-support python-dateutil==2.9.0.post0 python-memcached==1.62 python-multipart==0.0.28 diff --git a/scripts/affiliate_server.py b/scripts/affiliate_server.py index 00c648bcd03..00ac9a2f2e7 100644 --- a/scripts/affiliate_server.py +++ b/scripts/affiliate_server.py @@ -638,8 +638,15 @@ def GET(self, identifier: str) -> str: def load_config(configfile): # This loads openlibrary.yml + infobase.yml openlibrary_load_config(configfile) - http_proxy_url = config.get("http_proxy") - http_proxy_creds = config.get("http_proxy_creds") + + # Prefer per-service proxy config under http_proxies.amazon; fall back to the + # legacy flat keys http_proxy / http_proxy_creds for backward compatibility. + amazon_proxy_cfg = config.get("http_proxies", {}).get("amazon", {}) + http_proxy_url = amazon_proxy_cfg.get("url") or config.get("http_proxy") + if amazon_proxy_cfg.get("user"): + http_proxy_creds = f"{amazon_proxy_cfg['user']}:{amazon_proxy_cfg.get('password', '')}" + else: + http_proxy_creds = config.get("http_proxy_creds", "") stats.client = stats.create_stats_client(cfg=config)