Skip to content
Draft

Pr 12724 #12898

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions compose.near-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
## You probably want to run:
## COMPOSE_FILE="compose.yaml:compose.override.yaml:compose.near-prod.yaml" docker compose up -d
##
## Or, to start only the affiliate-server and squid services:
## COMPOSE_FILE="compose.yaml:compose.override.yaml:compose.near-prod.yaml" docker compose up -d affiliate-server squid
##

services:
solr:
Expand Down Expand Up @@ -45,5 +47,32 @@ services:
# Override with own copy of solr data
- solr-replica-data:/var/solr

affiliate-server:
image: "${OLIMAGE:-openlibrary/olbase:latest}"
environment:
- AFFILIATE_CONFIG=./conf/openlibrary.yml
command: docker/ol-affiliate-server-start.sh
ports:
- 31337:31337
networks:
- webnet
volumes:
- ${OL_MOUNT_DIR:-.}:/openlibrary
logging:
options:
max-size: "512m"
max-file: "4"

squid:
image: ubuntu/squid:5.2-22.04_beta
ports:
- 3128:3128
networks:
- webnet
logging:
options:
max-size: "512m"
max-file: "4"

volumes:
solr-replica-data:
15 changes: 15 additions & 0 deletions conf/openlibrary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,18 @@ sentry_cron_jobs:

# Observations cache settings:
observation_cache_duration: 86400

# Proxy configuration.
# http_proxy sets the global default (no auth) via HTTP_PROXY/HTTPS_PROXY env vars.
# http_proxies overrides per service with credentials; each entry has url/user/password.
# Dev/local: leave both unset — no proxy needed.
# http_proxy: http://squid.example.com:3128
# http_proxies:
# recaptcha:
# url: http://squid.example.com:3128
# user: ''
# password: ''
# amazon:
# url: http://squid.example.com:3128
# user: ''
# password: ''
50 changes: 24 additions & 26 deletions openlibrary/core/vendors.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,34 +401,13 @@ def __init__(
tag=tag,
country=getattr(Country, country),
throttling=0,
proxy=(
_build_authenticated_proxy_url(proxy_url, proxy_creds)
if proxy_url and proxy_creds
else (proxy_url or None)
),
)

# Inject proxy into underlying SDK rest client, mirroring the PA-API approach.
# Required for ol-home0 which has no direct internet access. See #10310.
if proxy_url:
try:
from creatorsapi_python_sdk.configuration import (
Configuration as CreatorsConfig,
)
from creatorsapi_python_sdk.rest import (
RESTClientObject as CreatorsRESTClient,
)
from urllib3 import make_headers

configuration = CreatorsConfig()
configuration.proxy = proxy_url
configuration.proxy_headers = make_headers(proxy_basic_auth=proxy_creds)
rest_client = CreatorsRESTClient(configuration=configuration)
# _api_client is the ApiClient instance stored directly on
# AmazonCreatorsApi; replace its rest_client to route all
# outbound HTTP through the proxy.
self.api._api_client.rest_client = rest_client
except (ImportError, AttributeError):
logger.warning(
"AmazonCreatorsAPI: could not inject proxy — falling back to environment-level proxy (HTTPS_PROXY)",
exc_info=True,
)

def get_product(self, asin: str, serialize: bool = False, **kwargs):
if products := self.get_products([asin], **kwargs):
return next(self.serialize(p) if serialize else p for p in products)
Expand Down Expand Up @@ -907,3 +886,22 @@ def betterworldbooks_fmt(
"price_amt": price,
"qlt": qlt,
}


def _build_authenticated_proxy_url(proxy_url: str, proxy_creds: str) -> str:
"""
Parses proxy URL and credentials, returning a proxy URL with embedded auth.

:param str proxy_url: HTTP proxy URL (e.g., 'http://proxy.example.com:3128')
:param str proxy_creds: Proxy credentials in 'user:password' format
:return: Proxy URL including credentials
"""
from urllib.parse import quote, urlparse, urlunparse

user, _, password = proxy_creds.partition(":")
parsed = urlparse(proxy_url)
netloc = f"{quote(user, safe='')}:{quote(password, safe='')}@{parsed.hostname}"
if parsed.port:
netloc += f":{parsed.port}"
auth_proxy_url = urlunparse(parsed._replace(netloc=netloc))
return auth_proxy_url
3 changes: 2 additions & 1 deletion openlibrary/plugins/recaptcha/recaptcha.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import web

from infogami import config
from openlibrary.plugins.upstream.utils import get_proxy_params

logger = logging.getLogger("openlibrary")

Expand Down Expand Up @@ -44,7 +45,7 @@ def accept_error(error_codes: list[str]) -> bool:
}

try:
r = requests.get(url, params=params, timeout=3)
r = requests.get(url, params=params, timeout=3, proxies=get_proxy_params("recaptcha"))
except requests.exceptions.RequestException:
logger.exception("Recaptcha call failed: letting user through")
return True
Expand Down
48 changes: 48 additions & 0 deletions openlibrary/plugins/upstream/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,3 +383,51 @@ def test_get_language_name(add_languages): # noqa: F811
assert utils.get_language_name("/languages/ger", "en") == "German"
# Falls back to name when translation missing for requested language
assert utils.get_language_name("/languages/ger", "fr") == "Deutsch"


class TestGetProxyParams:
    """Exercise ``utils.get_proxy_params`` against a patched ``utils.config``."""

    def _recaptcha_proxies(self, http_proxies):
        """Look up the "recaptcha" proxies while ``config.get`` yields *http_proxies*."""
        with patch("openlibrary.plugins.upstream.utils.config") as fake_config:
            fake_config.get.return_value = http_proxies
            return utils.get_proxy_params("recaptcha")

    def test_no_http_proxies_config(self):
        # An empty http_proxies section yields no proxies.
        assert self._recaptcha_proxies({}) is None

    def test_unknown_service_tag(self):
        # Only a different service ("amazon") has an entry.
        amazon_only = {"amazon": {"url": "http://proxy:3128"}}
        assert self._recaptcha_proxies(amazon_only) is None

    def test_url_only_no_auth(self):
        # Without credentials the URL is passed through unchanged.
        entry = {"url": "http://proxy:3128"}
        proxies = self._recaptcha_proxies({"recaptcha": entry})
        assert proxies == {"http": "http://proxy:3128", "https": "http://proxy:3128"}

    def test_url_with_auth(self):
        # User and password are embedded as URL userinfo.
        entry = {
            "url": "http://proxy:3128",
            "user": "myuser",
            "password": "mypass",
        }
        proxies = self._recaptcha_proxies({"recaptcha": entry})
        assert proxies == {
            "http": "http://myuser:mypass@proxy:3128",
            "https": "http://myuser:mypass@proxy:3128",
        }

    def test_special_chars_in_credentials_are_encoded(self):
        # '@' and ':' inside the credentials must come back percent-encoded.
        entry = {
            "url": "http://proxy:3128",
            "user": "u@ser",
            "password": "p@ss:word",
        }
        proxies = self._recaptcha_proxies({"recaptcha": entry})
        assert proxies == {
            "http": "http://u%40ser:p%40ss%3Aword@proxy:3128",
            "https": "http://u%40ser:p%40ss%3Aword@proxy:3128",
        }
31 changes: 31 additions & 0 deletions openlibrary/plugins/upstream/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,37 @@ def setup_requests(config=config) -> None:
logger.info("Requests set up")


def get_proxy_params(service_tag: str) -> dict[str, str] | None:
    """Return a requests-compatible proxies dict for a service requiring proxy auth.

    Reads from the ``http_proxies`` config section. Each entry may have:
        url: proxy base URL
        user: proxy username
        password: proxy password

    When ``user`` is set, the user name and password are percent-encoded and
    embedded in the proxy URL as userinfo, replacing any userinfo already
    present in ``url``.

    Returns None when no service-specific config exists so that callers can
    pass the result directly as ``proxies=`` to requests — None means requests
    will fall back to the global HTTP_PROXY/HTTPS_PROXY env vars set by
    setup_requests().

    :param service_tag: Key of the service under ``http_proxies``
        (e.g. "recaptcha").
    :return: ``{"http": url, "https": url}``, or None when the service has no
        entry or its entry has no ``url``.
    """
    # ``or {}`` also covers an ``http_proxies:`` key that is present but empty
    # (YAML null), which would otherwise raise AttributeError on ``.get``.
    service = (config.get("http_proxies") or {}).get(service_tag)
    if not service:
        return None

    proxy_url = service.get("url") or ""
    if not proxy_url:
        return None

    # A key written with no value loads as None; treat that like "unset" so
    # quote() below never receives a non-string.
    user = service.get("user") or ""
    password = service.get("password") or ""

    if user:
        parsed = urlparse(proxy_url)
        host = parsed.hostname
        # ``hostname`` strips the square brackets from an IPv6 literal; put
        # them back so the address's colons do not collide with the port.
        if host and ":" in host:
            host = f"[{host}]"
        netloc = f"{quote(user, safe='')}:{quote(password, safe='')}@{host}"
        if parsed.port:
            netloc += f":{parsed.port}"
        proxy_url = urlunparse(parsed._replace(netloc=netloc))

    return {"http": proxy_url, "https": proxy_url}


def setup() -> None:
"""Do required initialization"""
# monkey-patch get_markdown to use OL Flavored Markdown
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ prometheus-fastapi-instrumentator==7.1.0
psycopg2==2.9.12
pydantic==2.13.4
pymarc==5.3.1
python-amazon-paapi==6.2.0
git+https://github.com/mekarpeles/python-amazon-paapi.git@proxy-support
python-dateutil==2.9.0.post0
python-memcached==1.62
python-multipart==0.0.28
Expand Down
11 changes: 9 additions & 2 deletions scripts/affiliate_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,15 @@ def GET(self, identifier: str) -> str:
def load_config(configfile):
# This loads openlibrary.yml + infobase.yml
openlibrary_load_config(configfile)
http_proxy_url = config.get("http_proxy")
http_proxy_creds = config.get("http_proxy_creds")

# Prefer per-service proxy config under http_proxies.amazon; fall back to the
# legacy flat keys http_proxy / http_proxy_creds for backward compatibility.
amazon_proxy_cfg = config.get("http_proxies", {}).get("amazon", {})
http_proxy_url = amazon_proxy_cfg.get("url") or config.get("http_proxy")
if amazon_proxy_cfg.get("user"):
http_proxy_creds = f"{amazon_proxy_cfg['user']}:{amazon_proxy_cfg.get('password', '')}"
else:
http_proxy_creds = config.get("http_proxy_creds", "")

stats.client = stats.create_stats_client(cfg=config)

Expand Down