diff --git a/CHANGELOG.md b/CHANGELOG.md index f56416b..9cf502e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,26 @@ ### Changed +- `EndpointChecker` (the `rl.php` accessibility check used by + `hook_requirements()`) hardened against false negatives behind reverse + proxies and false positives from misconfigured redirects (#50): + - validates the response body is `pong`, not just a 200 status; + - upgrades the probe URL to HTTPS based on `X-Forwarded-Proto` even when + `$settings['reverse_proxy']` isn't configured (safe for a same-site + self-probe); + - on public-URL failure, retries on `http://127.0.0.1` with the original + `Host` header to distinguish "rl.php is broken" from "the proxy / + scheme / DNS chain to the public hostname is broken"; + - returns a structured result so the status-report description names the + actual failure mode (`redirected`, `http_error`, `body_mismatch`, + `connection_error`, `file_missing`) instead of a generic "not + accessible"; + - caches success for 1 hour but failures for only 5 minutes, so a fixed + misconfig clears without a manual cache rebuild; + - narrows the swallow-all `\Exception` fallback to cURL `errno 6/7` + (DNS / TCP-connect) so SSL handshake errors and timeouts are surfaced. + Public API: existing `EndpointChecker::isAccessible(): bool` is preserved + as a thin wrapper over the new `EndpointChecker::getResult(): array`. - **BC break (minor):** `ExperimentManagerInterface` now declares three new methods: - `purgeExperiment(string $experiment_id)` - removes turns, rewards, diff --git a/rl.install b/rl.install index d7fcab0..edaa390 100644 --- a/rl.install +++ b/rl.install @@ -7,6 +7,7 @@ use Drupal\Core\Database\SchemaObjectExistsException; use Drupal\Core\Database\SchemaObjectDoesNotExistException; +use Drupal\rl\Service\EndpointChecker; /** * Implements hook_install(). @@ -449,17 +450,82 @@ function rl_requirements($phase) { /** @var \Drupal\rl\Service\EndpointChecker $checker */ $checker = \Drupal::service('rl.endpoint_checker'); - $accessible = $checker->isAccessible(); - $requirements['rl_php_accessibility'] = [ + $result = $checker->getResult(); + $readme = 'https://git.drupalcode.org/project/rl/-/blob/1.x/README.md#post-installation-verify-rlphp-access'; + $loopback_hint = !empty($result['loopback_ok']) + ? t('Note: rl.php is reachable on loopback (@loopback), so the local web server is fine — the failure is in the public hostname → web server path (proxy, scheme, redirect rule).', ['@loopback' => $result['loopback_url'] ?? '']) + : ''; + + $requirement = [ 'title' => t('RL: rl.php accessibility'), - 'severity' => $accessible ? REQUIREMENT_OK : REQUIREMENT_ERROR, - 'value' => $accessible ? t('rl.php is accessible') : t('rl.php is not accessible'), ]; - if (!$accessible) { - $requirements['rl_php_accessibility']['description'] = t('The web server is not serving rl.php. Ensure .htaccess files are being processed (Apache) or rewrite rules are configured (Nginx). See the README for configuration instructions.', [ - '@readme' => 'https://git.drupalcode.org/project/rl/-/blob/1.x/README.md#post-installation-verify-rlphp-access', - ]); + + switch ($result['status']) { + case EndpointChecker::STATUS_OK: + $requirement['severity'] = REQUIREMENT_OK; + $requirement['value'] = t('rl.php is accessible'); + break; + + case EndpointChecker::STATUS_REDIRECTED: + $requirement['severity'] = REQUIREMENT_ERROR; + $requirement['value'] = t('rl.php probe was redirected to an unexpected target'); + $requirement['description'] = t('The probe to @url followed a redirect chain to @final and the final body was not "pong". This usually means Drupal generates absolute URLs with the wrong scheme — set $settings[\'reverse_proxy\'] and $settings[\'reverse_proxy_trusted_headers\'], or fix the redirect rule that drops the host. @hint See the README.', [ + '@url' => $result['public_url'] ?? '(unknown URL)', + '@final' => isset($result['redirects']) ? end($result['redirects']) : '(unknown)', + '@hint' => $loopback_hint, + '@readme' => $readme, + ]); + break; + + case EndpointChecker::STATUS_HTTP_ERROR: + $requirement['severity'] = REQUIREMENT_ERROR; + $requirement['value'] = t('rl.php returned HTTP @code', ['@code' => $result['code'] ?? 0]); + $requirement['description'] = t('The web server returned HTTP @code for rl.php at @url. Ensure .htaccess is processed (Apache) or rewrite rules pass real files through (Nginx). @hint See the README.', [ + '@code' => $result['code'] ?? 0, + '@url' => $result['public_url'] ?? '(unknown URL)', + '@hint' => $loopback_hint, + '@readme' => $readme, + ]); + break; + + case EndpointChecker::STATUS_BODY_MISMATCH: + $requirement['severity'] = REQUIREMENT_ERROR; + $requirement['value'] = t('rl.php returned unexpected content'); + $requirement['description'] = t('rl.php is served at @url but returned unexpected content (@detail). Another module or rewrite rule may be intercepting the request, or a redirect led somewhere wrong. @hint See the README.', [ + '@url' => $result['public_url'] ?? '(unknown URL)', + '@detail' => $result['detail'] ?? '', + '@hint' => $loopback_hint, + '@readme' => $readme, + ]); + break; + + case EndpointChecker::STATUS_CONNECTION_ERROR: + $requirement['severity'] = REQUIREMENT_ERROR; + $requirement['value'] = t('rl.php connection failed'); + $requirement['description'] = t('The HTTP probe to rl.php failed at the network layer: @detail. The web server may not be running, the public hostname may not resolve from inside the container, or TLS may be misconfigured. See the README.', [ + '@detail' => $result['detail'] ?? '', + '@readme' => $readme, + ]); + break; + + case EndpointChecker::STATUS_FILE_MISSING: + $requirement['severity'] = REQUIREMENT_ERROR; + $requirement['value'] = t('rl.php is missing on disk'); + $requirement['description'] = t('rl.php is not present where it should be: @detail. Reinstall or re-deploy the rl module.', [ + '@detail' => $result['detail'] ?? '', + ]); + break; + + default: + $requirement['severity'] = REQUIREMENT_ERROR; + $requirement['value'] = t('rl.php accessibility unknown'); + $requirement['description'] = t('The endpoint check returned an unrecognised status (@status). See the README.', [ + '@status' => $result['status'] ?? '(none)', + '@readme' => $readme, + ]); } + + $requirements['rl_php_accessibility'] = $requirement; } // Check if installed AI skill files are outdated. diff --git a/src/Service/EndpointChecker.php b/src/Service/EndpointChecker.php index 8ef58bd..bd071cd 100644 --- a/src/Service/EndpointChecker.php +++ b/src/Service/EndpointChecker.php @@ -2,24 +2,53 @@ namespace Drupal\rl\Service; -use Drupal\Core\State\StateInterface; use Drupal\Component\Datetime\TimeInterface; use Drupal\Core\Extension\ModuleExtensionList; +use Drupal\Core\State\StateInterface; use GuzzleHttp\ClientInterface; +use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\RequestStack; /** - * Checks whether rl.php is accessible via HTTP, cached for 1 hour. + * Checks whether rl.php is accessible via HTTP. + * + * Cached for 1 hour on success, 5 minutes on failure (so a fixed misconfig + * clears quickly without forcing a manual cache rebuild). * - * The check pings rl.php from the browser's origin. When the HTTP - * request fails due to networking (e.g. Docker container isolation) - * but the file exists on disk, the endpoint is assumed accessible. + * Strategy: + * 1. Probe the public URL Drupal would emit on the current request, + * preferring HTTPS when X-Forwarded-Proto signals it (even if + * $settings['reverse_proxy'] isn't configured — for a same-site + * self-probe this is safe and avoids the most common false negative). + * 2. If that fails, retry on http://127.0.0.1[:port] with the original + * Host header so we can tell "rl.php is broken" from "the proxy / + * scheme / DNS chain to the public hostname is broken." + * 3. Return a structured result so hook_requirements() can give a + * pointed description rather than a generic "not accessible." */ class EndpointChecker { - protected const CACHE_TTL = 3600; + /** + * Cache TTLs. + */ + protected const SUCCESS_TTL = 3600; + protected const FAILURE_TTL = 300; + + /** + * State key for the cached result. + */ protected const STATE_KEY = 'rl.endpoint_accessible'; + /** + * Result statuses returned by ::check(). + */ + public const STATUS_OK = 'ok'; + public const STATUS_REDIRECTED = 'redirected'; + public const STATUS_HTTP_ERROR = 'http_error'; + public const STATUS_BODY_MISMATCH = 'body_mismatch'; + public const STATUS_CONNECTION_ERROR = 'connection_error'; + public const STATUS_FILE_MISSING = 'file_missing'; + public function __construct( protected StateInterface $state, protected TimeInterface $time, @@ -30,15 +59,38 @@ public function __construct( /** * Returns TRUE when rl.php is believed accessible by the web server. + * + * Thin wrapper over ::getResult() for callers that only need a boolean. */ public function isAccessible(): bool { - $cached = $this->state->get(static::STATE_KEY); - if (is_array($cached) && isset($cached['checked']) && ($this->time->getRequestTime() - $cached['checked']) < static::CACHE_TTL) { - return !empty($cached['accessible']); + return $this->getResult()['status'] === self::STATUS_OK; + } + + /** + * Returns the cached check result with full diagnostic detail. + * + * @return array + * An associative array with at least: + * - status: one of the STATUS_* constants. + * - detail: a human-readable explanation, or NULL on success. + * And optionally, depending on status: + * - code: HTTP status code of the failed response. + * - public_url / loopback_url: URLs probed. + * - redirects: redirect chain followed during the probe. + */ + public function getResult(): array { + $cached = $this->state->get(self::STATE_KEY); + if (is_array($cached) && isset($cached['checked'], $cached['result']) && is_array($cached['result'])) { + $ttl = $cached['result']['status'] === self::STATUS_OK + ? self::SUCCESS_TTL + : self::FAILURE_TTL; + if (($this->time->getRequestTime() - $cached['checked']) < $ttl) { + return $cached['result']; + } } $result = $this->check(); - $this->state->set(static::STATE_KEY, [ - 'accessible' => $result, + $this->state->set(self::STATE_KEY, [ + 'result' => $result, 'checked' => $this->time->getRequestTime(), ]); return $result; @@ -48,38 +100,198 @@ public function isAccessible(): bool { * Clears the cached result so the next call re-checks. */ public function resetCache(): void { - $this->state->delete(static::STATE_KEY); + $this->state->delete(self::STATE_KEY); } /** * Performs the actual rl.php accessibility check. */ - protected function check(): bool { + protected function check(): array { $rl_path = $this->moduleList->getPath('rl'); $file_path = DRUPAL_ROOT . '/' . $rl_path . '/rl.php'; if (!file_exists($file_path)) { - return FALSE; + return [ + 'status' => self::STATUS_FILE_MISSING, + 'detail' => sprintf('rl.php is missing on disk at %s.', $file_path), + ]; } $request = $this->requestStack->getCurrentRequest(); - $base_url = $request ? $request->getSchemeAndHttpHost() : ''; - $base_path = $request ? $request->getBasePath() : ''; - $url = $base_url . $base_path . '/' . $rl_path . '/rl.php'; + $public_url = $this->buildPublicUrl($request, $rl_path); + $public_result = $this->probe($public_url); + $public_result['public_url'] = $public_url; + + if ($public_result['status'] === self::STATUS_OK) { + return $public_result; + } + + // Loopback fallback. Only attempt when we have a real Request — in CLI + // there's nothing to fall back to, and the public probe already returned + // a deterministic failure. + if ($request === NULL) { + return $public_result; + } + $loopback_url = $this->buildLoopbackUrl($request, $rl_path); + $loopback_result = $this->probe($loopback_url, $request->getHost()); + + if ($loopback_result['status'] !== self::STATUS_OK) { + return $public_result; + } + + // Network-layer failure on public + loopback OK = browser-reachable + // (Docker / split-horizon DNS only blocks us, not browsers). Upgrade. + if ($public_result['status'] === self::STATUS_CONNECTION_ERROR) { + return [ + 'status' => self::STATUS_OK, + 'detail' => sprintf( + 'rl.php served on loopback (%s). Public probe failed at network layer: %s. This does not affect browser access.', + $loopback_url, + $public_result['detail'] ?? '(no detail)' + ), + 'public_url' => $public_url, + 'loopback_url' => $loopback_url, + ]; + } + return $public_result + [ + 'loopback_ok' => TRUE, + 'loopback_url' => $loopback_url, + ]; + } + + /** + * Builds the URL Drupal would emit for rl.php on the current request. + * + * Self-probe-only exception: trusts X-Forwarded-Proto / -Port without + * $settings['reverse_proxy'] (safe here, do not copy elsewhere). + */ + protected function buildPublicUrl(?Request $request, string $rl_path): string { + if ($request === NULL) { + return ''; + } + $forwarded_proto = $request->headers->get('X-Forwarded-Proto'); + if (in_array($forwarded_proto, ['http', 'https'], TRUE)) { + $scheme = $forwarded_proto; + $host = $request->getHost(); + $forwarded_port = $request->headers->get('X-Forwarded-Port'); + $port = ($forwarded_port !== NULL && ctype_digit($forwarded_port)) + ? (int) $forwarded_port + : NULL; + } + else { + $scheme = $request->getScheme(); + $host = $request->getHost(); + $port = $request->getPort(); + } + $is_default_port = $port === NULL + || ($scheme === 'http' && $port === 80) + || ($scheme === 'https' && $port === 443); + if (!$is_default_port) { + $host .= ':' . $port; + } + return sprintf( + '%s://%s%s/%s/rl.php', + $scheme, + $host, + $request->getBasePath(), + $rl_path + ); + } + + /** + * Builds an http://127.0.0.1[:port]/... URL for the loopback probe. + * + * Uses the local request port since the loopback bypasses any public TLS + * terminator and lands on the same web server that just served us. + */ + protected function buildLoopbackUrl(Request $request, string $rl_path): string { + $port = $request->getPort(); + $port_part = ($port && $port !== 80) ? ':' . $port : ''; + return sprintf( + 'http://127.0.0.1%s%s/%s/rl.php', + $port_part, + $request->getBasePath(), + $rl_path + ); + } + + /** + * POSTs action=ping to a URL and classifies the response. + * + * Redirects ARE followed (legit setups may HTTP→HTTPS upgrade or do + * canonical-host redirects), but the chain is tracked so a body-mismatch + * diagnostic can name the redirect target — usually the smoking gun for + * scheme or host misconfiguration. + */ + protected function probe(string $url, ?string $host_header = NULL): array { + if ($url === '') { + return [ + 'status' => self::STATUS_HTTP_ERROR, + 'detail' => 'No URL available to probe (CLI / no current request).', + ]; + } + $options = [ + 'form_params' => ['action' => 'ping'], + 'timeout' => 3, + 'http_errors' => FALSE, + 'allow_redirects' => [ + 'max' => 5, + 'strict' => TRUE, + 'track_redirects' => TRUE, + ], + ]; + if ($host_header !== NULL) { + $options['headers'] = ['Host' => $host_header]; + } try { - $response = $this->httpClient->post($url, [ - 'form_params' => ['action' => 'ping'], - 'timeout' => 3, - 'http_errors' => FALSE, - ]); - return $response->getStatusCode() === 200; - } - catch (\Exception $e) { - // Connection failed (cURL error 6/7 in Docker, firewalls, etc.). - // The file exists on disk, so assume the web server serves it. - return TRUE; + $response = $this->httpClient->post($url, $options); + } + catch (\Throwable $e) { + // Transport-layer failure (DNS / TCP / TLS / timeout); check() will + // upgrade to OK iff the loopback probe succeeds. + return [ + 'status' => self::STATUS_CONNECTION_ERROR, + 'detail' => $e->getMessage(), + ]; } + + $code = $response->getStatusCode(); + $redirect_chain = $response->getHeader('X-Guzzle-Redirect-History'); + + if ($code !== 200) { + $result = [ + 'status' => self::STATUS_HTTP_ERROR, + 'detail' => sprintf('rl.php probe returned HTTP %d.', $code), + 'code' => $code, + ]; + if (!empty($redirect_chain)) { + $result['redirects'] = $redirect_chain; + } + return $result; + } + + $body = trim((string) $response->getBody()); + if ($body !== 'pong') { + $result = [ + 'status' => self::STATUS_BODY_MISMATCH, + 'detail' => sprintf( + 'rl.php probe returned HTTP 200 but body was %s, not "pong".', + $body === '' ? '(empty)' : '"' . substr($body, 0, 80) . '"' + ), + 'code' => $code, + ]; + if (!empty($redirect_chain)) { + $result['detail'] .= ' Followed redirect to: ' . end($redirect_chain) . '.'; + $result['redirects'] = $redirect_chain; + } + return $result; + } + + return [ + 'status' => self::STATUS_OK, + 'detail' => NULL, + ]; } }