diff --git a/src/wp-includes/class-wp-email-address.php b/src/wp-includes/class-wp-email-address.php
index fd4f0ef8937ba..bf3e862ab6825 100644
--- a/src/wp-includes/class-wp-email-address.php
+++ b/src/wp-includes/class-wp-email-address.php
@@ -34,27 +34,57 @@
  */
 final class WP_Email_Address {
 	/**
-	 * Regex for the local part when Unicode is not enabled.
+	 * Pattern for a single ASCII local-part atom (no dot).
 	 *
-	 * Matches the character set from the WHATWG email specification:
+	 * Matches the character set from the WHATWG email specification, minus the
+	 * dot, which the dot-atom structure handles as a separator:
 	 * https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
 	 *
 	 * @since 7.1.0
 	 * @var string
 	 */
-	const LOCAL_PART_ASCII_REGEX = '/^[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+$/';
+	const LOCAL_PART_ATOM_ASCII = '[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~-]+';
+
+	/**
+	 * Pattern for a single Unicode local-part atom (no dot).
+	 *
+	 * Extends the ASCII atom to allow Unicode letters and numbers, with the same
+	 * grapheme-cluster structure used for domain labels: each cluster must open
+	 * with a non-combining character, followed by zero or more combining marks.
+	 *
+	 * @since 7.1.0
+	 * @var string
+	 */
+	const LOCAL_PART_ATOM_UNICODE = '(?:[\p{L}\p{N}!#$%&\'*+\/=?^_`{|}~-]\p{M}*)+';
+
+	/**
+	 * Regex for the local part when Unicode is not enabled.
+	 *
+	 * Assembled from {@see self::LOCAL_PART_ATOM_ASCII} as a dot-atom: one atom,
+	 * then zero or more dot-separated atoms. A leading dot, trailing dot, or two
+	 * consecutive dots are rejected, matching the stricter RFC 5321/5322 syntax
+	 * and PHP's FILTER_VALIDATE_EMAIL, because such addresses are undeliverable.
+	 *
+	 * Anchored with `\z` instead of `$` because `$` also matches just before a
+	 * trailing newline, which would let a local part ending in "\n" through.
+	 *
+	 * @since 7.1.0
+	 * @var string
+	 */
+	const LOCAL_PART_ASCII_REGEX = '/^' . self::LOCAL_PART_ATOM_ASCII . '(?:\.' . self::LOCAL_PART_ATOM_ASCII . ')*\z/';
 
 	/**
 	 * Regex for the local part when Unicode is enabled.
 	 *
-	 * Extends the WHATWG character set to allow Unicode letters and numbers,
-	 * and applies the same grapheme-cluster structure used for domain labels:
-	 * each cluster must open with a non-combining character.
+	 * Assembled from {@see self::LOCAL_PART_ATOM_UNICODE} as a dot-atom: one atom,
+	 * then zero or more dot-separated atoms. As with the ASCII variant, a leading
+	 * dot, trailing dot, or two consecutive dots are rejected, and the pattern is
+	 * anchored with `\z` so a local part ending in a newline does not match.
 	 *
 	 * @since 7.1.0
 	 * @var string
 	 */
-	const LOCAL_PART_UNICODE_REGEX = '/^([\p{L}\p{N}.!#$%&\'*+\/=?^_`{|}~-]\p{M}*)+$/u';
+	const LOCAL_PART_UNICODE_REGEX = '/^' . self::LOCAL_PART_ATOM_UNICODE . '(?:\.' . self::LOCAL_PART_ATOM_UNICODE . ')*\z/u';
 
 	/**
 	 * Pattern for a single ASCII domain label (no dot).
diff --git a/tests/phpunit/tests/formatting/antispambot.php b/tests/phpunit/tests/formatting/antispambot.php
index 5f5de80e1f5b8..5cb43a562e285 100644
--- a/tests/phpunit/tests/formatting/antispambot.php
+++ b/tests/phpunit/tests/formatting/antispambot.php
@@ -30,16 +30,16 @@ public function test_returns_valid_utf8( $email ) {
 	 */
 	public function data_returns_valid_utf8() {
 		return array(
-			'plain'                => array( 'bob@example.com' ),
-			'plain with ip'        => array( 'ace@204.32.222.14' ),
-			'deep subdomain'       => array( 'kevin@many.subdomains.make.a.happy.man.edu' ),
-			'short address'        => array( 'a@b.co' ),
-			'ascii@nonascii'       => array( 'info@grå.org' ),
-			'nonascii@nonascii'    => array( 'grå@grå.org' ),
-			'decomposed unicode'   => array( "gr\u{0061}\u{030a}blå@grå.org" ),
-			'weird but legal dots' => array( '..@example.com' ),
-			'umlauts'              => array( 'bücher@gmx.de' ),
-			'three-byte UTF-8'     => array( "\u{FFFD}@who.knows.com" ),
+			'plain'              => array( 'bob@example.com' ),
+			'plain with ip'      => array( 'ace@204.32.222.14' ),
+			'deep subdomain'     => array( 'kevin@many.subdomains.make.a.happy.man.edu' ),
+			'short address'      => array( 'a@b.co' ),
+			'ascii@nonascii'     => array( 'info@grå.org' ),
+			'nonascii@nonascii'  => array( 'grå@grå.org' ),
+			'decomposed unicode' => array( "gr\u{0061}\u{030a}blå@grå.org" ),
+			'consecutive dots'   => array( '..@example.com' ),
+			'umlauts'            => array( 'bücher@gmx.de' ),
+			'three-byte UTF-8'   => array( "\u{FFFD}@who.knows.com" ),
 		);
 	}
 
diff --git a/tests/phpunit/tests/formatting/isEmail.php b/tests/phpunit/tests/formatting/isEmail.php
index db37ca0311380..b1287019d028b 100644
--- a/tests/phpunit/tests/formatting/isEmail.php
+++ b/tests/phpunit/tests/formatting/isEmail.php
@@ -42,7 +42,6 @@ public static function data_valid_email_provider() {
 			'info@grå.org',
 			'grå@grå.org',
 			"gr\u{0061}\u{030a}blå@grå.org",
-			'..@example.com',
 		);
 
 		foreach ( $valid_emails as $email ) {
@@ -54,6 +53,7 @@ public static function data_valid_email_provider() {
 	 * Ensures that unrecognized email addresses are rejected.
 	 *
 	 * @ticket 31992
+	 * @ticket 55821
 	 *
 	 * @dataProvider data_invalid_email_provider
 	 *
@@ -120,6 +120,20 @@ public static function data_invalid_email_provider() {
 			 */
 			"a\x80b@example.com", // invalid UTF-8 in local part.
 			"abc@\x80.org", // invalid UTF-8 in domain subdomain.
+
+			/*
+			 * The local part is a dot-atom: dots may only separate non-empty
+			 * atoms. Leading dots, trailing dots, and consecutive dots are
+			 * invalid per RFC 5321/5322 and PHP's FILTER_VALIDATE_EMAIL, so
+			 * such addresses are undeliverable and best rejected. See #55821.
+			 */
+			'abc..def@xyz.com', // consecutive dots in local part.
+			'.abc@xyz.com', // leading dot in local part.
+			'abc.@xyz.com', // trailing dot in local part.
+			'..@example.com', // only dots in local part.
+
+			// A newline at the end of the local part must not slip past the end anchor.
+			"abc\n@xyz.com", // trailing newline in local part.
 		);
 
 		foreach ( $invalid_emails as $email ) {
diff --git a/tests/phpunit/tests/wp-email-address/wpEmailAddress.php b/tests/phpunit/tests/wp-email-address/wpEmailAddress.php
index afcc52354ee5e..04b896316aa23 100644
--- a/tests/phpunit/tests/wp-email-address/wpEmailAddress.php
+++ b/tests/phpunit/tests/wp-email-address/wpEmailAddress.php
@@ -215,6 +215,40 @@ public function data_invalid_unicode_addresses() {
 		);
 	}
 
+	/**
+	 * Tests that the local part is treated as a dot-atom, rejecting leading,
+	 * trailing, and consecutive dots.
+	 *
+	 * These addresses are invalid per the stricter RFC 5321/5322 syntax and
+	 * PHP's FILTER_VALIDATE_EMAIL, so WordPress cannot deliver mail to them.
+	 *
+	 * @ticket 55821
+	 *
+	 * @dataProvider data_invalid_local_part_dots
+	 * @covers WP_Email_Address::from_string
+	 *
+	 * @param string $address The invalid email address string.
+	 */
+	public function test_from_string_rejects_invalid_local_part_dots( $address ) {
+		$this->assertNull( WP_Email_Address::from_string( $address, 'unicode' ), 'Should reject in Unicode mode.' );
+		$this->assertNull( WP_Email_Address::from_string( $address, 'ascii' ), 'Should reject in ASCII mode.' );
+	}
+
+	/**
+	 * Data provider for local parts with invalid dot placement.
+	 *
+	 * @return array[]
+	 */
+	public function data_invalid_local_part_dots() {
+		return array(
+			'consecutive dots in local part' => array( 'abc..def@example.com' ),
+			'leading dot in local part'      => array( '.abc@example.com' ),
+			'trailing dot in local part'     => array( 'abc.@example.com' ),
+			'only dots in local part'        => array( '..@example.com' ),
+			'single dot as local part'       => array( '.@example.com' ),
+		);
+	}
+
 	/**
 	 * Data provider for several tests.
 	 *