diff --git a/src/changes/changes.xml b/src/changes/changes.xml index b0428fc45b..6bfe83538b 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ The type attribute can be add,update,fix,remove. + URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters. PercentCodec loses literal '+' when plusForSpace is enabled. Digest ALL reuses System.in, so only the first algorithm sees the real input (#431). diff --git a/src/main/java/org/apache/commons/codec/net/URLCodec.java b/src/main/java/org/apache/commons/codec/net/URLCodec.java index 20f25e2535..462c8a6966 100644 --- a/src/main/java/org/apache/commons/codec/net/URLCodec.java +++ b/src/main/java/org/apache/commons/codec/net/URLCodec.java @@ -53,6 +53,8 @@ public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, St */ protected static final byte ESCAPE_CHAR = '%'; + private static final byte PLUS_CHAR = '+'; + /** * BitSet of www-form-url safe characters. * This is a copy of the internal BitSet which is now used for the conversion. @@ -107,7 +109,7 @@ public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); for (int i = 0; i < bytes.length; i++) { final int b = bytes[i]; - if (b == '+') { + if (b == PLUS_CHAR) { buffer.write(' '); } else if (b == ESCAPE_CHAR) { try { @@ -126,9 +128,11 @@ public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException /** * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. + * The characters {@code %} and {@code +} are always escaped because {@link #decodeUrl(byte[])} + * treats them as URL-encoding syntax. * * @param urlsafe - * bitset of characters deemed URL safe. + * bitset of characters deemed URL safe, except for {@code %} and {@code +}. * @param bytes * array of bytes to convert to URL safe characters. * @return array of bytes containing URL safe characters. @@ -147,9 +151,9 @@ public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { if (b < 0) { b = 256 + b; } - if (urlsafe.get(b)) { + if (urlsafe.get(b) && b != ESCAPE_CHAR && b != PLUS_CHAR) { if (b == ' ') { - b = '+'; + b = PLUS_CHAR; } buffer.write(b); } else { diff --git a/src/test/java/org/apache/commons/codec/net/URLCodecTest.java b/src/test/java/org/apache/commons/codec/net/URLCodecTest.java index b862803c0e..fa374e31f1 100644 --- a/src/test/java/org/apache/commons/codec/net/URLCodecTest.java +++ b/src/test/java/org/apache/commons/codec/net/URLCodecTest.java @@ -22,6 +22,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.charset.StandardCharsets; +import java.util.BitSet; import org.apache.commons.codec.CharEncoding; import org.apache.commons.codec.DecoderException; @@ -180,6 +181,30 @@ void testEncodeUrlWithNullBitSet() throws Exception { validateState(urlCodec); } + @Test + void testEncodeUrlWithPercentMarkedSafeEscapesPercent() throws Exception { + final BitSet safe = new BitSet(); + safe.set('%'); + final String plain = "%"; + final byte[] encoded = URLCodec.encodeUrl(safe, plain.getBytes(StandardCharsets.US_ASCII)); + final String encodedS = new String(encoded, StandardCharsets.US_ASCII); + assertEquals("%25", encodedS, "URLCodec should escape percent even when marked safe"); + final byte[] decoded = URLCodec.decodeUrl(encoded); + assertEquals(plain, new String(decoded, StandardCharsets.US_ASCII), "URLCodec percent decoding test"); + } + + @Test + void testEncodeUrlWithPlusMarkedSafeEscapesPlus() throws Exception { + final BitSet safe = new BitSet(); + safe.set('+'); + final String plain = "+"; + final byte[] encoded = URLCodec.encodeUrl(safe, plain.getBytes(StandardCharsets.US_ASCII)); + final String encodedS = new String(encoded, StandardCharsets.US_ASCII); + assertEquals("%2B", encodedS, "URLCodec should escape plus even when marked safe"); + final byte[] decoded = URLCodec.decodeUrl(encoded); + assertEquals(plain, new String(decoded, StandardCharsets.US_ASCII), "URLCodec plus decoding test"); + } + @Test void testInvalidEncoding() { final URLCodec urlCodec = new URLCodec("NONSENSE");