diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 6bfe83538b..f43f032911 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ The type attribute can be add,update,fix,remove. + Base16.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output. URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters. PercentCodec loses literal '+' when plusForSpace is enabled. Digest ALL reuses System.in, so only the first algorithm sees the real input (#431). diff --git a/src/main/java/org/apache/commons/codec/binary/Base16.java b/src/main/java/org/apache/commons/codec/binary/Base16.java index cd4b893825..c8f9450116 100644 --- a/src/main/java/org/apache/commons/codec/binary/Base16.java +++ b/src/main/java/org/apache/commons/codec/binary/Base16.java @@ -33,7 +33,7 @@ *

*

* The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case - * alphabet. + * alphabet, and configuring a custom 16-byte alphabet with {@link Builder#setEncodeTable(byte...)}. *

* * @see Base16InputStream @@ -78,10 +78,18 @@ public Base16 get() { return new Base16(this); } + /** + * Sets the Base16 encode table and derives the matching decode table. + * + * @param encodeTable 16 unique bytes, null resets to the default upper-case table. + * @return {@code this} instance. + * @throws IllegalArgumentException if {@code encodeTable} does not contain 16 unique bytes. + */ @Override public Builder setEncodeTable(final byte... encodeTable) { - super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE); - return super.setEncodeTable(encodeTable); + final byte[] table = encodeTable != null ? encodeTable : UPPER_CASE_ENCODE_TABLE; + super.setDecodeTableRaw(toDecodeTable(table)); + return super.setEncodeTable(table); } /** @@ -91,8 +99,7 @@ public Builder setEncodeTable(final byte... encodeTable) { * @return {@code this} instance. */ public Builder setLowerCase(final boolean lowerCase) { - setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE); - return asThis(); + return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE); } } @@ -152,6 +159,32 @@ public Builder setLowerCase(final boolean lowerCase) { /** Mask used to extract 4 bits, used when decoding character. */ private static final int MASK_4_BITS = 0x0f; + private static byte[] toDecodeTable(final byte[] encodeTable) { + if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) { + return UPPER_CASE_DECODE_TABLE; + } + if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) { + return LOWER_CASE_DECODE_TABLE; + } + if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) { + throw new IllegalArgumentException("Base16 encode table must contain 16 entries."); + } + int max = -1; + for (final byte b : encodeTable) { + max = Math.max(max, b & 0xff); + } + final byte[] decodeTable = new byte[max + 1]; + Arrays.fill(decodeTable, (byte) -1); + for (int i = 0; i < encodeTable.length; i++) { + final int b = encodeTable[i] & 0xff; + if (decodeTable[b] != -1) { + throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b); + } + decodeTable[b] = (byte) i; + } + return decodeTable; + } + /** * Constructs a new builder. * @@ -241,8 +274,9 @@ void decode(final byte[] data, int offset, final int length, final Context conte private int decodeOctet(final byte octet) { int decoded = -1; - if ((octet & 0xff) < decodeTable.length) { - decoded = decodeTable[octet]; + final int b = octet & 0xff; + if (b < decodeTable.length) { + decoded = decodeTable[b]; } if (decoded == -1) { throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet); @@ -282,7 +316,8 @@ void encode(final byte[] data, final int offset, final int length, final Context */ @Override public boolean isInAlphabet(final byte octet) { - return isInAlphabet((byte) (octet & 0xff), decodeTable); + final int b = octet & 0xff; + return b < decodeTable.length && decodeTable[b] != -1; } /** diff --git a/src/test/java/org/apache/commons/codec/binary/Base16Test.java b/src/test/java/org/apache/commons/codec/binary/Base16Test.java index 7ef4cc6f67..46a356bdeb 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base16Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base16Test.java @@ -149,6 +149,39 @@ void testConstructors() { new Base16(false, CodecPolicy.STRICT); } + @Test + void testCustomEncodeTableAffectsDecodeTable() { + final byte[] encodeTable = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII); + final byte tmp = encodeTable[0]; + encodeTable[0] = encodeTable[1]; + encodeTable[1] = tmp; + final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get(); + final byte[] encoded = base16.encode(new byte[] { 1 }); + assertEquals("10", new String(encoded, StandardCharsets.US_ASCII)); + assertArrayEquals(new byte[] { 1 }, base16.decode(encoded)); + } + + @Test + void testCustomEncodeTableRejectsDuplicates() { + final byte[] encodeTable = "00123456789ABCDE".getBytes(StandardCharsets.US_ASCII); + assertThrows(IllegalArgumentException.class, () -> Base16.builder().setEncodeTable(encodeTable)); + } + + @Test + void testCustomEncodeTableRejectsInvalidLength() { + assertThrows(IllegalArgumentException.class, + () -> Base16.builder().setEncodeTable("0123456789ABCDE".getBytes(StandardCharsets.US_ASCII))); + } + + @Test + void testBuilderSetLowerCaseDecodesOwnOutput() { + final Base16 base16 = Base16.builder().setLowerCase(true).get(); + final byte[] data = { (byte) 0xab }; + final byte[] encoded = base16.encode(data); + assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII)); + assertArrayEquals(data, base16.decode(encoded)); + } + @Test void testDecodeSingleBytes() { final String encoded = "556E74696C206E6578742074696D6521";