From fc1dc56c3cfd08f1c08d8ea86044825e4e7a03c2 Mon Sep 17 00:00:00 2001 From: OldTruckDriver Date: Wed, 17 Jun 2026 19:29:44 +1000 Subject: [PATCH 1/3] [CODEC-341] Fix Base16 custom alphabet decode table Derive Base16 decode tables from custom encode tables so a configured codec can decode its own output. Reject encode tables that do not contain exactly 16 unique byte values. Reviewed-by: OpenAI Codex Reviewed-by: Anthropic Claude Code --- src/changes/changes.xml | 1 + .../apache/commons/codec/binary/Base16.java | 51 ++++++++++++++++--- .../commons/codec/binary/Base16Test.java | 34 +++++++++++++ 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 3dc2f6c409..5a6cad9db7 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ The type attribute can be add,update,fix,remove. + Base16.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output. Digest ALL reuses System.in, so only the first algorithm sees the real input (#431). diff --git a/src/main/java/org/apache/commons/codec/binary/Base16.java b/src/main/java/org/apache/commons/codec/binary/Base16.java index cd4b893825..c8f9450116 100644 --- a/src/main/java/org/apache/commons/codec/binary/Base16.java +++ b/src/main/java/org/apache/commons/codec/binary/Base16.java @@ -33,7 +33,7 @@ *

*

* The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case - * alphabet. + * alphabet, and configuring a custom 16-byte alphabet with {@link Builder#setEncodeTable(byte...)}. *

* * @see Base16InputStream @@ -78,10 +78,18 @@ public Base16 get() { return new Base16(this); } + /** + * Sets the Base16 encode table and derives the matching decode table. + * + * @param encodeTable 16 unique bytes, null resets to the default upper-case table. + * @return {@code this} instance. + * @throws IllegalArgumentException if {@code encodeTable} does not contain 16 unique bytes. + */ @Override public Builder setEncodeTable(final byte... encodeTable) { - super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE); - return super.setEncodeTable(encodeTable); + final byte[] table = encodeTable != null ? encodeTable : UPPER_CASE_ENCODE_TABLE; + super.setDecodeTableRaw(toDecodeTable(table)); + return super.setEncodeTable(table); } /** @@ -91,8 +99,7 @@ public Builder setEncodeTable(final byte... encodeTable) { * @return {@code this} instance. */ public Builder setLowerCase(final boolean lowerCase) { - setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE); - return asThis(); + return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE); } } @@ -152,6 +159,32 @@ public Builder setLowerCase(final boolean lowerCase) { /** Mask used to extract 4 bits, used when decoding character. */ private static final int MASK_4_BITS = 0x0f; + private static byte[] toDecodeTable(final byte[] encodeTable) { + if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) { + return UPPER_CASE_DECODE_TABLE; + } + if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) { + return LOWER_CASE_DECODE_TABLE; + } + if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) { + throw new IllegalArgumentException("Base16 encode table must contain 16 entries."); + } + int max = -1; + for (final byte b : encodeTable) { + max = Math.max(max, b & 0xff); + } + final byte[] decodeTable = new byte[max + 1]; + Arrays.fill(decodeTable, (byte) -1); + for (int i = 0; i < encodeTable.length; i++) { + final int b = encodeTable[i] & 0xff; + if (decodeTable[b] != -1) { + throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b); + } + decodeTable[b] = (byte) i; + } + return decodeTable; + } + /** * Constructs a new builder. * @@ -241,8 +274,9 @@ void decode(final byte[] data, int offset, final int length, final Context conte private int decodeOctet(final byte octet) { int decoded = -1; - if ((octet & 0xff) < decodeTable.length) { - decoded = decodeTable[octet]; + final int b = octet & 0xff; + if (b < decodeTable.length) { + decoded = decodeTable[b]; } if (decoded == -1) { throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet); @@ -282,7 +316,8 @@ void encode(final byte[] data, final int offset, final int length, final Context */ @Override public boolean isInAlphabet(final byte octet) { - return isInAlphabet((byte) (octet & 0xff), decodeTable); + final int b = octet & 0xff; + return b < decodeTable.length && decodeTable[b] != -1; } /** diff --git a/src/test/java/org/apache/commons/codec/binary/Base16Test.java b/src/test/java/org/apache/commons/codec/binary/Base16Test.java index 7ef4cc6f67..c70a0ca87b 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base16Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base16Test.java @@ -149,6 +149,40 @@ void testConstructors() { new Base16(false, CodecPolicy.STRICT); } + @Test + void testCustomEncodeTableAffectsDecodeTable() { + final byte[] encodeTable = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII); + final byte tmp = encodeTable[0]; + encodeTable[0] = encodeTable[1]; + encodeTable[1] = tmp; + + final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get(); + final byte[] encoded = base16.encode(new byte[] { 1 }); + assertEquals("10", new String(encoded, StandardCharsets.US_ASCII), "Custom Base16 alphabet encoding test"); + assertArrayEquals(new byte[] { 1 }, base16.decode(encoded), "Custom Base16 alphabet decoding test"); + } + + @Test + void testCustomEncodeTableRejectsDuplicates() { + final byte[] encodeTable = "00123456789ABCDE".getBytes(StandardCharsets.US_ASCII); + assertThrows(IllegalArgumentException.class, () -> Base16.builder().setEncodeTable(encodeTable)); + } + + @Test + void testCustomEncodeTableRejectsInvalidLength() { + assertThrows(IllegalArgumentException.class, + () -> Base16.builder().setEncodeTable("0123456789ABCDE".getBytes(StandardCharsets.US_ASCII))); + } + + @Test + void testBuilderSetLowerCaseDecodesOwnOutput() { + final Base16 base16 = Base16.builder().setLowerCase(true).get(); + final byte[] data = { (byte) 0xab }; + final byte[] encoded = base16.encode(data); + assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII), "Base16 builder lower-case encoding test"); + assertArrayEquals(data, base16.decode(encoded), "Base16 builder lower-case decoding test"); + } + @Test void testDecodeSingleBytes() { final String encoded = "556E74696C206E6578742074696D6521"; From 08b574578a8f723e70cd56765a5fd839298a165e Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Wed, 17 Jun 2026 21:51:56 -0400 Subject: [PATCH 2/3] Update Base16Test.java --- src/test/java/org/apache/commons/codec/binary/Base16Test.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/apache/commons/codec/binary/Base16Test.java b/src/test/java/org/apache/commons/codec/binary/Base16Test.java index c70a0ca87b..bda5e2a24c 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base16Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base16Test.java @@ -155,7 +155,6 @@ void testCustomEncodeTableAffectsDecodeTable() { final byte tmp = encodeTable[0]; encodeTable[0] = encodeTable[1]; encodeTable[1] = tmp; - final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get(); final byte[] encoded = base16.encode(new byte[] { 1 }); assertEquals("10", new String(encoded, StandardCharsets.US_ASCII), "Custom Base16 alphabet encoding test"); From 2c6580a39d436c6e2a43e2d892658418040126f8 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Wed, 17 Jun 2026 21:53:24 -0400 Subject: [PATCH 3/3] Remove test clutter. --- .../java/org/apache/commons/codec/binary/Base16Test.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/apache/commons/codec/binary/Base16Test.java b/src/test/java/org/apache/commons/codec/binary/Base16Test.java index bda5e2a24c..46a356bdeb 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base16Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base16Test.java @@ -157,8 +157,8 @@ void testCustomEncodeTableAffectsDecodeTable() { encodeTable[1] = tmp; final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get(); final byte[] encoded = base16.encode(new byte[] { 1 }); - assertEquals("10", new String(encoded, StandardCharsets.US_ASCII), "Custom Base16 alphabet encoding test"); - assertArrayEquals(new byte[] { 1 }, base16.decode(encoded), "Custom Base16 alphabet decoding test"); + assertEquals("10", new String(encoded, StandardCharsets.US_ASCII)); + assertArrayEquals(new byte[] { 1 }, base16.decode(encoded)); } @Test @@ -178,8 +178,8 @@ void testBuilderSetLowerCaseDecodesOwnOutput() { final Base16 base16 = Base16.builder().setLowerCase(true).get(); final byte[] data = { (byte) 0xab }; final byte[] encoded = base16.encode(data); - assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII), "Base16 builder lower-case encoding test"); - assertArrayEquals(data, base16.decode(encoded), "Base16 builder lower-case decoding test"); + assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII)); + assertArrayEquals(data, base16.decode(encoded)); } @Test