From 7dcaa2a994c374d71303bd32ae2b7693aad48008 Mon Sep 17 00:00:00 2001 From: OldTruckDriver Date: Wed, 17 Jun 2026 22:28:52 +1000 Subject: [PATCH 1/2] [CODEC-343] Fix Base32 hex decode table builder Configure setHexDecodeTable(boolean) with the matching encode table instead of passing a decode lookup table to setEncodeTable(byte...). Add a regression test showing the configured codec encodes with the Base32-Hex alphabet and decodes its own output. Reviewed-by: OpenAI Codex Reviewed-by: Anthropic Claude Code --- src/changes/changes.xml | 1 + .../java/org/apache/commons/codec/binary/Base32.java | 4 ++-- .../java/org/apache/commons/codec/binary/Base32Test.java | 9 +++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 3dc2f6c409..a74637c174 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ The type attribute can be add,update,fix,remove. + Base32.Builder.setHexDecodeTable(boolean) sets the encode table to a decode lookup table. Digest ALL reuses System.in, so only the first algorithm sees the real input (#431). diff --git a/src/main/java/org/apache/commons/codec/binary/Base32.java b/src/main/java/org/apache/commons/codec/binary/Base32.java index 1de642387e..a1206c6732 100644 --- a/src/main/java/org/apache/commons/codec/binary/Base32.java +++ b/src/main/java/org/apache/commons/codec/binary/Base32.java @@ -103,7 +103,7 @@ public Builder setEncodeTable(final byte... encodeTable) { } /** - * Sets the decode table to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. + * Sets the encode and decode tables to use Base32 hexadecimal if {@code true}, otherwise use the Base32 alphabet. *

<p>
* This overrides a value previously set with {@link #setEncodeTable(byte...)}.
* </p>
@@ -113,7 +113,7 @@ public Builder setEncodeTable(final byte... encodeTable) { * @since 1.18.0 */ public Builder setHexDecodeTable(final boolean useHex) { - return setEncodeTable(decodeTable(useHex)); + return setEncodeTable(encodeTable(useHex)); } /** diff --git a/src/test/java/org/apache/commons/codec/binary/Base32Test.java b/src/test/java/org/apache/commons/codec/binary/Base32Test.java index c332c448d0..ed3e0b9c46 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base32Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base32Test.java @@ -319,6 +319,15 @@ void testBase32HexImpossibleSamples() { // @formatter:on } + @Test + void testBuilderSetHexDecodeTableDecodesOwnOutput() { + final Base32 base32 = Base32.builder().setHexDecodeTable(true).setLineLength(0).get(); + final byte[] data = { 0 }; + final byte[] encoded = base32.encode(data); + assertEquals("00======", new String(encoded, StandardCharsets.US_ASCII)); + assertArrayEquals(data, base32.decode(encoded)); + } + @Test void testBase32HexSamples() throws Exception { final Base32 codec = new Base32(true); From faa5b84e23bd3230a0da9be71c5b6953a5743f49 Mon Sep 17 00:00:00 2001 From: OldTruckDriver Date: Wed, 17 Jun 2026 21:29:32 +1000 Subject: [PATCH 2/2] [CODEC-342] Fix Base32 custom alphabet decode table Derive Base32 decode tables from custom encode tables so a configured codec can decode its own output. Reject encode tables that do not contain exactly 32 unique byte values. Reviewed-by: OpenAI Codex Reviewed-by: Anthropic Claude Code --- src/changes/changes.xml | 1 + .../apache/commons/codec/binary/Base32.java | 63 +++++++++++++++++-- .../commons/codec/binary/Base32Test.java | 39 ++++++++++++ 3 files changed, 98 insertions(+), 5 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index a74637c174..c00108cf86 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ The type attribute can be add,update,fix,remove. 
+ Base32.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output. Base32.Builder.setHexDecodeTable(boolean) sets the encode table to a decode lookup table. Digest ALL reuses System.in, so only the first algorithm sees the real input (#431). diff --git a/src/main/java/org/apache/commons/codec/binary/Base32.java b/src/main/java/org/apache/commons/codec/binary/Base32.java index a1206c6732..19e15fc62d 100644 --- a/src/main/java/org/apache/commons/codec/binary/Base32.java +++ b/src/main/java/org/apache/commons/codec/binary/Base32.java @@ -96,9 +96,19 @@ public Base32 get() { return new Base32(this); } + /** + * Sets the encode table and derives the matching decode table. + *

<p>
+ * The RFC 4648 Base32 and Base32 Hex tables keep their case-insensitive decoders.
+ * </p>
+ * + * @param encodeTable the encode table with exactly 32 unique entries, null resets to the default. + * @return {@code this} instance. + * @throws IllegalArgumentException if the encode table does not contain exactly 32 unique entries. + */ @Override public Builder setEncodeTable(final byte... encodeTable) { - super.setDecodeTableRaw(Arrays.equals(encodeTable, HEX_ENCODE_TABLE) ? HEX_DECODE_TABLE : DECODE_TABLE); + super.setDecodeTableRaw(toDecodeTable(encodeTable)); return super.setEncodeTable(encodeTable); } @@ -145,6 +155,8 @@ public Builder setHexEncodeTable(final boolean useHex) { private static final int BYTES_PER_ENCODED_BLOCK = 8; private static final int BYTES_PER_UNENCODED_BLOCK = 5; + private static final int DECODING_TABLE_LENGTH = 256; + private static final int ENCODING_TABLE_LENGTH = 1 << BITS_PER_ENCODED_BYTE; /** * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified in Table 3 of RFC 4648) into their 5-bit @@ -256,6 +268,29 @@ public static Builder builder() { return new Builder(); } + /** + * Calculates a decode table for a given encode table. + * + * @param encodeTable that is used to determine decode lookup table. + * @return A new decode table. + * @throws IllegalArgumentException if the encode table does not contain exactly 32 unique entries. 
+ */ + private static byte[] calculateDecodeTable(final byte[] encodeTable) { + if (encodeTable.length != ENCODING_TABLE_LENGTH) { + throw new IllegalArgumentException("encodeTable must have exactly 32 entries."); + } + final byte[] decodeTable = new byte[DECODING_TABLE_LENGTH]; + Arrays.fill(decodeTable, (byte) -1); + for (int i = 0; i < encodeTable.length; i++) { + final int encodedByte = encodeTable[i] & 0xff; + if (decodeTable[encodedByte] != -1) { + throw new IllegalArgumentException("encodeTable must not contain duplicate entries."); + } + decodeTable[encodedByte] = (byte) i; + } + return decodeTable; + } + private static byte[] decodeTable(final boolean useHex) { return useHex ? HEX_DECODE_TABLE : DECODE_TABLE; } @@ -276,6 +311,23 @@ private static byte[] encodeTable(final boolean useHex) { return useHex ? HEX_ENCODE_TABLE : ENCODE_TABLE; } + /** + * Gets the decode table that matches the given encode table. + * + * @param encodeTable that is used to determine decode lookup table. + * @return the matching decode table. + */ + private static byte[] toDecodeTable(final byte[] encodeTable) { + final byte[] table = encodeTable != null ? encodeTable : ENCODE_TABLE; + if (Arrays.equals(table, ENCODE_TABLE)) { + return DECODE_TABLE; + } + if (Arrays.equals(table, HEX_ENCODE_TABLE)) { + return HEX_DECODE_TABLE; + } + return calculateDecodeTable(table); + } + /** * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. {@code encodeSize = {@link * #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;} @@ -530,14 +582,14 @@ void decode(final byte[] input, int inPos, final int inAvail, final Context cont } final int decodeSize = this.encodeSize - 1; for (int i = 0; i < inAvail; i++) { - final byte b = input[inPos++]; - if (b == pad) { + final int b = input[inPos++] & 0xff; + if (b == (pad & 0xff)) { // We're done. 
context.eof = true; break; } final byte[] buffer = ensureBufferSize(decodeSize, context); - if (b >= 0 && b < this.decodeTable.length) { + if (b < this.decodeTable.length) { final int result = this.decodeTable[b]; if (result >= 0) { context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK; @@ -738,7 +790,8 @@ byte[] getLineSeparator() { */ @Override public boolean isInAlphabet(final byte octet) { - return isInAlphabet(octet, decodeTable); + final int value = octet & 0xff; + return value < decodeTable.length && decodeTable[value] != -1; } /** diff --git a/src/test/java/org/apache/commons/codec/binary/Base32Test.java b/src/test/java/org/apache/commons/codec/binary/Base32Test.java index ed3e0b9c46..dbc0ff3ff4 100644 --- a/src/test/java/org/apache/commons/codec/binary/Base32Test.java +++ b/src/test/java/org/apache/commons/codec/binary/Base32Test.java @@ -398,6 +398,45 @@ void testBuilderCodecPolicy() { assertEquals(CodecPolicy.LENIENT, Base32.builder().setDecodingPolicy(null).get().getCodecPolicy()); } + @Test + void testBuilderCustomEncodeTableAffectsDecodeTable() { + final byte[] encodeTable = ENCODE_TABLE.clone(); + final byte temp = encodeTable[0]; + encodeTable[0] = encodeTable[1]; + encodeTable[1] = temp; + final Base32 base32 = Base32.builder().setEncodeTable(encodeTable).setLineLength(0).get(); + final byte[] data = { 0 }; + final byte[] encoded = base32.encode(data); + assertEquals("BB======", new String(encoded, StandardCharsets.US_ASCII)); + assertArrayEquals(data, base32.decode(encoded)); + } + + @Test + void testBuilderCustomEncodeTableRejectsDuplicateEntries() { + final byte[] encodeTable = ENCODE_TABLE.clone(); + encodeTable[1] = encodeTable[0]; + assertThrows(IllegalArgumentException.class, () -> Base32.builder().setEncodeTable(encodeTable)); + } + + @Test + void testBuilderCustomEncodeTableRejectsInvalidLength() { + assertThrows(IllegalArgumentException.class, () -> Base32.builder().setEncodeTable(Arrays.copyOf(ENCODE_TABLE, 
ENCODE_TABLE.length - 1))); + } + + @Test + void testBuilderCustomEncodeTableWithNonAsciiBytes() { + final byte[] encodeTable = new byte[32]; + for (int i = 0; i < encodeTable.length; i++) { + encodeTable[i] = (byte) (0x80 + i); + } + final Base32 base32 = Base32.builder().setEncodeTable(encodeTable).setLineLength(0).get(); + final byte[] data = { 0 }; + final byte[] encoded = base32.encode(data); + assertArrayEquals(new byte[] { (byte) 0x80, (byte) 0x80, '=', '=', '=', '=', '=', '=' }, encoded); + assertTrue(base32.isInAlphabet((byte) 0x80)); + assertArrayEquals(data, base32.decode(encoded)); + } + @Test void testBuilderLineAttributes() { assertNull(Base32.builder().get().getLineSeparator());