diff --git a/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java b/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java index cadf8f2e0e..874c99fded 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java @@ -40,6 +40,7 @@ import org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader; import org.apache.parquet.column.values.dictionary.DictionaryValuesReader; import org.apache.parquet.column.values.dictionary.PlainValuesDictionary.PlainBinaryDictionary; +import org.apache.parquet.column.values.dictionary.PlainValuesDictionary.PlainBooleanDictionary; import org.apache.parquet.column.values.dictionary.PlainValuesDictionary.PlainDoubleDictionary; import org.apache.parquet.column.values.dictionary.PlainValuesDictionary.PlainFloatDictionary; import org.apache.parquet.column.values.dictionary.PlainValuesDictionary.PlainIntegerDictionary; @@ -102,6 +103,8 @@ public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dic return new PlainIntegerDictionary(dictionaryPage); case FLOAT: return new PlainFloatDictionary(dictionaryPage); + case BOOLEAN: + return new PlainBooleanDictionary(dictionaryPage); default: throw new ParquetDecodingException( "Dictionary encoding not supported for type: " + descriptor.getType()); diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/dictionary/PlainValuesDictionary.java b/parquet-column/src/main/java/org/apache/parquet/column/values/dictionary/PlainValuesDictionary.java index 436bddd3c1..468c7d110f 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/values/dictionary/PlainValuesDictionary.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/values/dictionary/PlainValuesDictionary.java @@ -28,6 +28,7 @@ import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.column.Dictionary; import 
org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.values.plain.BooleanPlainValuesReader;
 import org.apache.parquet.column.values.plain.PlainValuesReader.DoublePlainValuesReader;
 import org.apache.parquet.column.values.plain.PlainValuesReader.FloatPlainValuesReader;
 import org.apache.parquet.column.values.plain.PlainValuesReader.IntegerPlainValuesReader;
@@ -300,4 +301,62 @@ public int getMaxId() {
       return floatDictionaryContent.length - 1;
     }
   }
+
+  /**
+   * a dictionary serving boolean values decoded from a plain encoded dictionary page
+   */
+  public static class PlainBooleanDictionary extends PlainValuesDictionary {
+
+    private final boolean[] boolDictionaryContent;
+
+    /**
+     * Decodes all entries of the page up front.
+     *
+     * @param dictionaryPage a dictionary page of encoded boolean values
+     * @throws IOException if there is an exception while decoding the dictionary page
+     */
+    public PlainBooleanDictionary(DictionaryPage dictionaryPage) throws IOException {
+      super(dictionaryPage);
+      final ByteBufferInputStream pageStream = dictionaryPage.getBytes().toInputStream();
+      final int entryCount = dictionaryPage.getDictionarySize();
+      boolDictionaryContent = new boolean[entryCount];
+      final BooleanPlainValuesReader entryReader = new BooleanPlainValuesReader();
+      entryReader.initFromPage(entryCount, pageStream);
+      for (int id = 0; id < entryCount; id++) {
+        boolDictionaryContent[id] = entryReader.readBoolean();
+      }
+    }
+
+    /**
+     * @param id index of the entry to look up
+     * @return the boolean stored at that index
+     */
+    @Override
+    public boolean decodeToBoolean(int id) {
+      return boolDictionaryContent[id];
+    }
+
+    /**
+     * @return one "index => value" line per entry, wrapped in a named brace block
+     */
+    @Override
+    public String toString() {
+      final StringBuilder text = new StringBuilder("PlainBooleanDictionary {\n");
+      int id = 0;
+      for (boolean entry : boolDictionaryContent) {
+        text.append(id++).append(" => ").append(entry).append("\n");
+      }
+      text.append("}");
+      return text.toString();
+    }
+
+    /**
+     * @return the highest valid entry index, or -1 when the dictionary is empty
+     */
+    @Override
+    public int getMaxId() {
+      final int entryCount = boolDictionaryContent.length;
+      return entryCount - 1;
+    }
+  }
 }
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
index 6f7116bc36..a91f807e73 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
@@ -25,6 +25,8 @@
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
@@ -44,6 +46,7 @@
 import org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter;
 import org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter;
 import org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter;
+import org.apache.parquet.column.values.dictionary.PlainValuesDictionary.PlainBooleanDictionary;
 import org.apache.parquet.column.values.fallback.FallbackValuesWriter;
 import org.apache.parquet.column.values.plain.BinaryPlainValuesReader;
 import org.apache.parquet.column.values.plain.PlainValuesReader;
@@ -678,6 +681,73 @@ public void testZeroValues() throws IOException {
     }
   }
 
+  @Test
+  public void testBooleanDictionary() throws IOException {
+    // Create a dictionary page with boolean values (false, true)
+    // Bit-packed: bit 0 = false (0), bit 1 = true (1) => byte = 0b00000010 = 0x02
+    BytesInput bytes = BytesInput.from(new byte[] {0x02});
+    DictionaryPage dictionaryPage = new DictionaryPage(bytes, 2, PLAIN);
+
+    PlainBooleanDictionary dictionary = new PlainBooleanDictionary(dictionaryPage);
+
+    // Verify dictionary decoding
+    assertFalse(dictionary.decodeToBoolean(0));
+    assertTrue(dictionary.decodeToBoolean(1));
+    assertEquals(1, dictionary.getMaxId());
+  }
+
+  @Test
+  public void testBooleanDictionarySingleValue() throws IOException {
+    // Test dictionary with only true value
+    // Bit-packed: bit 0 = true (1) => byte = 0b00000001 = 0x01
+    BytesInput bytesTrue = BytesInput.from(new byte[] {0x01});
+    DictionaryPage dictionaryPageTrue = new DictionaryPage(bytesTrue, 1, PLAIN);
+
+    PlainBooleanDictionary dictionaryTrue = new PlainBooleanDictionary(dictionaryPageTrue);
+
+    assertTrue(dictionaryTrue.decodeToBoolean(0));
+    assertEquals(0, dictionaryTrue.getMaxId());
+
+    // Test dictionary with only false value
+    // Bit-packed: bit 0 = false (0) => byte = 0b00000000 = 0x00
+    BytesInput bytesFalse = BytesInput.from(new byte[] {0x00});
+    DictionaryPage dictionaryPageFalse = new DictionaryPage(bytesFalse, 1, PLAIN);
+
+    PlainBooleanDictionary dictionaryFalse = new PlainBooleanDictionary(dictionaryPageFalse);
+
+    assertFalse(dictionaryFalse.decodeToBoolean(0));
+    assertEquals(0, dictionaryFalse.getMaxId());
+  }
+
+  @Test
+  public void testBooleanDictionaryToString() throws IOException {
+    // The string form should name the class and list each id with its decoded value
+    // Bit-packed: bit 0 = false (0), bit 1 = true (1) => byte = 0b00000010 = 0x02
+    BytesInput bytes = BytesInput.from(new byte[] {0x02});
+    DictionaryPage dictionaryPage = new DictionaryPage(bytes, 2, PLAIN);
+
+    PlainBooleanDictionary dictionary = new PlainBooleanDictionary(dictionaryPage);
+
+    String str = dictionary.toString();
+    assertTrue(str.contains("PlainBooleanDictionary"));
+    assertTrue(str.contains("0 => false"));
+    assertTrue(str.contains("1 => true"));
+  }
+
+  @Test
+  public void testBooleanDictionaryWithDictionaryEncoding() throws IOException {
+    // Test with PLAIN_DICTIONARY encoding (both PLAIN and PLAIN_DICTIONARY should work)
+    // Bit-packed: bit 0 = true (1), bit 1 = false (0) => byte = 0b00000001 = 0x01
+    BytesInput bytes = BytesInput.from(new byte[] {0x01});
+    DictionaryPage dictionaryPage = new DictionaryPage(bytes, 2, PLAIN_DICTIONARY);
+
+    PlainBooleanDictionary dictionary = new PlainBooleanDictionary(dictionaryPage);
+
+    assertTrue(dictionary.decodeToBoolean(0));
+    assertFalse(dictionary.decodeToBoolean(1));
+    assertEquals(1, dictionary.getMaxId());
+  }
+
   private DictionaryValuesReader initDicReader(ValuesWriter cw, PrimitiveTypeName type) throws IOException {
     final DictionaryPage dictionaryPage = cw.toDictPageAndClose().copy();
     final ColumnDescriptor descriptor = new ColumnDescriptor(new String[] {"foo"}, type, 0, 0);