Search in sources:

Example 6 with ValuesWriter

use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.

the class DefaultValuesWriterFactoryTest method doTestValueWriter.

/**
 * Creates a values writer for a mocked column of the given primitive type and
 * hands it to {@code validateWriterType} together with the expected class.
 *
 * @param typeName primitive type used to build the mock column
 * @param version writer version passed to {@code getDefaultFactory}
 * @param enableDictionary dictionary flag passed to {@code getDefaultFactory}
 * @param expectedValueWriterClass class the created writer is validated against
 */
private void doTestValueWriter(PrimitiveTypeName typeName, WriterVersion version, boolean enableDictionary, Class<? extends ValuesWriter> expectedValueWriterClass) {
    final ColumnDescriptor column = getMockColumn(typeName);
    final ValuesWriterFactory writerFactory = getDefaultFactory(version, enableDictionary);
    // The writer is only needed for the type check, so it is not kept in a local.
    validateWriterType(writerFactory.newValuesWriter(column), expectedValueWriterClass);
}
Also used : ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) RunLengthBitPackingHybridValuesWriter(org.apache.parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter) DictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter) BooleanPlainValuesWriter(org.apache.parquet.column.values.plain.BooleanPlainValuesWriter) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) DeltaBinaryPackingValuesWriter(org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) FixedLenByteArrayPlainValuesWriter(org.apache.parquet.column.values.plain.FixedLenByteArrayPlainValuesWriter)

Example 7 with ValuesWriter

use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.

the class TestBitPackingColumn method validateEncodeDecode.

/**
 * For every {@code PACKING_TYPE}, writes {@code vals} with that type's writer,
 * compares the string rendering of the produced bytes with {@code expected},
 * then reads the values back with that type's reader and compares them with
 * the input.
 *
 * @param bitLength bit width used to derive the bound ({@code 2^bitLength - 1})
 *                  given to each writer and reader
 * @param vals values to write and read back
 * @param expected expected {@code TestBitPacking.toString} rendering of the encoded bytes
 * @throws IOException if a writer or reader call fails
 */
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException {
    // The bound depends only on bitLength, so compute it once instead of per packing type.
    final int bound = (int) Math.pow(2, bitLength) - 1;
    for (PACKING_TYPE type : PACKING_TYPE.values()) {
        LOG.debug("{}", type);
        ValuesWriter w = type.getWriter(bound);
        for (int i : vals) {
            w.writeInteger(i);
        }
        byte[] bytes = w.getBytes().toByteArray();
        // Parameterized like the other debug calls here; the rendered message is unchanged.
        LOG.debug("vals ({}): {}", bitLength, TestBitPacking.toString(vals));
        LOG.debug("bytes: {}", TestBitPacking.toString(bytes));
        assertEquals(type.toString(), expected, TestBitPacking.toString(bytes));
        ValuesReader r = type.getReader(bound);
        r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
        int[] result = new int[vals.length];
        for (int i = 0; i < result.length; i++) {
            result[i] = r.readInteger();
        }
        LOG.debug("result: {}", TestBitPacking.toString(result));
        assertArrayEquals(type + " result: " + TestBitPacking.toString(result), vals, result);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ValuesWriter(org.apache.parquet.column.values.ValuesWriter)

Example 8 with ValuesWriter

use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.

the class TestDictionary method testFirstPageFallBack.

/**
 * Writes a page of distinct values followed by a page of repeated values and
 * expects both pages to be reported as PLAIN, then reads both pages back with
 * a plain binary reader.
 */
@Test
public void testFirstPageFallBack() throws IOException {
    final int valuesPerPage = 1000;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(10000, 10000);

    // First page: distinct values make the dictionary inefficient, so the writer falls back.
    writeDistinct(valuesPerPage, writer, "a");
    final BytesInput distinctPage = getBytesAndCheckEncoding(writer, PLAIN);

    // Second page: repeated values, yet still plain because the fallback happened on the first page.
    writeRepeated(valuesPerPage, writer, "b");
    final BytesInput repeatedPage = getBytesAndCheckEncoding(writer, PLAIN);

    final ValuesReader plainReader = new BinaryPlainValuesReader();
    checkDistinct(valuesPerPage, distinctPage, plainReader, "a");
    checkRepeated(valuesPerPage, repeatedPage, plainReader, "b");
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BytesInput(org.apache.parquet.bytes.BytesInput) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 9 with ValuesWriter

use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.

the class TestDictionary method testBinaryDictionaryFallBack.

/**
 * Writes 100 distinct strings into a dictionary writer capped at 50 dictionary
 * bytes and checks, after each write, that the reported encoding switches from
 * PLAIN_DICTIONARY to PLAIN once the accumulated data size reaches the cap.
 * The page is then read back with a plain binary reader and the writer is reset.
 */
@Test
public void testBinaryDictionaryFallBack() throws IOException {
    final int valueCount = 100;
    final int slabSize = 100;
    final int maxDictionaryByteSize = 50;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(maxDictionaryByteSize, slabSize);

    int writtenBytes = 0;
    for (int n = 0; n < valueCount; n++) {
        final Binary value = Binary.fromString("str" + n);
        writer.writeBytes(value);
        // Each value is counted as its length plus 4 (presumably a length prefix; confirm against the writer).
        writtenBytes += value.length() + 4;
        if (writtenBytes >= maxDictionaryByteSize) {
            assertEquals(PLAIN, writer.getEncoding());
        } else {
            assertEquals(PLAIN_DICTIONARY, writer.getEncoding());
        }
    }

    // The writer fell back to plain encoding, so a plain binary reader is used to read the page.
    final ValuesReader plainReader = new BinaryPlainValuesReader();
    plainReader.initFromPage(valueCount, writer.getBytes().toInputStream());
    for (int n = 0; n < valueCount; n++) {
        assertEquals(Binary.fromString("str" + n), plainReader.readBytes());
    }

    // Simulate cutting the page: after reset nothing should remain buffered.
    writer.reset();
    assertEquals(0, writer.getBufferedSize());
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) Binary(org.apache.parquet.io.api.Binary) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 10 with ValuesWriter

use of org.apache.parquet.column.values.ValuesWriter in project parquet-mr by apache.

the class TestDictionary method testSecondPageFallBack.

/**
 * Writes three pages (repeated, distinct, repeated) and expects the first to
 * be reported as PLAIN_DICTIONARY and the other two as PLAIN. The first page
 * is read back with a dictionary reader, the remaining two with a plain
 * binary reader.
 */
@Test
public void testSecondPageFallBack() throws IOException {
    final int valuesPerPage = 1000;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(1000, 10000);

    // Page 1: repeated values, expected to stay dictionary encoded.
    writeRepeated(valuesPerPage, writer, "a");
    final BytesInput dictionaryPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

    // Page 2: distinct values are not efficient for the dictionary, so the writer falls back.
    writeDistinct(valuesPerPage, writer, "b");
    final BytesInput fallbackPage = getBytesAndCheckEncoding(writer, PLAIN);

    // Page 3: repeated again, but still plain because of the fallback on the previous page.
    writeRepeated(valuesPerPage, writer, "a");
    final BytesInput plainPage = getBytesAndCheckEncoding(writer, PLAIN);

    final ValuesReader dictionaryReader = initDicReader(writer, BINARY);
    checkRepeated(valuesPerPage, dictionaryPage, dictionaryReader, "a");

    final ValuesReader plainReader = new BinaryPlainValuesReader();
    checkDistinct(valuesPerPage, fallbackPage, plainReader, "b");
    checkRepeated(valuesPerPage, plainPage, plainReader, "a");
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BytesInput(org.apache.parquet.bytes.BytesInput) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Aggregations

- ValuesWriter (org.apache.parquet.column.values.ValuesWriter): 11
- Test (org.junit.Test): 8
- FallbackValuesWriter (org.apache.parquet.column.values.fallback.FallbackValuesWriter): 6
- PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter): 6
- BytesInput (org.apache.parquet.bytes.BytesInput): 5
- PlainBinaryDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter): 5
- PlainDoubleDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter): 5
- PlainFloatDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter): 5
- PlainIntegerDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter): 5
- PlainLongDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter): 5
- BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader): 5
- ValuesReader (org.apache.parquet.column.values.ValuesReader): 4
- RunLengthBitPackingHybridValuesWriter (org.apache.parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter): 4
- DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator): 3
- PlainValuesReader (org.apache.parquet.column.values.plain.PlainValuesReader): 3
- BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions): 2
- ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 2
- DeltaBinaryPackingValuesWriter (org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesWriter): 2
- Binary (org.apache.parquet.io.api.Binary): 2
- ArrayList (java.util.ArrayList): 1