Search in sources :

Example 6 with BinaryPlainValuesReader

use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by apache.

the class TestDictionary method testBinaryDictionaryFallBack.

/**
 * Writes 100 distinct strings through a binary dictionary writer whose dictionary is capped
 * at 50 bytes and checks the encoding reported after every write: {@code PLAIN_DICTIONARY}
 * while the running byte count stays under the cap, {@code PLAIN} from then on. The page is
 * then read back with a {@link BinaryPlainValuesReader}, and finally the writer is reset and
 * its buffered size is expected to be zero.
 *
 * @throws IOException if producing or reading the page bytes fails
 */
@Test
public void testBinaryDictionaryFallBack() throws IOException {
    final int dictionaryByteLimit = 50;
    final int slab = 100;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(dictionaryByteLimit, slab);

    int bytesWritten = 0;
    for (long n = 0; n < 100; n++) {
        final Binary value = Binary.fromString("str" + n);
        writer.writeBytes(value);
        // Each value is counted as its own length plus 4 extra bytes
        // (presumably the length prefix -- confirm against the writer implementation).
        bytesWritten += value.length() + 4;
        // The dictionary byte limit doubles as the fallback threshold.
        assertEquals(bytesWritten < dictionaryByteLimit ? PLAIN_DICTIONARY : PLAIN, writer.getEncoding());
    }

    // The writer has fallen back to plain encoding, so a plain reader is used to read the page back.
    final ValuesReader plainReader = new BinaryPlainValuesReader();
    plainReader.initFromPage(100, writer.getBytes().toInputStream());
    for (long n = 0; n < 100; n++) {
        final Binary expected = Binary.fromString("str" + n);
        assertEquals(expected, plainReader.readBytes());
    }

    // Simulate cutting the page: after a reset nothing should remain buffered.
    writer.reset();
    assertEquals(0, writer.getBufferedSize());
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) Binary(org.apache.parquet.io.api.Binary) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 7 with BinaryPlainValuesReader

use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by apache.

the class TestDictionary method testSecondPageFallBack.

/**
 * Produces three consecutive pages from one dictionary writer and checks the encoding
 * reported for each: the first page (repeated {@code "a"} values) is expected to be
 * {@code PLAIN_DICTIONARY}, the second (distinct {@code "b"} values) {@code PLAIN}, and the
 * third (repeated {@code "a"} values again) still {@code PLAIN}. The first page is then
 * verified through a dictionary reader and the other two through a
 * {@link BinaryPlainValuesReader}.
 *
 * @throws IOException if producing or reading the page bytes fails
 */
@Test
public void testSecondPageFallBack() throws IOException {
    final int valueCount = 1000;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(1000, 10000);

    // Page 1: repeated values.
    writeRepeated(valueCount, writer, "a");
    final BytesInput dictionaryPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

    // Page 2: distinct values make the dictionary inefficient, so the writer falls back.
    writeDistinct(valueCount, writer, "b");
    final BytesInput distinctPlainPage = getBytesAndCheckEncoding(writer, PLAIN);

    // Page 3: repeated values again, yet still plain because of the fallback on the previous page.
    writeRepeated(valueCount, writer, "a");
    final BytesInput repeatedPlainPage = getBytesAndCheckEncoding(writer, PLAIN);

    // The dictionary-encoded page is checked with a dictionary reader...
    final ValuesReader dictionaryReader = initDicReader(writer, BINARY);
    checkRepeated(valueCount, dictionaryPage, dictionaryReader, "a");

    // ...while both plain pages share a single plain reader, in page order.
    final ValuesReader plainReader = new BinaryPlainValuesReader();
    checkDistinct(valueCount, distinctPlainPage, plainReader, "b");
    checkRepeated(valueCount, repeatedPlainPage, plainReader, "a");
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BytesInput(org.apache.parquet.bytes.BytesInput) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 8 with BinaryPlainValuesReader

use of org.apache.parquet.column.values.plain.BinaryPlainValuesReader in project parquet-mr by apache.

the class BenchmarkDeltaByteArray method benchmarkSortedStringsWithPlainValuesWriter.

/**
 * Benchmark round-trip of {@code sortedVals} through a {@link PlainValuesWriter} and a
 * {@link BinaryPlainValuesReader}, printing the input stream position once reading is done.
 *
 * @throws IOException if producing or reading the encoded bytes fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkSortedStringsWithPlainValuesWriter() throws IOException {
    // The same 64 KiB figure is used for both size arguments of the writer.
    final int bufferBytes = 64 * 1024;
    final PlainValuesWriter plainWriter =
        new PlainValuesWriter(bufferBytes, bufferBytes, new DirectByteBufferAllocator());
    final BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();

    Utils.writeData(plainWriter, sortedVals);
    final ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();

    // NOTE(review): the read count comes from values.length while sortedVals was written;
    // presumably both arrays have the same length -- confirm against the field declarations.
    // The decoded values are not inspected here.
    final Binary[] decoded = Utils.readData(plainReader, encoded, values.length);
    System.out.println("size " + encoded.position());
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Aggregations

BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader)8 PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter)8 Test (org.junit.Test)8 ValuesWriter (org.apache.parquet.column.values.ValuesWriter)5 PlainBinaryDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter)5 PlainDoubleDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter)5 PlainFloatDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter)5 PlainIntegerDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter)5 PlainLongDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter)5 FallbackValuesWriter (org.apache.parquet.column.values.fallback.FallbackValuesWriter)5 BytesInput (org.apache.parquet.bytes.BytesInput)4 Binary (org.apache.parquet.io.api.Binary)4 BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions)3 ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream)3 DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator)3 ValuesReader (org.apache.parquet.column.values.ValuesReader)3 PlainValuesReader (org.apache.parquet.column.values.plain.PlainValuesReader)3