Search in sources:

Example 6 with PlainValuesWriter

use of org.apache.parquet.column.values.plain.PlainValuesWriter in project parquet-mr by apache.

the class TestDictionary method testFloatDictionaryFallBack.

/**
 * Round-trips float values through a dictionary writer that is given a small dictionary byte
 * budget, reading the result back with a plain float reader, then repeats the round trip after
 * resetting both the page buffer and the dictionary.
 */
@Test
public void testFloatDictionaryFallBack() throws IOException {
    final int dictionaryByteLimit = 50;
    final int initialSlabSize = 100;
    final FallbackValuesWriter<PlainFloatDictionaryValuesWriter, PlainValuesWriter> writer =
            newPlainFloatDictionaryValuesWriter(dictionaryByteLimit, initialSlabSize);
    // The writer is expected to have fallen back to plain encoding, so its output is decoded
    // with a plain float reader rather than a dictionary reader.
    final ValuesReader plainReader = new PlainValuesReader.FloatPlainValuesReader();

    roundTripFloat(writer, plainReader, dictionaryByteLimit);

    // Emulate a page boundary: after reset() nothing may remain buffered.
    writer.reset();
    assertEquals(0, writer.getBufferedSize());
    writer.resetDictionary();

    // The same writer and reader must round-trip again once the dictionary has been reset.
    roundTripFloat(writer, plainReader, dictionaryByteLimit);
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) Test(org.junit.Test)

Example 7 with PlainValuesWriter

use of org.apache.parquet.column.values.plain.PlainValuesWriter in project parquet-mr by apache.

the class BenchmarkDeltaLengthByteArray method benchmarkRandomStringsWithPlainValuesWriter.

/**
 * Benchmarks writing {@code values} with a {@link PlainValuesWriter} and reading them back with
 * a {@link BinaryPlainValuesReader}, then prints the input stream position reached by the read.
 *
 * <p>The writer is created with a {@link DirectByteBufferAllocator}, so it is closed in a
 * {@code finally} block to hand its buffers back instead of leaving them to accumulate across
 * benchmark rounds.
 *
 * @throws IOException if writing or reading the values fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
    PlainValuesWriter writer = new PlainValuesWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
    try {
        BinaryPlainValuesReader reader = new BinaryPlainValuesReader();
        Utils.writeData(writer, values);
        ByteBufferInputStream data = writer.getBytes().toInputStream();
        // The decoded values are not inspected; the read is kept because it is part of the
        // measured work and data.position() is reported right after it.
        Utils.readData(reader, data, values.length);
        System.out.println("size " + data.position());
    } finally {
        // Close only after the stream has been consumed, since it is obtained from the writer's bytes.
        writer.close();
    }
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 8 with PlainValuesWriter

use of org.apache.parquet.column.values.plain.PlainValuesWriter in project parquet-mr by apache.

the class TestDictionary method testDoubleDictionary.

/**
 * Writes two pages of doubles drawn from 50 distinct values, checks that each page is reported
 * as PLAIN_DICTIONARY encoded with a dictionary of 50 entries, and then reads both pages back
 * through a dictionary reader, comparing every value.
 */
@Test
public void testDoubleDictionary() throws IOException {
    final int firstPageCount = 1000;
    final int secondPageCount = 2000;
    final int distinctValues = 50;
    final FallbackValuesWriter<PlainDoubleDictionaryValuesWriter, PlainValuesWriter> writer =
            newPlainDoubleDictionaryValuesWriter(10000, 10000);

    // First page: ascending sequence 0, 1, ..., 49, 0, 1, ...
    for (int n = 0; n < firstPageCount; n++) {
        writer.writeDouble(n % distinctValues);
    }
    final BytesInput firstPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);
    assertEquals(distinctValues, writer.initialWriter.getDictionarySize());

    // Second page: descending sequence over the same values, so the dictionary must not grow.
    for (int n = secondPageCount; n > 0; n--) {
        writer.writeDouble(n % distinctValues);
    }
    final BytesInput secondPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);
    assertEquals(distinctValues, writer.initialWriter.getDictionarySize());

    final DictionaryValuesReader dictionaryReader = initDicReader(writer, DOUBLE);

    dictionaryReader.initFromPage(firstPageCount, firstPage.toInputStream());
    for (int n = 0; n < firstPageCount; n++) {
        final double expected = n % distinctValues;
        assertEquals(expected, dictionaryReader.readDouble(), 0.0);
    }

    dictionaryReader.initFromPage(secondPageCount, secondPage.toInputStream());
    for (int n = secondPageCount; n > 0; n--) {
        final double expected = n % distinctValues;
        assertEquals(expected, dictionaryReader.readDouble(), 0.0);
    }
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) BytesInput(org.apache.parquet.bytes.BytesInput) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 9 with PlainValuesWriter

use of org.apache.parquet.column.values.plain.PlainValuesWriter in project parquet-mr by apache.

the class BenchmarkDeltaByteArray method benchmarkRandomStringsWithPlainValuesWriter.

/**
 * Benchmarks a plain-encoding write of {@code values} followed by a read-back through a
 * {@link BinaryPlainValuesReader}, printing the input stream position once the read is done.
 *
 * <p>Because the {@link PlainValuesWriter} is backed by a {@link DirectByteBufferAllocator},
 * it is closed in a {@code finally} block so its buffers are released after every round.
 *
 * @throws IOException if writing or reading the values fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
    PlainValuesWriter writer = new PlainValuesWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
    try {
        BinaryPlainValuesReader reader = new BinaryPlainValuesReader();
        Utils.writeData(writer, values);
        ByteBufferInputStream data = writer.getBytes().toInputStream();
        // The returned array is intentionally discarded: only the work of reading and the
        // resulting stream position matter to this benchmark.
        Utils.readData(reader, data, values.length);
        System.out.println("size " + data.position());
    } finally {
        // Close last: the stream read above is obtained from the writer's bytes.
        writer.close();
    }
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 10 with PlainValuesWriter

use of org.apache.parquet.column.values.plain.PlainValuesWriter in project parquet-mr by apache.

the class TestDictionary method testLongDictionary.

/**
 * Writes two pages of longs drawn from 50 distinct values, checks that each page is reported as
 * PLAIN_DICTIONARY encoded with a dictionary of 50 entries, and then reads both pages back
 * through a dictionary reader, comparing every value.
 */
@Test
public void testLongDictionary() throws IOException {
    final int firstPageCount = 1000;
    final int secondPageCount = 2000;
    final FallbackValuesWriter<PlainLongDictionaryValuesWriter, PlainValuesWriter> writer =
            newPlainLongDictionaryValuesWriter(10000, 10000);

    // First page: ascending sequence cycling through 0..49.
    for (long n = 0; n < firstPageCount; n++) {
        writer.writeLong(n % 50);
    }
    final BytesInput firstPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);
    assertEquals(50, writer.initialWriter.getDictionarySize());

    // Second page: descending sequence over the same values, so the dictionary must not grow.
    for (long n = secondPageCount; n > 0; n--) {
        writer.writeLong(n % 50);
    }
    final BytesInput secondPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);
    assertEquals(50, writer.initialWriter.getDictionarySize());

    final DictionaryValuesReader dictionaryReader = initDicReader(writer, PrimitiveTypeName.INT64);

    dictionaryReader.initFromPage(firstPageCount, firstPage.toInputStream());
    for (long n = 0; n < firstPageCount; n++) {
        final long expected = n % 50;
        final long actual = dictionaryReader.readLong();
        assertEquals(expected, actual);
    }

    dictionaryReader.initFromPage(secondPageCount, secondPage.toInputStream());
    for (long n = secondPageCount; n > 0; n--) {
        final long expected = n % 50;
        final long actual = dictionaryReader.readLong();
        assertEquals(expected, actual);
    }
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) BytesInput(org.apache.parquet.bytes.BytesInput) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) Test(org.junit.Test)

Aggregations

PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter): 12, Test (org.junit.Test): 12, BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader): 7, ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream): 4, BytesInput (org.apache.parquet.bytes.BytesInput): 4, ValuesReader (org.apache.parquet.column.values.ValuesReader): 4, PlainValuesReader (org.apache.parquet.column.values.plain.PlainValuesReader): 4, BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions): 3, DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator): 3, PlainIntegerDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter): 3, Binary (org.apache.parquet.io.api.Binary): 3, PlainDoubleDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter): 2, PlainFloatDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter): 2, PlainLongDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter): 2, ByteBuffer (java.nio.ByteBuffer): 1