Search in sources:

Example 11 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class TestBitPackingColumn method validateEncodeDecode.

/**
 * Round-trips {@code vals} through the writer/reader pair of every {@code PACKING_TYPE} and
 * asserts both the encoded bytes and the decoded values.
 *
 * @param bitLength bit width; the bound handed to the writer and reader is {@code 2^bitLength - 1}
 * @param vals      values to encode and then decode again
 * @param expected  expected {@code TestBitPacking.toString} rendering of the encoded bytes
 * @throws IOException propagated from the writer/reader calls made in the body
 */
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException {
    // The bound depends only on bitLength, so it is computed once instead of once per packing type.
    final int bound = (int) Math.pow(2, bitLength) - 1;
    for (PACKING_TYPE type : PACKING_TYPE.values()) {
        LOG.debug("{}", type);

        // Encode all values with this packing type's writer.
        ValuesWriter w = type.getWriter(bound);
        for (int i : vals) {
            w.writeInteger(i);
        }
        byte[] bytes = w.getBytes().toByteArray();
        // Placeholder form, consistent with the other debug statements in this method.
        LOG.debug("vals ({}): {}", bitLength, TestBitPacking.toString(vals));
        LOG.debug("bytes: {}", TestBitPacking.toString(bytes));
        assertEquals(type.toString(), expected, TestBitPacking.toString(bytes));

        // Decode the bytes again and compare against the input.
        ValuesReader r = type.getReader(bound);
        r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
        int[] result = new int[vals.length];
        for (int i = 0; i < result.length; i++) {
            result[i] = r.readInteger();
        }
        LOG.debug("result: {}", TestBitPacking.toString(result));
        assertArrayEquals(type + " result: " + TestBitPacking.toString(result), vals, result);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ValuesWriter(org.apache.parquet.column.values.ValuesWriter)

Example 12 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class TestDictionary method testFirstPageFallBack.

/**
 * Writes a page of distinct values followed by a page of repeated values, expects both pages to
 * be reported as PLAIN, and reads both pages back with a single plain binary reader.
 */
@Test
public void testFirstPageFallBack() throws IOException {
    final int valuesPerPage = 1000;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(10000, 10000);

    // Page 1: distinct values; the dictionary is not efficient here, so PLAIN is expected.
    writeDistinct(valuesPerPage, writer, "a");
    final BytesInput firstPage = getBytesAndCheckEncoding(writer, PLAIN);

    // Page 2: repeated values; PLAIN is still expected because the fallback happened on page 1.
    writeRepeated(valuesPerPage, writer, "b");
    final BytesInput secondPage = getBytesAndCheckEncoding(writer, PLAIN);

    // The same reader instance is used for both pages.
    final ValuesReader plainReader = new BinaryPlainValuesReader();
    checkDistinct(valuesPerPage, firstPage, plainReader, "a");
    checkRepeated(valuesPerPage, secondPage, plainReader, "b");
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BytesInput(org.apache.parquet.bytes.BytesInput) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 13 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class TestDictionary method testBinaryDictionaryFallBack.

/**
 * Writes 100 distinct strings through a binary dictionary writer with a 50-byte dictionary limit,
 * checking after every write that the reported encoding switches from PLAIN_DICTIONARY to PLAIN
 * once the running size estimate reaches the limit, then reads the page back as plain binary.
 */
@Test
public void testBinaryDictionaryFallBack() throws IOException {
    final int valueCount = 100;
    final int slabSize = 100;
    final int maxDictionaryByteSize = 50;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(maxDictionaryByteSize, slabSize);

    // Running size estimate: value length plus 4 bytes per entry
    // (presumably a length prefix; TODO confirm against the writer).
    int estimatedSize = 0;
    for (int n = 0; n < valueCount; n++) {
        final Binary value = Binary.fromString("str" + n);
        writer.writeBytes(value);
        estimatedSize += value.length() + 4;
        // Below the dictionary limit the writer is expected to stay on the dictionary encoding.
        assertEquals(estimatedSize < maxDictionaryByteSize ? PLAIN_DICTIONARY : PLAIN, writer.getEncoding());
    }

    // The writer fell back to plain encoding, so the page is read back with a plain reader.
    final ValuesReader plainReader = new BinaryPlainValuesReader();
    plainReader.initFromPage(valueCount, writer.getBytes().toInputStream());
    for (int n = 0; n < valueCount; n++) {
        assertEquals(Binary.fromString("str" + n), plainReader.readBytes());
    }

    // Simulate cutting the page: after reset nothing may remain buffered.
    writer.reset();
    assertEquals(0, writer.getBufferedSize());
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) Binary(org.apache.parquet.io.api.Binary) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 14 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class TestDictionary method testSecondPageFallBack.

/**
 * Writes three pages (repeated, distinct, repeated) and expects the encoding to be
 * PLAIN_DICTIONARY for the first page and PLAIN for the two that follow; the first page is read
 * back through a dictionary reader, the other two through a plain binary reader.
 */
@Test
public void testSecondPageFallBack() throws IOException {
    final int valuesPerPage = 1000;
    final ValuesWriter writer = newPlainBinaryDictionaryValuesWriter(1000, 10000);

    // Page 1: repeated values, dictionary encoding expected.
    writeRepeated(valuesPerPage, writer, "a");
    final BytesInput dictionaryPage = getBytesAndCheckEncoding(writer, PLAIN_DICTIONARY);

    // Page 2: distinct values; the dictionary is not efficient, so the writer falls back.
    writeDistinct(valuesPerPage, writer, "b");
    final BytesInput fallbackPage = getBytesAndCheckEncoding(writer, PLAIN);

    // Page 3: repeated values again, but still PLAIN because of the earlier fallback.
    writeRepeated(valuesPerPage, writer, "a");
    final BytesInput afterFallbackPage = getBytesAndCheckEncoding(writer, PLAIN);

    // Read the dictionary-encoded page first.
    final ValuesReader dictionaryReader = initDicReader(writer, BINARY);
    checkRepeated(valuesPerPage, dictionaryPage, dictionaryReader, "a");

    // Then read both plain pages with one plain reader.
    final ValuesReader plainReader = new BinaryPlainValuesReader();
    checkDistinct(valuesPerPage, fallbackPage, plainReader, "b");
    checkRepeated(valuesPerPage, afterFallbackPage, plainReader, "a");
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) PlainValuesReader(org.apache.parquet.column.values.plain.PlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) BytesInput(org.apache.parquet.bytes.BytesInput) PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) PlainIntegerDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter) PlainFloatDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter) ValuesWriter(org.apache.parquet.column.values.ValuesWriter) PlainBinaryDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter) PlainLongDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter) FallbackValuesWriter(org.apache.parquet.column.values.fallback.FallbackValuesWriter) PlainDoubleDictionaryValuesWriter(org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter) Test(org.junit.Test)

Example 15 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class ColumnReaderImpl method initDataReader.

/**
 * Installs the values reader for a new data page and initializes it from the page bytes.
 *
 * @param dataEncoding encoding of the page's values; decides between a dictionary-based and a
 *                     plain values reader
 * @param in           stream positioned at the page's value data
 * @param valueCount   number of values in the page
 * @throws ParquetDecodingException if the encoding needs a dictionary but none is set, or if
 *                                  initializing the reader from the page raises an IOException
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
    // Keep the reader of the page being replaced; it may be handed to the new reader below.
    final ValuesReader readerBeforeSwitch = this.dataColumn;

    this.currentEncoding = dataEncoding;
    this.pageValueCount = valueCount;
    this.endOfPageValueCount = readValues + pageValueCount;

    final boolean dictionaryEncoded = dataEncoding.usesDictionary();
    if (dictionaryEncoded && dictionary == null) {
        throw new ParquetDecodingException("could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
    }
    if (dictionaryEncoded) {
        this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
    } else {
        this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
    }

    // Bind through the dictionary only when the converter supports it; otherwise bind by type.
    if (dictionaryEncoded && converter.hasDictionarySupport()) {
        bindToDictionary(dictionary);
    } else {
        bind(path.getType());
    }

    try {
        dataColumn.initFromPage(pageValueCount, in);
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read page in col " + path, e);
    }

    // instanceof is false for null, so no separate null check on the previous reader is needed.
    // NOTE(review): the type test is on the previous reader while the cast is applied to the new
    // dataColumn - confirm the new reader always implements RequiresPreviousReader here.
    if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding)
            && readerBeforeSwitch instanceof RequiresPreviousReader) {
        // previous reader can only be set if reading sequentially
        ((RequiresPreviousReader) dataColumn).setPreviousReader(readerBeforeSwitch);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) RequiresPreviousReader(org.apache.parquet.column.values.RequiresPreviousReader) IOException(java.io.IOException)

Aggregations

ValuesReader (org.apache.parquet.column.values.ValuesReader)15 Test (org.junit.Test)10 BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader)7 PlainValuesReader (org.apache.parquet.column.values.plain.PlainValuesReader)7 PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter)7 IOException (java.io.IOException)4 ValuesWriter (org.apache.parquet.column.values.ValuesWriter)4 PlainDoubleDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter)4 PlainFloatDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter)4 PlainIntegerDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter)4 PlainLongDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter)4 ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException)4 BytesInput (org.apache.parquet.bytes.BytesInput)3 DeltaBinaryPackingValuesReader (org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader)3 PlainBinaryDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter)3 FallbackValuesWriter (org.apache.parquet.column.values.fallback.FallbackValuesWriter)3 ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream)2 BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions)1 DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator)1 RequiresPreviousReader (org.apache.parquet.column.values.RequiresPreviousReader)1