Search in sources :

Example 6 with DictionaryPage

use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.

the class TestValuesDecoders method testInt32RLEDictionary.

/**
 * Decodes a dictionary-id data page against an {@link IntegerDictionary} and checks the result
 * against the dictionary entries referenced by the generated ids, repeating the check for
 * several combinations of the helper's first two arguments.
 */
@Test
public void testInt32RLEDictionary() throws IOException {
    final int valueCount = 2048;
    final int dictionarySize = 29;
    Random random = new Random(83);

    // Both generators draw from the same seeded Random, so the dictionary page is produced first.
    List<Object> dictionary = new ArrayList<>();
    byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 32, random, dictionary);
    List<Integer> dictionaryIds = new ArrayList<>();
    byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds);

    // Expected output: the dictionary entry behind every generated id, in id order.
    List<Object> expectedValues = new ArrayList<>();
    dictionaryIds.forEach(id -> expectedValues.add(dictionary.get(id)));

    IntegerDictionary integerDictionary = new IntegerDictionary(
            new DictionaryPage(Slices.wrappedBuffer(dictionaryPage), dictionarySize, PLAIN_DICTIONARY));

    // Each row holds the first two helper arguments (presumably batch size and skip size;
    // confirm against int32BatchReadWithSkipHelper).
    int[][] helperArgs = {
            {valueCount, 0},
            {29, 0},
            {89, 0},
            {1024, 0},
            {256, 29},
            {89, 29},
            {1024, 1024},
    };
    for (int[] args : helperArgs) {
        // A fresh decoder is created for every run, exactly as in the unrolled form.
        int32BatchReadWithSkipHelper(args[0], args[1], valueCount, int32Dictionary(dataPage, dictionarySize, integerDictionary), expectedValues);
    }
}
Also used : IntegerDictionary(com.facebook.presto.parquet.dictionary.IntegerDictionary) Random(java.util.Random) ArrayList(java.util.ArrayList) DictionaryPage(com.facebook.presto.parquet.DictionaryPage) Test(org.testng.annotations.Test)

Example 7 with DictionaryPage

use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.

the class TestValuesDecoders method testTimestampRLEDictionary.

/**
 * Decodes a dictionary-id data page against a {@link TimestampDictionary} and checks the result
 * against the dictionary entries referenced by the generated ids, repeating the check for
 * several combinations of the helper's first two arguments.
 */
@Test
public void testTimestampRLEDictionary() throws IOException {
    final int valueCount = 2048;
    final int dictionarySize = 29;
    Random random = new Random(83);

    // Both generators draw from the same seeded Random, so the dictionary page is produced first.
    List<Object> dictionary = new ArrayList<>();
    byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 96, random, dictionary);
    List<Integer> dictionaryIds = new ArrayList<>();
    byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds);

    // Expected output: the dictionary entry behind every generated id, in id order.
    List<Object> expectedValues = new ArrayList<>();
    dictionaryIds.forEach(id -> expectedValues.add(dictionary.get(id)));

    TimestampDictionary tsDictionary = new TimestampDictionary(
            new DictionaryPage(Slices.wrappedBuffer(dictionaryPage), dictionarySize, PLAIN_DICTIONARY));

    // Each row holds the first two helper arguments (presumably batch size and skip size;
    // confirm against timestampBatchReadWithSkipHelper).
    int[][] helperArgs = {
            {valueCount, 0},
            {29, 0},
            {89, 0},
            {1024, 0},
            {256, 29},
            {89, 29},
            {1024, 1024},
    };
    for (int[] args : helperArgs) {
        // A fresh decoder is created for every run, exactly as in the unrolled form.
        timestampBatchReadWithSkipHelper(args[0], args[1], valueCount, timestampDictionary(dataPage, dictionarySize, tsDictionary), expectedValues);
    }
}
Also used : TimestampDictionary(com.facebook.presto.parquet.batchreader.dictionary.TimestampDictionary) Random(java.util.Random) ArrayList(java.util.ArrayList) DictionaryPage(com.facebook.presto.parquet.DictionaryPage) Test(org.testng.annotations.Test)

Example 8 with DictionaryPage

use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.

the class Int32FlatBatchReader method init.

/**
 * Attaches this reader to the given page reader and field, and builds the dictionary when the
 * page reader supplies a dictionary page.
 *
 * <p>The checks run in a fixed order: the reader must not already be initialized, then
 * {@code pageReader} must be non-null and report a total value count greater than zero, then
 * {@code field} must be non-null.
 *
 * @param pageReader page source for this reader
 * @param field field this reader is bound to
 * @param rowRanges not read by this method
 */
@Override
public void init(PageReader pageReader, Field field, RowRanges rowRanges) {
    checkArgument(!isInitialized(), "Parquet batch reader already initialized");

    // pageReader is stored before its value count is checked; the order is kept as-is.
    this.pageReader = requireNonNull(pageReader, "pageReader is null");
    checkArgument(pageReader.getTotalValueCount() > 0, "page is empty");
    this.field = requireNonNull(field, "field is null");

    DictionaryPage page = pageReader.readDictionaryPage();
    if (page == null) {
        // No dictionary page: the dictionary field is left untouched.
        return;
    }
    dictionary = Dictionaries.createDictionary(columnDescriptor, page);
}
Also used : DictionaryPage(com.facebook.presto.parquet.DictionaryPage)

Example 9 with DictionaryPage

use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.

the class TupleDomainParquetPredicate method getDomain.

/**
 * Derives a {@link Domain} for a column from the entries of its dictionary page.
 *
 * <p>Returns {@code Domain.all(type)} when {@code dictionaryDescriptor} is null, when it holds
 * no dictionary page, or when initializing the dictionary throws. Otherwise every dictionary id
 * from 0 up to the page's dictionary size is converted to a value, and the resulting list is
 * handed to the five-argument {@code getDomain} overload as both value-list arguments.
 *
 * @param type type passed through to the resulting domain
 * @param dictionaryDescriptor column descriptor plus optional dictionary page; may be null
 * @return the domain described above
 */
@VisibleForTesting
public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescriptor) {
    if (dictionaryDescriptor == null) {
        return Domain.all(type);
    }

    ColumnDescriptor columnDescriptor = dictionaryDescriptor.getColumnDescriptor();
    Optional<DictionaryPage> dictionaryPage = dictionaryDescriptor.getDictionaryPage();
    if (!dictionaryPage.isPresent()) {
        return Domain.all(type);
    }
    DictionaryPage page = dictionaryPage.get();

    Dictionary dictionary;
    try {
        dictionary = page.getEncoding().initDictionary(columnDescriptor, page);
    } catch (Exception e) {
        // OK to ignore exception when reading dictionaries: fall back to the unconstrained domain
        return Domain.all(type);
    }

    int entryCount = page.getDictionarySize();
    Function<Integer, Object> toValue = new DictionaryValueConverter(dictionary).getConverter(columnDescriptor.getPrimitiveType());

    // Materialize the value behind every dictionary id, in id order.
    List<Object> dictionaryValues = new ArrayList<>();
    for (int id = 0; id < entryCount; id++) {
        dictionaryValues.add(toValue.apply(id));
    }

    // TODO: when min == max (i.e., singleton ranges), the construction of Domains can be done more efficiently
    return getDomain(columnDescriptor, type, dictionaryValues, dictionaryValues, true);
}
Also used : Dictionary(com.facebook.presto.parquet.dictionary.Dictionary) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ArrayList(java.util.ArrayList) DictionaryPage(com.facebook.presto.parquet.DictionaryPage) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) VerifyException(com.google.common.base.VerifyException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 10 with DictionaryPage

use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.

the class AbstractNestedBatchReader method init.

/**
 * Attaches this reader to the given page reader and field, and builds the dictionary when the
 * page reader supplies a dictionary page.
 *
 * <p>The checks run in a fixed order: the reader must not already be initialized, then
 * {@code pageReader} must be non-null and report a total value count greater than zero, then
 * {@code field} must be non-null.
 *
 * @param pageReader page source for this reader
 * @param field field this reader is bound to
 * @param rowRanges not read by this method
 */
@Override
public void init(PageReader pageReader, Field field, RowRanges rowRanges) {
    Preconditions.checkState(!isInitialized(), "already initialized");

    // pageReader is stored before its value count is checked; the order is kept as-is.
    this.pageReader = requireNonNull(pageReader, "pageReader is null");
    checkArgument(pageReader.getTotalValueCount() > 0, "page is empty");
    this.field = requireNonNull(field, "field is null");

    DictionaryPage page = pageReader.readDictionaryPage();
    if (page == null) {
        // No dictionary page: the dictionary field is left untouched.
        return;
    }
    dictionary = Dictionaries.createDictionary(columnDescriptor, page);
}
Also used : DictionaryPage(com.facebook.presto.parquet.DictionaryPage)

Aggregations

DictionaryPage (com.facebook.presto.parquet.DictionaryPage)11 ArrayList (java.util.ArrayList)5 Random (java.util.Random)4 Test (org.testng.annotations.Test)4 IOException (java.io.IOException)3 ParquetCorruptionException (com.facebook.presto.parquet.ParquetCorruptionException)2 BinaryBatchDictionary (com.facebook.presto.parquet.batchreader.dictionary.BinaryBatchDictionary)2 TimestampDictionary (com.facebook.presto.parquet.batchreader.dictionary.TimestampDictionary)2 IntegerDictionary (com.facebook.presto.parquet.dictionary.IntegerDictionary)2 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DictionaryPageHeader (org.apache.parquet.format.DictionaryPageHeader)2 PageHeader (org.apache.parquet.format.PageHeader)2 DataPage (com.facebook.presto.parquet.DataPage)1 ParquetEncoding (com.facebook.presto.parquet.ParquetEncoding)1 PLAIN_DICTIONARY (com.facebook.presto.parquet.ParquetEncoding.PLAIN_DICTIONARY)1 ParquetTypeUtils.getParquetEncoding (com.facebook.presto.parquet.ParquetTypeUtils.getParquetEncoding)1 RichColumnDescriptor (com.facebook.presto.parquet.RichColumnDescriptor)1 TestParquetUtils.generateDictionaryIdPage2048 (com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.generateDictionaryIdPage2048)1 TestParquetUtils.generatePlainValuesPage (com.facebook.presto.parquet.batchreader.decoders.TestParquetUtils.generatePlainValuesPage)1 BinaryValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)1