use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.
the class TestValuesDecoders method testBinaryRLEDictionary.
/**
 * Checks dictionary-encoded binary decoding against values looked up directly from the dictionary.
 *
 * A 29-entry plain dictionary page and a page of dictionary ids are generated from a fixed-seed
 * {@link Random}; the expected output is the dictionary entry for each generated id. The decoder is
 * then exercised over several (first, second) helper-argument combinations — presumably batch size
 * and skip size, judging by the helper's name; confirm against binaryBatchReadWithSkipHelper.
 */
@Test
public void testBinaryRLEDictionary() throws IOException {
    final int valueCount = 2048;
    final int dictionarySize = 29;
    Random random = new Random(83);

    // The dictionary page must be generated before the id page: both draw from the same Random.
    List<Object> dictionary = new ArrayList<>();
    byte[] dictionaryPage = TestParquetUtils.generatePlainValuesPage(dictionarySize, -1, random, dictionary);

    List<Integer> dictionaryIds = new ArrayList<>();
    byte[] dataPage = TestParquetUtils.generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds);

    // Resolve every generated id to its dictionary entry to obtain the expected decoded values.
    List<Object> expectedValues = new ArrayList<>(dictionaryIds.size());
    dictionaryIds.forEach(id -> expectedValues.add(dictionary.get(id)));

    BinaryBatchDictionary batchDictionary = new BinaryBatchDictionary(
            new DictionaryPage(Slices.wrappedBuffer(dictionaryPage), dictionarySize, PLAIN_DICTIONARY));

    // Each row holds the first two arguments handed to the helper, in the order the runs execute.
    int[][] helperArguments = {
            {valueCount, 0},
            {29, 0},
            {89, 0},
            {1024, 0},
            {256, 29},
            {89, 29},
            {1024, 1024},
    };
    for (int[] arguments : helperArguments) {
        // A fresh decoder is built for every run so no state carries over between runs.
        binaryBatchReadWithSkipHelper(
                arguments[0],
                arguments[1],
                valueCount,
                binaryDictionary(dataPage, dictionarySize, batchDictionary),
                expectedValues);
    }
}
use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.
the class TestValuesDecoders method testInt64RLEDictionary.
/**
 * Checks dictionary-encoded int64 decoding, first as raw longs and then through the
 * timestamp-micros decoder.
 *
 * A 29-entry plain dictionary page and a page of dictionary ids are generated from a fixed-seed
 * {@link Random}; the expected output is the dictionary entry for each generated id. For the
 * timestamp pass every expected value is divided by 1000 (long division). The first two helper
 * arguments are presumably batch size and skip size, judging by the helper's name; confirm
 * against int64BatchReadWithSkipHelper.
 */
@Test
public void testInt64RLEDictionary() throws IOException {
    final int valueCount = 2048;
    final int dictionarySize = 29;
    Random random = new Random(83);

    // The dictionary page must be generated before the id page: both draw from the same Random.
    List<Object> dictionary = new ArrayList<>();
    byte[] dictionaryPage = generatePlainValuesPage(dictionarySize, 64, random, dictionary);

    List<Integer> dictionaryIds = new ArrayList<>();
    byte[] dataPage = generateDictionaryIdPage2048(dictionarySize - 1, random, dictionaryIds);

    // Resolve every generated id to its dictionary entry to obtain the expected decoded values.
    List<Object> expectedValues = new ArrayList<>(dictionaryIds.size());
    dictionaryIds.forEach(id -> expectedValues.add(dictionary.get(id)));

    LongDictionary dictionaryOfLongs = new LongDictionary(
            new DictionaryPage(Slices.wrappedBuffer(dictionaryPage), dictionarySize, PLAIN_DICTIONARY));

    // Each row holds the first two arguments handed to the helper, in the order the runs execute.
    int[][] helperArguments = {
            {valueCount, 0},
            {29, 0},
            {89, 0},
            {1024, 0},
            {256, 29},
            {89, 29},
            {1024, 1024},
    };

    // Pass 1: plain int64 decoding; a fresh decoder is built for every run.
    for (int[] arguments : helperArguments) {
        int64BatchReadWithSkipHelper(
                arguments[0],
                arguments[1],
                valueCount,
                int64Dictionary(dataPage, dictionarySize, dictionaryOfLongs),
                expectedValues);
    }

    // Pass 2: the timestamp-micros decoder is expected to yield each raw value divided by 1000.
    List<Object> expectedTimestampValues = new ArrayList<>(expectedValues.size());
    for (Object rawValue : expectedValues) {
        expectedTimestampValues.add((long) rawValue / 1000L);
    }
    for (int[] arguments : helperArguments) {
        int64BatchReadWithSkipHelper(
                arguments[0],
                arguments[1],
                valueCount,
                int64TimestampMicrosDictionary(dataPage, dictionarySize, dictionaryOfLongs),
                expectedTimestampValues);
    }
}
use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.
the class BinaryFlatBatchReader method init.
/**
 * Binds this reader to its page reader and field, then loads the column dictionary when the
 * page reader supplies a dictionary page.
 *
 * @param pageReader source of pages; must be non-null and report a total value count above zero
 * @param field the field being read; must be non-null
 * @param rowRanges not consulted by this method
 * @throws IllegalArgumentException if the reader is already initialized or the page reader is empty
 * @throws NullPointerException if {@code pageReader} or {@code field} is null
 */
@Override
public void init(PageReader pageReader, Field field, RowRanges rowRanges) {
    // Validation and assignment are interleaved on purpose: each check runs before the next field is set.
    checkArgument(!isInitialized(), "Parquet batch reader already initialized");
    this.pageReader = requireNonNull(pageReader, "pageReader is null");
    checkArgument(pageReader.getTotalValueCount() > 0, "page is empty");
    this.field = requireNonNull(field, "field is null");

    DictionaryPage page = pageReader.readDictionaryPage();
    if (page == null) {
        // No dictionary page available: leave the dictionary field untouched.
        return;
    }
    dictionary = Dictionaries.createDictionary(columnDescriptor, page);
}
use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.
the class ParquetColumnChunk method readAllPages.
/**
 * Reads page headers from this column chunk until {@code hasMorePages} reports that nothing is
 * left, collecting the data pages and at most one dictionary page into a {@link PageReader}.
 *
 * Pages of any type other than dictionary, data (v1) or data (v2) are skipped by their
 * compressed size.
 *
 * @return a page reader over the collected data pages and the dictionary page (null if none was seen)
 * @throws ParquetCorruptionException if a second dictionary page is encountered
 * @throws IOException if reading from the underlying stream fails
 */
public PageReader readAllPages() throws IOException {
    LinkedList<DataPage> dataPages = new LinkedList<>();
    DictionaryPage dictionary = null;
    long valuesRead = 0;
    int dataPagesRead = 0;

    while (hasMorePages(valuesRead, dataPagesRead)) {
        PageHeader header = readPageHeader();
        int uncompressedSize = header.getUncompressed_page_size();
        int compressedSize = header.getCompressed_page_size();

        switch (header.type) {
            case DICTIONARY_PAGE: {
                if (dictionary != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionary = readDictionaryPage(header, uncompressedSize, compressedSize);
                break;
            }
            case DATA_PAGE: {
                // The first-row index is looked up by the ordinal of the data page within the chunk.
                long firstRowIndex = PageReader.getFirstRowIndex(dataPagesRead, offsetIndex);
                valuesRead += readDataPageV1(header, uncompressedSize, compressedSize, firstRowIndex, dataPages);
                dataPagesRead++;
                break;
            }
            case DATA_PAGE_V2: {
                long firstRowIndex = PageReader.getFirstRowIndex(dataPagesRead, offsetIndex);
                valuesRead += readDataPageV2(header, uncompressedSize, compressedSize, firstRowIndex, dataPages);
                dataPagesRead++;
                break;
            }
            default: {
                // Unhandled page type: step over its payload so the next header can be read.
                stream.skipFully(compressedSize);
                break;
            }
        }
    }

    return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), dataPages, dictionary, offsetIndex);
}
use of com.facebook.presto.parquet.DictionaryPage in project presto by prestodb.
the class PredicateUtils method readDictionaryPage.
/**
 * Attempts to decode a dictionary page from a raw byte buffer that starts with a page header.
 *
 * This is best-effort: an empty result is returned both when the header describes a
 * non-dictionary page and when reading the header or decompressing the payload raises an
 * {@link IOException}.
 *
 * @param data bytes beginning with the page header, followed by the (compressed) page payload
 * @param codecName compression codec used to decompress the payload
 * @return the decoded dictionary page, or empty if none could be read
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
    try {
        ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
        PageHeader header = Util.readPageHeader(headerStream);
        if (header.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }

        // Whatever the header parser consumed marks where the page payload begins.
        int payloadOffset = data.length - headerStream.available();
        Slice compressedPayload = wrappedBuffer(data, payloadOffset, header.getCompressed_page_size());

        DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
        int entryCount = dictionaryHeader.getNum_values();

        return Optional.of(new DictionaryPage(
                decompress(codecName, compressedPayload, header.getUncompressed_page_size()),
                entryCount,
                encoding));
    }
    catch (IOException ignored) {
        // Deliberately swallowed: an unreadable dictionary is reported as "no dictionary".
        return Optional.empty();
    }
}
Aggregations