Use of com.facebook.presto.parquet.reader.ColumnIndexFilterUtils.OffsetRange in project presto by prestodb.
From the class ParquetReader, the method concatRanges.
/**
 * Merges consecutive offset ranges into single, larger ranges.
 *
 * <p>A range counts as consecutive when its offset equals {@code endPos()} of the merged
 * range currently being built. Each merged range is a newly created {@code OffsetRange};
 * the input ranges are only read through {@code getOffset()} and {@code getLength()}.
 *
 * <p>NOTE(review): merging only looks at the immediately preceding range, so the input is
 * presumably ordered by offset - confirm against the caller.
 *
 * @param offsetRanges the ranges to merge
 * @return one entry per run of consecutive input ranges, in input order; empty when the
 *         input is empty
 */
private List<OffsetRange> concatRanges(List<OffsetRange> offsetRanges) {
    List<OffsetRange> pageRanges = new ArrayList<>();
    OffsetRange currentParts = null;
    for (OffsetRange range : offsetRanges) {
        long startPosition = range.getOffset();
        // First range, or a gap between the previous end and this start => begin a new merged range.
        if (currentParts == null || currentParts.endPos() != startPosition) {
            currentParts = new OffsetRange(startPosition, 0);
            // Register the merged range exactly once, at creation. Adding it on every
            // iteration would put the same object into the result once per input range,
            // and the caller allocates and reads one buffer per list entry.
            pageRanges.add(currentParts);
        }
        // Grow the current merged range to cover this input range.
        currentParts.extendLength(range.getLength());
    }
    return pageRanges;
}
Use of com.facebook.presto.parquet.reader.ColumnIndexFilterUtils.OffsetRange in project presto by prestodb.
From the class ParquetReader, the method readPrimitive.
/**
 * Reads the next column chunk for a primitive field.
 *
 * <p>If the field's column reader is not yet initialized, the column data is loaded from
 * {@code dataSource} first: either the whole chunk in one read, or, when
 * {@code shouldUseColumnIndex} accepts the column path, only the byte ranges selected by
 * the offset index filtered with {@code currentGroupRowRanges}. When
 * {@code enableVerification} is set, a second reader is initialized over the same bytes
 * and its output is compared with the primary reader's output.
 *
 * <p>After reading, the per-column bytes-per-cell estimate is refreshed; if it grew,
 * {@code maxCombinedBytesPerRow} and {@code maxBatchSize} are updated accordingly.
 *
 * @param field the primitive field to read
 * @return the column chunk produced by the reader, after type coercion
 * @throws IOException if reading from the data source fails
 */
private ColumnChunk readPrimitive(PrimitiveField field) throws IOException {
    int fieldId = field.getId();
    ColumnDescriptor columnDescriptor = field.getDescriptor();
    ColumnReader columnReader = columnReaders[fieldId];

    if (!columnReader.isInitialized()) {
        validateParquet(currentBlockMetadata.getRowCount() > 0, "Row group has 0 rows");
        ColumnChunkMetaData metadata = getColumnChunkMetaData(columnDescriptor);
        long startingPosition = metadata.getStartingPos();
        int totalSize = toIntExact(metadata.getTotalSize());

        if (!shouldUseColumnIndex(metadata.getPath())) {
            // No column index: load the entire column chunk with a single read.
            byte[] chunkBytes = allocateBlock(totalSize);
            dataSource.readFully(startingPosition, chunkBytes);
            columnReader.init(createPageReader(chunkBytes, totalSize, metadata, columnDescriptor), field, null);
            if (enableVerification) {
                // The verification reader gets its own page reader over the same bytes.
                PageReader verificationPages = createPageReader(chunkBytes, totalSize, metadata, columnDescriptor);
                verificationColumnReaders[fieldId].init(verificationPages, field, null);
            }
        }
        else {
            // Column index available: load only the byte ranges of the pages that survive filtering.
            OffsetIndex fullIndex = blockIndexStores.get(currentBlock).getOffsetIndex(metadata.getPath());
            OffsetIndex filteredIndex = ColumnIndexFilterUtils.filterOffsetIndex(
                    fullIndex,
                    currentGroupRowRanges,
                    blocks.get(currentBlock).getRowCount());
            List<OffsetRange> mergedRanges = concatRanges(
                    ColumnIndexFilterUtils.calculateOffsetRanges(filteredIndex, metadata, fullIndex.getOffset(0), startingPosition));
            List<ByteBuffer> buffers = allocateBlocks(mergedRanges);

            // Range offsets are added to the chunk's starting position to form the read position.
            for (int rangeIndex = 0; rangeIndex < mergedRanges.size(); rangeIndex++) {
                long readPosition = startingPosition + mergedRanges.get(rangeIndex).getOffset();
                dataSource.readFully(readPosition, buffers.get(rangeIndex).array());
            }

            columnReader.init(
                    createPageReader(buffers, totalSize, metadata, columnDescriptor, filteredIndex),
                    field,
                    currentGroupRowRanges);
            if (enableVerification) {
                // The verification reader gets its own page reader over the same buffers.
                PageReader verificationPages = createPageReader(buffers, totalSize, metadata, columnDescriptor, filteredIndex);
                verificationColumnReaders[fieldId].init(verificationPages, field, currentGroupRowRanges);
            }
        }
    }

    ColumnChunk result = typeCoercion(
            columnReader.readNext(),
            columnDescriptor.getPrimitiveType().getPrimitiveTypeName(),
            field.getType());

    if (enableVerification) {
        ColumnChunk expected = verificationColumnReaders[fieldId].readNext();
        boolean nestedPath = columnDescriptor.getPath().length > 1;
        ParquetResultVerifierUtils.verifyColumnChunks(result, expected, nestedPath, field, dataSource.getId());
    }

    // Track the largest observed bytes-per-cell for this column.
    long observedBytesPerCell = result.getBlock().getSizeInBytes() / batchSize;
    if (observedBytesPerCell > maxBytesPerCell[fieldId]) {
        // Swap this column's old contribution for the new one, then shrink the batch size if needed.
        maxCombinedBytesPerRow = maxCombinedBytesPerRow - maxBytesPerCell[fieldId] + observedBytesPerCell;
        maxBatchSize = toIntExact(min(maxBatchSize, max(1, maxReadBlockBytes / maxCombinedBytesPerRow)));
        maxBytesPerCell[fieldId] = observedBytesPerCell;
    }
    return result;
}
Aggregations