Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
The class Int32FlatBatchReader, method readNext.
@Override
public ColumnChunk readNext()
{
    ColumnChunk columnChunk = null;
    try {
        // Consume any rows deferred by earlier skips before decoding the batch
        seek();
        if (field.isRequired()) {
            // A required column can never contain nulls, so definition
            // levels do not need to be consulted
            columnChunk = readWithoutNull();
        }
        else {
            columnChunk = readWithNull();
        }
    }
    catch (IOException exception) {
        throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception);
    }

    // Batch state is one-shot: the caller sizes the next batch before reading it
    readOffset = 0;
    nextBatchSize = 0;
    return columnChunk;
}
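Both readers below follow this same shape: seek() consumes any rows the caller skipped, the required/optional branch decides whether definition levels must be decoded, and the one-shot batch state is cleared at the end. A hypothetical driver sketch of how a caller might consume such a reader in fixed-size batches (Reader, prepareNextRead, and the generic chunk type are illustrative stand-ins, not Presto's confirmed API):

import java.util.ArrayList;
import java.util.List;

final class DriverSketch
{
    interface Reader<T>
    {
        void prepareNextRead(int batchSize);  // sets nextBatchSize
        T readNext();                         // decodes, then resets batch state
    }

    static <T> List<T> readAll(Reader<T> reader, int totalRows, int batchSize)
    {
        List<T> chunks = new ArrayList<>();
        for (int remaining = totalRows; remaining > 0; remaining -= batchSize) {
            // nextBatchSize must be set before every readNext(), because
            // readNext() zeroes it afterwards
            reader.prepareNextRead(Math.min(batchSize, remaining));
            chunks.add(reader.readNext());
        }
        return chunks;
    }
}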
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
The class AbstractNestedBatchReader, method readNext.
@Override
public ColumnChunk readNext()
{
    ColumnChunk columnChunk = null;
    try {
        seek();
        if (field.isRequired()) {
            columnChunk = readNestedNoNull();
        }
        else {
            columnChunk = readNestedWithNull();
        }
    }
    catch (IOException ex) {
        throw new ParquetDecodingException("Failed to decode.", ex);
    }

    readOffset = 0;
    nextBatchSize = 0;
    return columnChunk;
}
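This is the same template as Int32FlatBatchReader.readNext: only the two leaf read methods and the wrapping exception (ParquetDecodingException here, PrestoException above) differ. A minimal sketch of the shared template, with illustrative names rather than Presto's actual class hierarchy:

import java.io.IOException;

abstract class BatchReaderSketch<T>
{
    protected int readOffset;      // rows to skip before decoding the batch
    protected int nextBatchSize;   // rows to decode in this batch

    public final T readNext()
    {
        T result;
        try {
            seek();  // consume readOffset rows left over from skip calls
            result = required() ? readWithoutNull() : readWithNull();
        }
        catch (IOException e) {
            throw new RuntimeException("read failed", e);
        }
        // Reset the one-shot batch state so stale values cannot leak
        // into the next read
        readOffset = 0;
        nextBatchSize = 0;
        return result;
    }

    protected abstract void seek() throws IOException;
    protected abstract boolean required();
    protected abstract T readWithoutNull() throws IOException;
    protected abstract T readWithNull() throws IOException;
}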
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
The class BinaryNestedBatchReader, method readNestedNoNull.
@Override
protected ColumnChunk readNestedNoNull()
        throws IOException
{
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length);

    // Pass 1: a position carries a value only when its definition level
    // equals the column's max definition level; count those per context
    int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels();
    int newBatchSize = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int valueCount = 0;
        for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) {
            valueCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0);
        }
        newBatchSize += valueCount;
        valuesDecoderContext.setNonNullCount(valueCount);
        valuesDecoderContext.setValueCount(valueCount);
    }

    // Pass 2: decode each context's values and total the bytes needed
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    int bufferSize = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer valueBuffer = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(valuesDecoderContext.getNonNullCount());
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
    }

    // Pass 3: splice the per-context buffers into one contiguous byte array,
    // building the prefix-sum offsets that VariableWidthBlock expects
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[newBatchSize + 1];
    int bufferIndex = 0;
    int offsetIndex = 0;
    int valueBufferIndex = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer value = valueBuffers.get(valueBufferIndex);
        bufferIndex = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        valueBufferIndex++;
    }

    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    Block block = new VariableWidthBlock(newBatchSize, buffer, offsets, Optional.empty());
    return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
}
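The method runs in three passes: count the positions that actually carry a value, decode each context's values into a temporary ValueBuffer, and splice those buffers into one contiguous byte array plus a prefix-sum offsets array, which is the layout VariableWidthBlock consumes. A simplified, self-contained sketch of the splice step, with plain byte[] values standing in for the decoder buffers:

import java.util.List;

final class OffsetAssemblySketch
{
    final byte[] data;     // all values back to back
    final int[] offsets;   // value i spans [offsets[i], offsets[i + 1])

    OffsetAssemblySketch(List<byte[]> values)
    {
        int totalBytes = 0;
        for (byte[] value : values) {
            totalBytes += value.length;
        }
        data = new byte[totalBytes];
        offsets = new int[values.size() + 1];  // offsets[0] == 0
        int position = 0;
        for (int i = 0; i < values.size(); i++) {
            byte[] value = values.get(i);
            System.arraycopy(value, 0, data, position, value.length);
            position += value.length;
            offsets[i + 1] = position;  // prefix sum of value lengths
        }
    }
}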
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
The class Int32FlatBatchReader, method readWithNull.
private ColumnChunk readWithNull()
        throws IOException
{
    int[] values = new int[nextBatchSize];
    boolean[] isNull = new boolean[nextBatchSize];

    int totalNonNullCount = 0;
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                break;
            }
        }

        // A batch may straddle pages: take whichever runs out first
        int chunkSize = Math.min(remainingCountInPage, remainingInBatch);
        int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize);
        totalNonNullCount += nonNullCount;

        if (nonNullCount > 0) {
            // The decoder packs nonNullCount values at the front of the chunk;
            // walk backwards and drop each value into its final slot, leaving
            // the null positions as gaps
            valuesDecoder.readNext(values, startOffset, nonNullCount);
            int valueDestinationIndex = startOffset + chunkSize - 1;
            int valueSourceIndex = startOffset + nonNullCount - 1;
            while (valueDestinationIndex >= startOffset) {
                if (!isNull[valueDestinationIndex]) {
                    values[valueDestinationIndex] = values[valueSourceIndex];
                    valueSourceIndex--;
                }
                valueDestinationIndex--;
            }
        }

        startOffset += chunkSize;
        remainingInBatch -= chunkSize;
        remainingCountInPage -= chunkSize;
    }

    if (remainingInBatch != 0) {
        throw new ParquetDecodingException(format("Still remaining to be read in current batch: %d values", remainingInBatch));
    }

    if (totalNonNullCount == 0) {
        // Entirely null: emit a run-length-encoded null block
        Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize);
        return new ColumnChunk(block, new int[0], new int[0]);
    }

    boolean hasNoNull = totalNonNullCount == nextBatchSize;
    Block block = new IntArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values);
    return new ColumnChunk(block, new int[0], new int[0]);
}
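The in-place expansion is the subtle part: valuesDecoder.readNext packs the nonNullCount decoded values at the front of the chunk, and the backwards loop then moves each one into its final slot, leaving the null positions as gaps that the isNull mask hides. Walking right to left guarantees the source index never overtakes the destination index, so no packed value is overwritten before it is moved. A standalone sketch of just that expansion:

final class BackfillSketch
{
    static void backfill(int[] values, boolean[] isNull, int start, int chunkSize, int nonNullCount)
    {
        int destination = start + chunkSize - 1;  // last slot of the chunk
        int source = start + nonNullCount - 1;    // last packed value
        while (destination >= start) {
            if (!isNull[destination]) {
                values[destination] = values[source--];
            }
            destination--;
        }
    }

    public static void main(String[] args)
    {
        int[] values = {10, 20, 0, 0};  // decoder packed two values up front
        boolean[] isNull = {false, true, false, true};
        backfill(values, isNull, 0, 4, 2);
        // Prints [10, 20, 20, 0]: slots 0 and 2 hold the real values;
        // null slots 1 and 3 keep stale data that the isNull mask hides
        System.out.println(java.util.Arrays.toString(values));
    }
}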
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
The class Int32FlatBatchReader, method readWithoutNull.
private ColumnChunk readWithoutNull()
        throws IOException
{
    int[] values = new int[nextBatchSize];
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                break;
            }
        }

        int chunkSize = Math.min(remainingCountInPage, remainingInBatch);
        valuesDecoder.readNext(values, startOffset, chunkSize);

        startOffset += chunkSize;
        remainingInBatch -= chunkSize;
        remainingCountInPage -= chunkSize;
    }

    if (remainingInBatch != 0) {
        throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch));
    }

    Block block = new IntArrayBlock(nextBatchSize, Optional.empty(), values);
    return new ColumnChunk(block, new int[0], new int[0]);
}
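A batch can straddle Parquet page boundaries, which is why each iteration consumes min(remainingCountInPage, remainingInBatch) and fetches a new page only when the current one is exhausted. A self-contained sketch of the pagination loop, with a plain iterator of page sizes standing in for readNextPage():

import java.util.Iterator;

final class ChunkingSketch
{
    static int drain(Iterator<Integer> pageSizes, int batchSize)
    {
        int remainingInBatch = batchSize;
        int remainingInPage = 0;
        while (remainingInBatch > 0) {
            if (remainingInPage == 0) {
                if (!pageSizes.hasNext()) {
                    break;  // out of pages mid-batch: caller reports corruption
                }
                remainingInPage = pageSizes.next();
            }
            int chunk = Math.min(remainingInPage, remainingInBatch);
            // decode `chunk` values here ...
            remainingInBatch -= chunk;
            remainingInPage -= chunk;
        }
        return remainingInBatch;  // non-zero means the file was short on values
    }

    public static void main(String[] args)
    {
        // Three pages of 3, 4, and 5 values feeding a batch of 10
        int shortBy = drain(java.util.Arrays.asList(3, 4, 5).iterator(), 10);
        System.out.println(shortBy);  // prints 0: the pages covered the batch
    }
}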