Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
Class BinaryFlatBatchReader, method readNext:
@Override
public ColumnChunk readNext()
{
    ColumnChunk columnChunk = null;
    try {
        // Skip any rows the caller asked to advance past before decoding.
        seek();
        if (field.isRequired()) {
            // Required column: no nulls possible, so take the null-free path.
            columnChunk = readWithoutNull();
        }
        else {
            columnChunk = readWithNull();
        }
    }
    catch (IOException ex) {
        throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, ex);
    }
    readOffset = 0;
    nextBatchSize = 0;
    return columnChunk;
}
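The method resets readOffset and nextBatchSize after each batch, so the caller re-arms the reader before every call, and the checked IOException is wrapped into an unchecked, domain-specific exception. A minimal self-contained sketch of that same shape; ToyReader, prepareNextRead, and the decode methods are illustrative names, not Presto API:

import java.io.IOException;
import java.io.UncheckedIOException;

final class ToyReader
{
    private final boolean required;
    private int nextBatchSize;

    ToyReader(boolean required)
    {
        this.required = required;
    }

    void prepareNextRead(int batchSize)
    {
        this.nextBatchSize = batchSize;
    }

    int[] readNext()
    {
        try {
            // Required columns skip null bookkeeping; optional ones decode a null mask too.
            return required ? decodeDense(nextBatchSize) : decodeWithNulls(nextBatchSize);
        }
        catch (IOException e) {
            throw new UncheckedIOException(e);  // mirror the unchecked wrapping above
        }
        finally {
            nextBatchSize = 0;  // re-armed by the next prepareNextRead call
        }
    }

    private int[] decodeDense(int count) throws IOException { return new int[count]; }
    private int[] decodeWithNulls(int count) throws IOException { return new int[count]; }
}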
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
Class BinaryFlatBatchReader, method readWithoutNull:
private ColumnChunk readWithoutNull()
        throws IOException
{
    // The column is required, so this mask stays all-false.
    boolean[] isNull = new boolean[nextBatchSize];
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    List<ValuesDecoderContext<BinaryValuesDecoder>> valuesDecoderContexts = new ArrayList<>();
    int bufferSize = 0;
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    // First pass: decode page by page, collecting one ValueBuffer per chunk and
    // recording which decoder produced which range of the batch.
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                break;
            }
        }
        int readChunkSize = Math.min(remainingCountInPage, remainingInBatch);
        ValueBuffer valueBuffer = valuesDecoder.readNext(readChunkSize);
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
        ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext = new ValuesDecoderContext<>(valuesDecoder, startOffset, startOffset + readChunkSize);
        valuesDecoderContext.setValueCount(readChunkSize);
        valuesDecoderContext.setNonNullCount(readChunkSize);
        valuesDecoderContexts.add(valuesDecoderContext);
        startOffset += readChunkSize;
        remainingInBatch -= readChunkSize;
        remainingCountInPage -= readChunkSize;
    }
    // Second pass: copy every chunk into one contiguous byte buffer and fill the
    // offsets array that delimits each variable-width value.
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[nextBatchSize + 1];
    int i = 0;
    int bufferIndex = 0;
    int offsetIndex = 0;
    for (ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext : valuesDecoderContexts) {
        BinaryValuesDecoder binaryValuesDecoder = valuesDecoderContext.getValuesDecoder();
        ValueBuffer value = valueBuffers.get(i);
        bufferIndex = binaryValuesDecoder.readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        i++;
    }
    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    Block block = new VariableWidthBlock(nextBatchSize, buffer, offsets, Optional.of(isNull));
    return new ColumnChunk(block, new int[0], new int[0]);
}
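The second pass targets the storage layout of VariableWidthBlock: all values packed into one byte array, with offsets[i] and offsets[i + 1] delimiting value i. A self-contained sketch of that packing, using illustrative names rather than Presto API:

import java.util.List;

final class PackingSketch
{
    // Packs a list of byte[] values into one shared buffer and returns the
    // offsets array; value i spans offsets[i]..offsets[i + 1] in the buffer.
    static int[] pack(List<byte[]> values)
    {
        int bufferSize = 0;
        for (byte[] value : values) {
            bufferSize += value.length;  // size the shared buffer up front
        }
        byte[] buffer = new byte[bufferSize];
        int[] offsets = new int[values.size() + 1];
        int position = 0;
        for (int i = 0; i < values.size(); i++) {
            byte[] value = values.get(i);
            System.arraycopy(value, 0, buffer, position, value.length);
            position += value.length;
            offsets[i + 1] = position;
        }
        return offsets;
    }
}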
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
Class BinaryNestedBatchReader, method readNestedWithNull:
@Override
protected ColumnChunk readNestedWithNull()
        throws IOException
{
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length);
    int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels();
    int newBatchSize = 0;
    int batchNonNullCount = 0;
    // Count, per decoder context, how many entries are non-null (definition level
    // equals the maximum) and how many occupy a slot at all (null or non-null).
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int nonNullCount = 0;
        int valueCount = 0;
        for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) {
            nonNullCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0);
            valueCount += (definitionLevels[i] >= maxDefinitionLevel - 1 ? 1 : 0);
        }
        batchNonNullCount += nonNullCount;
        newBatchSize += valueCount;
        valuesDecoderContext.setNonNullCount(nonNullCount);
        valuesDecoderContext.setValueCount(valueCount);
    }
    if (batchNonNullCount == 0) {
        // Entirely null: a run-length-encoded null block avoids materializing values.
        Block block = RunLengthEncodedBlock.create(field.getType(), null, newBatchSize);
        return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
    }
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    int bufferSize = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer valueBuffer = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(valuesDecoderContext.getNonNullCount());
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
    }
    // Copy the non-null values into one contiguous buffer; the offsets written
    // here are still compact (non-null values only).
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[newBatchSize + 1];
    int i = 0;
    int bufferIndex = 0;
    int offsetIndex = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer value = valueBuffers.get(i);
        bufferIndex = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        i++;
    }
    boolean[] isNull = new boolean[newBatchSize];
    int offset = 0;
    // Expand the compact offsets in place, right to left, so null positions
    // become zero-length entries without needing a second buffer.
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int destinationIndex = offset + valuesDecoderContext.getValueCount() - 1;
        int sourceIndex = offset + valuesDecoderContext.getNonNullCount() - 1;
        int definitionLevelIndex = valuesDecoderContext.getEnd() - 1;
        offsets[destinationIndex + 1] = offsets[sourceIndex + 1];
        while (destinationIndex >= offset) {
            if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel) {
                offsets[destinationIndex--] = offsets[sourceIndex--];
            }
            else if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel - 1) {
                // Null entry: start offset equals end offset, i.e. zero length.
                offsets[destinationIndex] = offsets[sourceIndex + 1];
                isNull[destinationIndex] = true;
                destinationIndex--;
            }
            definitionLevelIndex--;
        }
        offset += valuesDecoderContext.getValueCount();
    }
    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    boolean hasNoNull = batchNonNullCount == newBatchSize;
    Block block = new VariableWidthBlock(newBatchSize, buffer, offsets, hasNoNull ? Optional.empty() : Optional.of(isNull));
    return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
}
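The right-to-left loop is the heart of this method: readIntoBuffer wrote offsets compactly for non-null values only, and the loop spreads them across all slots in place, turning each null into a zero-length entry. A toy version under the same assumptions (offsets has valueCount + 1 slots, the first nonNullCount + 1 of them filled compactly); expandOffsets is an illustrative name, not Presto API:

static boolean[] expandOffsets(int[] offsets, int[] definitionLevels, int maxDefinitionLevel, int nonNullCount, int valueCount)
{
    boolean[] isNull = new boolean[valueCount];
    int destination = valueCount - 1;
    int source = nonNullCount - 1;
    offsets[destination + 1] = offsets[source + 1];  // total byte length is unchanged
    for (int i = definitionLevels.length - 1; i >= 0 && destination >= 0; i--) {
        if (definitionLevels[i] == maxDefinitionLevel) {
            offsets[destination--] = offsets[source--];  // non-null: move the start offset
        }
        else if (definitionLevels[i] == maxDefinitionLevel - 1) {
            offsets[destination] = offsets[source + 1];  // null: zero-length slot
            isNull[destination--] = true;
        }
        // lower definition levels mark empty or null ancestors and consume no slot
    }
    return isNull;
}

For example, with definitionLevels {2, 1, 2}, maxDefinitionLevel 2, and compact offsets {0, 2, 3, 0} for the values "ab" and "c", the loop rewrites the offsets to {0, 2, 2, 3}: "ab", a zero-length null, then "c".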
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
Class BinaryFlatBatchReader, method readWithNull:
private ColumnChunk readWithNull()
        throws IOException
{
    boolean[] isNull = new boolean[nextBatchSize];
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    List<ValuesDecoderContext<BinaryValuesDecoder>> valuesDecoderContexts = new ArrayList<>();
    int bufferSize = 0;
    int totalNonNullCount = 0;
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    // First pass: decode page by page. The definition levels fill the null mask
    // and tell us how many actual values each chunk contributes.
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                break;
            }
        }
        int readChunkSize = Math.min(remainingCountInPage, remainingInBatch);
        int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, readChunkSize);
        totalNonNullCount += nonNullCount;
        ValueBuffer valueBuffer = valuesDecoder.readNext(nonNullCount);
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
        ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext = new ValuesDecoderContext<>(valuesDecoder, startOffset, startOffset + readChunkSize);
        valuesDecoderContext.setValueCount(readChunkSize);
        valuesDecoderContext.setNonNullCount(nonNullCount);
        valuesDecoderContexts.add(valuesDecoderContext);
        startOffset += readChunkSize;
        remainingInBatch -= readChunkSize;
        remainingCountInPage -= readChunkSize;
    }
    if (totalNonNullCount == 0) {
        // Entirely null: a run-length-encoded null block avoids materializing values.
        Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize);
        return new ColumnChunk(block, new int[0], new int[0]);
    }
    // Second pass: copy each chunk's values into one contiguous buffer; the
    // offsets written here are still compact (non-null values only).
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[nextBatchSize + 1];
    int i = 0;
    int bufferIndex = 0;
    int offsetIndex = 0;
    for (ValuesDecoderContext<BinaryValuesDecoder> valuesDecoderContext : valuesDecoderContexts) {
        BinaryValuesDecoder binaryValuesDecoder = valuesDecoderContext.getValuesDecoder();
        ValueBuffer value = valueBuffers.get(i);
        bufferIndex = binaryValuesDecoder.readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        i++;
    }
    // Expand the compact offsets in place. Chunks are walked last to first so the
    // boundary write below never clobbers a later chunk's still-compact offsets.
    Collections.reverse(valuesDecoderContexts);
    for (ValuesDecoderContext valuesDecoderContext : valuesDecoderContexts) {
        int destinationIndex = valuesDecoderContext.getEnd() - 1;
        int sourceIndex = valuesDecoderContext.getStart() + valuesDecoderContext.getNonNullCount() - 1;
        offsets[destinationIndex + 1] = offsets[sourceIndex + 1];
        while (destinationIndex >= valuesDecoderContext.getStart()) {
            if (isNull[destinationIndex]) {
                // Null entry: start offset equals end offset, i.e. zero length.
                offsets[destinationIndex] = offsets[sourceIndex + 1];
            }
            else {
                offsets[destinationIndex] = offsets[sourceIndex];
                sourceIndex--;
            }
            destinationIndex--;
        }
    }
    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    boolean hasNoNull = totalNonNullCount == nextBatchSize;
    Block block = new VariableWidthBlock(nextBatchSize, buffer, offsets, hasNoNull ? Optional.empty() : Optional.of(isNull));
    return new ColumnChunk(block, new int[0], new int[0]);
}
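The flat variant differs from the nested one in two ways: nullness comes from the isNull mask rather than from definition levels, and each chunk is expanded within its own [start, end) region. A toy version of the per-chunk expansion, assuming the chunk's compact offsets occupy slots start..start + nonNullCount; expandChunk is an illustrative name:

static void expandChunk(int[] offsets, boolean[] isNull, int start, int end, int nonNullCount)
{
    int destination = end - 1;
    int source = start + nonNullCount - 1;
    offsets[destination + 1] = offsets[source + 1];  // boundary: end offset of the chunk
    while (destination >= start) {
        if (isNull[destination]) {
            offsets[destination] = offsets[source + 1];  // zero-length slot, source not consumed
        }
        else {
            offsets[destination] = offsets[source--];
        }
        destination--;
    }
}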
Use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.
Class Int32NestedBatchReader, method readNestedWithNull:
@Override
protected ColumnChunk readNestedWithNull()
        throws IOException
{
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length);
    int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels();
    int newBatchSize = 0;
    int batchNonNullCount = 0;
    // Count non-null entries (definition level == max) and total slots
    // (null or non-null) for each decoder context.
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int nonNullCount = 0;
        int valueCount = 0;
        for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) {
            nonNullCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0);
            valueCount += (definitionLevels[i] >= maxDefinitionLevel - 1 ? 1 : 0);
        }
        batchNonNullCount += nonNullCount;
        newBatchSize += valueCount;
        valuesDecoderContext.setNonNullCount(nonNullCount);
        valuesDecoderContext.setValueCount(valueCount);
    }
    if (batchNonNullCount == 0) {
        // Entirely null: a run-length-encoded null block avoids materializing values.
        Block block = RunLengthEncodedBlock.create(field.getType(), null, newBatchSize);
        return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
    }
    int[] values = new int[newBatchSize];
    boolean[] isNull = new boolean[newBatchSize];
    int offset = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        // Decode the non-null values compactly at the front of this context's
        // region, then spread them in place from the back, zero-filling nulls.
        ((Int32ValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(values, offset, valuesDecoderContext.getNonNullCount());
        int valueDestinationIndex = offset + valuesDecoderContext.getValueCount() - 1;
        int valueSourceIndex = offset + valuesDecoderContext.getNonNullCount() - 1;
        int definitionLevelIndex = valuesDecoderContext.getEnd() - 1;
        while (valueDestinationIndex >= offset) {
            if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel) {
                values[valueDestinationIndex--] = values[valueSourceIndex--];
            }
            else if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel - 1) {
                values[valueDestinationIndex] = 0;
                isNull[valueDestinationIndex] = true;
                valueDestinationIndex--;
            }
            definitionLevelIndex--;
        }
        offset += valuesDecoderContext.getValueCount();
    }
    boolean hasNoNull = batchNonNullCount == newBatchSize;
    Block block = new IntArrayBlock(newBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values);
    return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
}
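For fixed-width types the same trick needs no offsets array: values are decoded compactly, then spread in place from the back, with null slots zero-filled so the block's value array stays dense. A toy version under the assumption that the first nonNullCount slots of values are filled compactly; expandInts is an illustrative name, not Presto API:

static boolean[] expandInts(int[] values, int[] definitionLevels, int maxDefinitionLevel, int nonNullCount, int valueCount)
{
    boolean[] isNull = new boolean[valueCount];
    int destination = valueCount - 1;
    int source = nonNullCount - 1;
    for (int i = definitionLevels.length - 1; i >= 0 && destination >= 0; i--) {
        if (definitionLevels[i] == maxDefinitionLevel) {
            values[destination--] = values[source--];  // non-null: move into its final slot
        }
        else if (definitionLevels[i] == maxDefinitionLevel - 1) {
            values[destination] = 0;  // nulls read as zero in the dense array
            isNull[destination--] = true;
        }
        // lower definition levels mark empty or null ancestors and consume no slot
    }
    return isNull;
}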