Use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.
The class DeltaBinaryPackingValuesWriterForIntegerTest, method shouldConsumePageDataInInitialization.
/**
 * Verifies that {@code initFromPage} consumes exactly the encoded value bytes.
 *
 * <p>The encoded output of the writer is copied into a larger, zero-filled page
 * buffer at a fixed offset. After initialization, the stream position must equal
 * that offset plus the encoded length, and every written value must be read back
 * in order.
 */
@Test
public void shouldConsumePageDataInInitialization() throws IOException {
  final int step = 32;
  final int contentStart = 33;

  // Two full blocks plus a few trailing values; each value is index * 32.
  int[] expected = new int[2 * blockSize + 3];
  for (int idx = 0, value = 0; idx < expected.length; idx++, value += step) {
    expected[idx] = value;
  }
  writeData(expected);
  reader = new DeltaBinaryPackingValuesReader();

  // Embed the encoded bytes inside a page that is ten times larger.
  byte[] encoded = writer.getBytes().toByteArray();
  byte[] page = new byte[encoded.length * 10];
  System.arraycopy(encoded, 0, page, contentStart, encoded.length);

  // Position the stream at the start of the encoded content before initializing.
  ByteBufferInputStream in = ByteBufferInputStream.wrap(ByteBuffer.wrap(page));
  in.skipFully(contentStart);
  reader.initFromPage(100, in);

  // The stream must now sit right after the encoded content.
  assertEquals(encoded.length + contentStart, in.position());

  // Every value must come back unchanged and in order.
  for (int idx = 0; idx < expected.length; idx++) {
    assertEquals(expected[idx], reader.readInteger());
  }
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.
The class DeltaBinaryPackingValuesWriterForLongTest, method shouldReturnCorrectOffsetAfterInitialization.
/**
 * Verifies that the stream position after {@code initFromPage} points just past
 * the encoded long values.
 *
 * <p>The encoded output of the writer is placed at a fixed offset inside a larger,
 * zero-filled page buffer. After initialization, the stream position must equal
 * that offset plus the encoded length, and all values must be read back in order.
 */
@Test
public void shouldReturnCorrectOffsetAfterInitialization() throws IOException {
  final int step = 32;
  final int contentStart = 33;

  // Two full blocks plus a few trailing values; each value is index * 32
  // (computed in int arithmetic, then widened to long).
  long[] expected = new long[2 * blockSize + 3];
  for (int idx = 0, value = 0; idx < expected.length; idx++, value += step) {
    expected[idx] = value;
  }
  writeData(expected);
  reader = new DeltaBinaryPackingValuesReader();

  // Embed the encoded bytes inside a page that is ten times larger.
  byte[] encoded = writer.getBytes().toByteArray();
  byte[] page = new byte[encoded.length * 10];
  System.arraycopy(encoded, 0, page, contentStart, encoded.length);

  // Position the stream at the start of the encoded content before initializing.
  ByteBufferInputStream in = ByteBufferInputStream.wrap(ByteBuffer.wrap(page));
  in.skipFully(contentStart);
  reader.initFromPage(100, in);

  // The stream must now sit right after the encoded content.
  assertEquals(encoded.length + contentStart, in.position());

  // Every value must come back unchanged and in order.
  for (int idx = 0; idx < expected.length; idx++) {
    assertEquals(expected[idx], reader.readLong());
  }
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in project drill by apache.
The class PageReader, method decodeLevels.
/**
 * Decodes any repetition and definition level data in this page.
 *
 * <p>For {@code DATA_PAGE} pages, both level readers are initialized from one
 * shared stream over the first {@code byteLength} bytes of {@code pageData}; the
 * returned offset is the stream position once the readers have been initialized.
 * For {@code DATA_PAGE_V2} pages, the level byte lengths are taken from the page
 * header and the returned offset is their sum.
 *
 * @return the offset into the page buffer after any levels have been decoded
 * @throws IOException if initializing a level reader fails
 * @throws DrillRuntimeException if the page is neither a V1 nor a V2 data page
 */
protected int decodeLevels() throws IOException {
  final int repLevelMax = columnDescriptor.getMaxRepetitionLevel();
  final int defLevelMax = columnDescriptor.getMaxDefinitionLevel();

  switch (pageHeader.getType()) {
    case DATA_PAGE: {
      // Repetition levels (if any) come first, then definition levels, both read
      // from the same stream so the second reader starts where the first stopped.
      ByteBufferInputStream levelStream = ByteBufferInputStream.wrap(pageData.nioBuffer(0, byteLength));
      if (repLevelMax > 0) {
        ValuesReader repReader = METADATA_CONVERTER
            .getEncoding(dataPageInfo.getRepetitionLevelEncoding())
            .getValuesReader(columnDescriptor, ValuesType.REPETITION_LEVEL);
        repReader.initFromPage(pageValueCount, levelStream);
        this.repetitionLevels = new ValuesReaderIntIterator(repReader);
        // The first repetition level is always 0, and each subsequent 0 marks the
        // start of a new record. The length of a list is unknown until that next 0
        // is reached, and the stream is one-way. Consuming the leading 0 up front
        // lets the first list of repetition levels be read exactly like the rest.
        this.repetitionLevels.nextInt();
      }
      if (defLevelMax > 0) {
        ValuesReader defReader = METADATA_CONVERTER
            .getEncoding(dataPageInfo.getDefinitionLevelEncoding())
            .getValuesReader(columnDescriptor, ValuesType.DEFINITION_LEVEL);
        defReader.initFromPage(pageValueCount, levelStream);
        this.definitionLevels = new ValuesReaderIntIterator(defReader);
      }
      return (int) levelStream.position();
    }
    case DATA_PAGE_V2: {
      final int repBytes = pageHeader.data_page_header_v2.repetition_levels_byte_length;
      final int defBytes = pageHeader.data_page_header_v2.definition_levels_byte_length;
      if (repLevelMax > 0) {
        this.repetitionLevels = newRLEIterator(repLevelMax, BytesInput.from(pageData.nioBuffer(0, repBytes)));
        // Skip the leading 0, for the same reason as in the DATA_PAGE case.
        this.repetitionLevels.nextInt();
      }
      if (defLevelMax > 0) {
        // Definition levels start right after the repetition level bytes.
        this.definitionLevels = newRLEIterator(defLevelMax, BytesInput.from(pageData.nioBuffer(repBytes, defBytes)));
      }
      return repBytes + defBytes;
    }
    default:
      throw new DrillRuntimeException(String.format("Did not expect to find a page of type %s now.", pageHeader.getType()));
  }
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.
The class Decoders, method createRepetitionLevelDecoder.
/**
 * Creates a repetition level decoder for the level data at the current position of
 * {@code buffer}.
 *
 * <p>When {@code maxLevelValue} is 0, or the bit width derived from it is 0, a
 * decoder is returned without reading {@code buffer}. Otherwise the encoding must
 * be RLE: a little-endian int length is read, the decoder is given a slice of that
 * many bytes, and {@code buffer}'s position is advanced by that length plus 4.
 *
 * @param encoding the level encoding; must be RLE when levels are present
 * @param maxLevelValue the maximum repetition level
 * @param valueCount the number of values, passed through to the decoder
 * @param buffer the source buffer; its position is advanced when level data is read
 * @return the decoder
 * @throws IOException if reading the length or slicing the stream fails
 */
public static final RepetitionLevelDecoder createRepetitionLevelDecoder(
    ParquetEncoding encoding,
    int maxLevelValue,
    int valueCount,
    ByteBuffer buffer) throws IOException {
  final int levelBitWidth = getWidthFromMaxInt(maxLevelValue);
  if (maxLevelValue == 0 || levelBitWidth == 0) {
    return new RepetitionLevelDecoder(0, valueCount);
  }

  checkArgument(encoding == RLE, "Invalid repetition level encoding: " + encoding);

  // Presumably the 4 bytes taken up by the little-endian length prefix read below.
  final int lengthPrefixBytes = 4;

  ByteBufferInputStream levelStream = ByteBufferInputStream.wrap(buffer);
  final int payloadLength = readIntLittleEndian(levelStream);
  RepetitionLevelDecoder decoder =
      new RepetitionLevelDecoder(valueCount, levelBitWidth, levelStream.sliceStream(payloadLength));

  // Move the caller's buffer past the length prefix and the level payload.
  final int consumedBytes = payloadLength + lengthPrefixBytes;
  buffer.position(buffer.position() + consumedBytes);
  return decoder;
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.
The class Decoders, method createFlatDefinitionLevelDecoder.
/**
 * Creates a flat definition level decoder for the level data at the current
 * position of {@code buffer}.
 *
 * <p>No data is read from {@code buffer} in these cases: the column is required
 * (decoder built with 1), {@code maxLevelValue} is 0, or the bit width derived
 * from it is 0 (decoder built with 0 in both). Otherwise the encoding must be RLE:
 * a little-endian int length is read, the decoder is given a slice of that many
 * bytes, and {@code buffer}'s position is advanced by that length plus 4.
 *
 * @param encoding the level encoding; must be RLE when levels are present
 * @param isRequired whether the column is required
 * @param maxLevelValue the maximum definition level
 * @param valueCount the number of values, passed through to the decoder
 * @param buffer the source buffer; its position is advanced when level data is read
 * @return the decoder
 * @throws IOException if reading the length or slicing the stream fails
 */
private static final FlatDefinitionLevelDecoder createFlatDefinitionLevelDecoder(
    ParquetEncoding encoding,
    boolean isRequired,
    int maxLevelValue,
    int valueCount,
    ByteBuffer buffer) throws IOException {
  if (isRequired) {
    return new FlatDefinitionLevelDecoder(1, valueCount);
  }
  // The short-circuit keeps the width lookup from running when the max level is 0.
  if (maxLevelValue == 0 || getWidthFromMaxInt(maxLevelValue) == 0) {
    return new FlatDefinitionLevelDecoder(0, valueCount);
  }

  checkArgument(encoding == RLE, "Invalid definition level encoding: " + encoding);

  // Presumably the 4 bytes taken up by the little-endian length prefix read below.
  final int lengthPrefixBytes = 4;

  ByteBufferInputStream levelStream = ByteBufferInputStream.wrap(buffer);
  final int payloadLength = readIntLittleEndian(levelStream);
  FlatDefinitionLevelDecoder decoder =
      new FlatDefinitionLevelDecoder(valueCount, levelStream.sliceStream(payloadLength));

  // Move the caller's buffer past the length prefix and the level payload.
  final int consumedBytes = payloadLength + lengthPrefixBytes;
  buffer.position(buffer.position() + consumedBytes);
  return decoder;
}
Aggregations