Use of io.prestosql.rcfile.RcFileDecoderUtils.findFirstSyncPosition in project hetu-core by openlookeng.
The snippet below is the advance() method of the RcFileReader class. It moves the reader to the next chunk of rows, reading and validating the next row-group header (including its optional sync marker) whenever the current row group is exhausted.
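For context, a minimal sketch of the caller side: findFirstSyncPosition is used when a reader is handed an arbitrary byte range of an RCFile and must align itself to the first row-group boundary. The signature matches the io.prestosql.rcfile sources, but the surrounding seek logic is an assumption for illustration, not verbatim hetu-core code.

    // Scan [offset, offset + length) for the file's 16-byte sync marker
    // (syncFirst, syncSecond); returns its byte position, or a negative
    // value if no marker falls inside the range.
    long startOfSyncSequence = RcFileDecoderUtils.findFirstSyncPosition(
            dataSource, offset, length, syncFirst, syncSecond);
    if (startOfSyncSequence < 0) {
        // no row group starts in this split, so it contributes no rows
        close();
    }
    else {
        // seek the input to the marker and decode row groups from there
        // (seek mechanics elided)
    }

The input.position() - SIZE_OF_INT >= end check in advance() below mirrors this scan; as the in-code NOTE says, both must agree on which split owns a row group that begins exactly on a boundary.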
public int advance()
        throws IOException
{
    if (closed) {
        return -1;
    }
    rowGroupPosition += ColumnData.MAX_SIZE;
    currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount - rowGroupPosition);

    // do we still have rows in the current row group
    if (currentChunkRowCount > 0) {
        validateWritePageChecksum();
        return currentChunkRowCount;
    }

    // are we at the end?
    if (input.remaining() == 0) {
        close();
        return -1;
    }
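    // On-disk layout of one RCFile row group, as parsed below:
    //   int    rowGroupSize             -- or -1, followed by the 16-byte sync
    //                                      marker and then the real size
    //   int    uncompressedHeaderSize
    //   int    compressedHeaderSize
    //   byte[] header                   -- compressedHeaderSize bytes: vint row
    //                                      count, then per column a vint
    //                                      compressed size, vint uncompressed
    //                                      size, vint lengths size, and the
    //                                      lengths bytes themselves
    //   byte[] columnData               -- concatenated per-column data
    // Ints are big-endian on disk while input.readInt() reads little-endian,
    // hence the Integer.reverseBytes calls.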
    // read uncompressed size of row group (which is useless information)
    verify(input.remaining() >= SIZE_OF_INT, "RCFile truncated %s", dataSource.getId());
    int unusedRowGroupSize = Integer.reverseBytes(input.readInt());

    // read sequence sync if present
    if (unusedRowGroupSize == -1) {
        verify(input.remaining() >= SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT, "RCFile truncated %s", dataSource.getId());

        // NOTE: this decision must agree with RcFileDecoderUtils.findFirstSyncPosition
        if (input.position() - SIZE_OF_INT >= end) {
            close();
            return -1;
        }

        verify(syncFirst == input.readLong() && syncSecond == input.readLong(), "Invalid sync in RCFile %s", dataSource.getId());

        // read the useless uncompressed length
        unusedRowGroupSize = Integer.reverseBytes(input.readInt());
    }
    else if (rowsRead > 0) {
        validateWrite(writeValidation -> false, "Expected sync sequence for every row group except the first one");
    }
    verify(unusedRowGroupSize > 0, "Invalid uncompressed row group length %s", unusedRowGroupSize);

    // read row group header
    int uncompressedHeaderSize = Integer.reverseBytes(input.readInt());
    int compressedHeaderSize = Integer.reverseBytes(input.readInt());
    if (compressedHeaderSize > compressedHeaderBuffer.length()) {
        compressedHeaderBuffer = Slices.allocate(compressedHeaderSize);
    }
    input.readBytes(compressedHeaderBuffer, 0, compressedHeaderSize);

    // decompress row group header
    Slice header;
    if (decompressor != null) {
        if (headerBuffer.length() < uncompressedHeaderSize) {
            headerBuffer = Slices.allocate(uncompressedHeaderSize);
        }
        Slice buffer = headerBuffer.slice(0, uncompressedHeaderSize);
        decompressor.decompress(compressedHeaderBuffer, buffer);
        header = buffer;
    }
    else {
        verify(compressedHeaderSize == uncompressedHeaderSize, "Invalid RCFile %s", dataSource.getId());
        header = compressedHeaderBuffer;
    }
    BasicSliceInput headerInput = header.getInput();

    // read number of rows in row group
    rowGroupRowCount = toIntExact(readVInt(headerInput));
    rowsRead += rowGroupRowCount;
    rowGroupPosition = 0;
    currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount);

    // set column buffers
    int totalCompressedDataSize = 0;
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        int compressedDataSize = toIntExact(readVInt(headerInput));
        totalCompressedDataSize += compressedDataSize;
        int uncompressedDataSize = toIntExact(readVInt(headerInput));
        if (decompressor == null && compressedDataSize != uncompressedDataSize) {
            throw corrupt("Invalid RCFile %s", dataSource.getId());
        }
        int lengthsSize = toIntExact(readVInt(headerInput));
        Slice lengthsBuffer = headerInput.readSlice(lengthsSize);
        if (readColumns.containsKey(columnIndex)) {
            Slice dataBuffer = input.readSlice(compressedDataSize);
            columns[columnIndex].setBuffers(lengthsBuffer, dataBuffer, uncompressedDataSize);
        }
        else {
            skipFully(input, compressedDataSize);
        }
    }

    // this value is not used but validate it is correct since it might signal corruption
    verify(unusedRowGroupSize == totalCompressedDataSize + uncompressedHeaderSize, "Invalid row group size");

    validateWriteRowGroupChecksum();
    validateWritePageChecksum();
    return currentChunkRowCount;
}
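For orientation, a sketch of a typical caller loop: advance() returns the number of rows in the next chunk (at most ColumnData.MAX_SIZE) and -1 once the file is exhausted. The readBlock(columnIndex) call is assumed from the io.prestosql.rcfile reader API; treat the loop as illustrative rather than verbatim hetu-core code.

    // Drain the reader chunk by chunk; each advance() call makes the next
    // batch of rows current for every column that was requested.
    while (reader.advance() >= 0) {
        for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
            Block block = reader.readBlock(columnIndex);
            // ... consume the decoded column values ...
        }
    }
    reader.close();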