Example usage of io.airlift.slice.BasicSliceInput in the Presto project (prestodb),
from the class StringClassifierAdapter, method deserialize:
/**
 * Reconstructs a {@code StringClassifierAdapter} from its serialized form.
 *
 * <p>Wire layout: an int length followed by the serialized classifier model,
 * then an int entry count followed by that many (int key, int length,
 * UTF-8 bytes) enumeration entries.
 *
 * @param data the serialized adapter bytes
 * @return the deserialized adapter wrapping the classifier and its enumeration map
 */
public static StringClassifierAdapter deserialize(byte[] data) {
    BasicSliceInput in = Slices.wrappedBuffer(data).getInput();
    // Classifier payload: length prefix, then the serialized model bytes.
    Model model = ModelUtils.deserialize(in.readSlice(in.readInt()));
    // Enumeration map: count, then (key, length-prefixed UTF-8 value) pairs.
    int entryCount = in.readInt();
    ImmutableMap.Builder<Integer, String> enumerations = ImmutableMap.builder();
    for (int entry = 0; entry < entryCount; entry++) {
        int key = in.readInt();
        String value = in.readSlice(in.readInt()).toStringUtf8();
        enumerations.put(key, value);
    }
    return new StringClassifierAdapter((Classifier) model, enumerations.build());
}
Example usage of io.airlift.slice.BasicSliceInput in the Presto project (prestodb),
from the class RcFileReader, method advance:
/**
 * Advances to the next chunk of rows, reading and decoding the next row
 * group from the input when the current row group is exhausted.
 *
 * @return the number of rows in the now-current chunk, or -1 when the end
 *         of the file is reached (the reader is closed in that case)
 * @throws IOException if the data source fails or the file is truncated/corrupt
 */
public int advance() throws IOException {
    if (closed) {
        return -1;
    }
    // Slide the chunk window forward within the current row group.
    rowGroupPosition += ColumnData.MAX_SIZE;
    currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount - rowGroupPosition);
    // do we still have rows in the current row group
    if (currentChunkRowCount > 0) {
        validateWritePageChecksum();
        return currentChunkRowCount;
    }
    // are we at the end?
    if (input.remaining() == 0) {
        close();
        return -1;
    }
    // read uncompressed size of row group (which is useless information)
    if (input.remaining() < SIZE_OF_INT) {
        throw corrupt("RCFile truncated %s", dataSource);
    }
    // NOTE(review): reverseBytes suggests the file stores this int in the
    // opposite byte order from what `input.readInt()` produces — presumably
    // big-endian on disk vs. little-endian reads; confirm against the format spec.
    int unusedRowGroupSize = Integer.reverseBytes(input.readInt());
    // read sequence sync if present
    if (unusedRowGroupSize == -1) {
        // A -1 size marks a sync block: two longs of sync marker followed by
        // the real row group size.
        if (input.remaining() < SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT) {
            throw corrupt("RCFile truncated %s", dataSource);
        }
        // NOTE: this decision must agree with RcFileDecoderUtils.findFirstSyncPosition
        if (input.position() - SIZE_OF_INT >= end) {
            close();
            return -1;
        }
        // Both longs must match the sync sequence this reader was opened with.
        if (syncFirst != input.readLong() || syncSecond != input.readLong()) {
            throw corrupt("Invalid sync in RCFile %s", dataSource);
        }
        // read the useless uncompressed length
        unusedRowGroupSize = Integer.reverseBytes(input.readInt());
    } else if (rowsRead > 0) {
        // Every row group after the first must be preceded by a sync marker.
        validateWrite(writeValidation -> false, "Expected sync sequence for every row group except the first one");
    }
    if (unusedRowGroupSize <= 0) {
        throw corrupt("Invalid uncompressed row group length %s", unusedRowGroupSize);
    }
    // read row group header
    int uncompressedHeaderSize = Integer.reverseBytes(input.readInt());
    int compressedHeaderSize = Integer.reverseBytes(input.readInt());
    // Grow the reusable header buffer only when the new header does not fit.
    if (compressedHeaderSize > compressedHeaderBuffer.length()) {
        compressedHeaderBuffer = Slices.allocate(compressedHeaderSize);
    }
    input.readBytes(compressedHeaderBuffer, 0, compressedHeaderSize);
    // decompress row group header
    Slice header;
    if (decompressor != null) {
        if (headerBuffer.length() < uncompressedHeaderSize) {
            headerBuffer = Slices.allocate(uncompressedHeaderSize);
        }
        // Decompress into a view sized exactly to the uncompressed header.
        Slice buffer = headerBuffer.slice(0, uncompressedHeaderSize);
        decompressor.decompress(compressedHeaderBuffer, buffer);
        header = buffer;
    } else {
        // Without a decompressor the stored sizes must agree.
        if (compressedHeaderSize != uncompressedHeaderSize) {
            throw corrupt("Invalid RCFile %s", dataSource);
        }
        header = compressedHeaderBuffer;
    }
    BasicSliceInput headerInput = header.getInput();
    // read number of rows in row group
    rowGroupRowCount = toIntExact(readVInt(headerInput));
    rowsRead += rowGroupRowCount;
    rowGroupPosition = 0;
    currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount);
    // set column buffers
    int totalCompressedDataSize = 0;
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        // Per-column header entry: compressed size, uncompressed size, and the
        // size of the value-lengths block, all as variable-length ints.
        int compressedDataSize = toIntExact(readVInt(headerInput));
        totalCompressedDataSize += compressedDataSize;
        int uncompressedDataSize = toIntExact(readVInt(headerInput));
        if (decompressor == null && compressedDataSize != uncompressedDataSize) {
            throw corrupt("Invalid RCFile %s", dataSource);
        }
        int lengthsSize = toIntExact(readVInt(headerInput));
        Slice lengthsBuffer = headerInput.readSlice(lengthsSize);
        if (readColumns.containsKey(columnIndex)) {
            // Projected column: hand its lengths and data buffers to the column reader.
            Slice dataBuffer = input.readSlice(compressedDataSize);
            columns[columnIndex].setBuffers(lengthsBuffer, dataBuffer, uncompressedDataSize);
        } else {
            // Column not requested by the query: skip over its data bytes.
            skipFully(input, compressedDataSize);
        }
    }
    // this value is not used but validate it is correct since it might signal corruption
    if (unusedRowGroupSize != totalCompressedDataSize + uncompressedHeaderSize) {
        throw corrupt("Invalid row group size");
    }
    validateWriteRowGroupChecksum();
    validateWritePageChecksum();
    return currentChunkRowCount;
}
Aggregations