Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class DwrfMetadataReader, method readStripeFooter.
/**
 * Parses a DWRF stripe footer message from {@code inputStream} and converts
 * its stream list and column list into a {@link StripeFooter}.
 *
 * @param hiveWriterVersion not consulted by this implementation
 * @param types ORC types passed through to the column-encoding conversion
 * @param inputStream source of the serialized stripe footer message
 * @return the converted stripe footer
 * @throws IOException propagated from reading or parsing the message
 */
@Override
public StripeFooter readStripeFooter(HiveWriterVersion hiveWriterVersion, List<OrcType> types, InputStream inputStream)
        throws IOException
{
    DwrfProto.StripeFooter protoFooter = DwrfProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));
    return new StripeFooter(
            toStream(protoFooter.getStreamsList()),
            toColumnEncoding(types, protoFooter.getColumnsList()));
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class DwrfMetadataReader, method readFooter.
/**
 * Parses a DWRF file footer message from {@code inputStream} and converts
 * it into a {@link Footer}.
 *
 * @param hiveWriterVersion forwarded to the column-statistics conversion
 * @param inputStream source of the serialized footer message
 * @return the converted footer (row count, row index stride, stripes,
 *         types, column statistics and user metadata)
 * @throws IOException propagated from reading or parsing the message
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws IOException
{
    DwrfProto.Footer protoFooter = DwrfProto.Footer.parseFrom(CodedInputStream.newInstance(inputStream));
    return new Footer(
            protoFooter.getNumberOfRows(),
            protoFooter.getRowIndexStride(),
            toStripeInformation(protoFooter.getStripesList()),
            toType(protoFooter.getTypesList()),
            toColumnStatistics(hiveWriterVersion, protoFooter.getStatisticsList(), false),
            toUserMetadata(protoFooter.getMetadataList()));
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class TestOrcBloomFilters, method testOrcHiveBloomFilterSerde.
/**
 * Serializes a bloom filter into an ORC bloom filter index and verifies it
 * two ways: by reading it back through
 * {@code OrcMetadataReader.readBloomFilterIndexes}, and by parsing the raw
 * bytes directly as an {@code OrcProto.BloomFilterIndex}.
 *
 * <p>All {@code assertEquals} calls pass the value read back first and the
 * value that was written second, matching the (actual, expected) order
 * already used by the size checks, so failure messages label the values
 * consistently.
 */
@Test
public void testOrcHiveBloomFilterSerde()
        throws Exception
{
    BloomFilter bloomFilterWrite = new BloomFilter(1000L, 0.05);
    bloomFilterWrite.addString(TEST_STRING);
    assertTrue(bloomFilterWrite.testString(TEST_STRING));

    OrcProto.BloomFilter.Builder bloomFilterBuilder = OrcProto.BloomFilter.newBuilder();
    bloomFilterBuilder.addAllBitset(Longs.asList(bloomFilterWrite.getBitSet()));
    bloomFilterBuilder.setNumHashFunctions(bloomFilterWrite.getNumHashFunctions());
    OrcProto.BloomFilter bloomFilter = bloomFilterBuilder.build();

    OrcProto.BloomFilterIndex bloomFilterIndex = OrcProto.BloomFilterIndex.getDefaultInstance();
    byte[] bytes = serializeBloomFilterToIndex(bloomFilter, bloomFilterIndex);

    // Read through method
    InputStream inputStream = new ByteArrayInputStream(bytes);
    OrcMetadataReader metadataReader = new OrcMetadataReader(new RuntimeStats());
    List<HiveBloomFilter> bloomFilters = metadataReader.readBloomFilterIndexes(inputStream);
    assertEquals(bloomFilters.size(), 1);

    // Fetched only after the size check so an empty result fails on the assertion above
    HiveBloomFilter hiveBloomFilter = bloomFilters.get(0);
    assertTrue(hiveBloomFilter.testString(TEST_STRING));
    assertFalse(hiveBloomFilter.testString(TEST_STRING_NOT_WRITTEN));
    assertEquals(hiveBloomFilter.getBitSize(), bloomFilterWrite.getBitSize());
    assertEquals(hiveBloomFilter.getNumHashFunctions(), bloomFilterWrite.getNumHashFunctions());

    // Validate bit set
    assertTrue(Arrays.equals(hiveBloomFilter.getBitSet(), bloomFilterWrite.getBitSet()));

    // Read directly: allows better inspection of the bit sets (helped to fix a lot of bugs)
    CodedInputStream input = CodedInputStream.newInstance(bytes);
    OrcProto.BloomFilterIndex deserializedBloomFilterIndex = OrcProto.BloomFilterIndex.parseFrom(input);
    List<OrcProto.BloomFilter> bloomFilterList = deserializedBloomFilterIndex.getBloomFilterList();
    assertEquals(bloomFilterList.size(), 1);

    OrcProto.BloomFilter bloomFilterRead = bloomFilterList.get(0);

    // Validate contents of ORC bloom filter bit set
    assertTrue(Arrays.equals(Longs.toArray(bloomFilterRead.getBitsetList()), bloomFilterWrite.getBitSet()));

    // hash functions
    assertEquals(bloomFilterRead.getNumHashFunctions(), bloomFilterWrite.getNumHashFunctions());

    // bit size
    assertEquals(bloomFilterRead.getBitsetCount(), bloomFilterWrite.getBitSet().length);
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class AbstractTestDwrfStripeCaching, method readFileFooter.
/**
 * Reads and parses the DWRF file footer of {@code orcFile}.
 *
 * <p>The steps, working backwards from the end of the file:
 * <ol>
 *   <li>the final byte is read as the postscript size;</li>
 *   <li>the postscript immediately preceding that byte is parsed;</li>
 *   <li>the footer immediately preceding the postscript is read, using the
 *       footer length recorded in the postscript, and parsed through an
 *       {@code OrcInputStream} configured with a ZLIB decompressor.</li>
 * </ol>
 *
 * @param orcFile file to read the footer from
 * @return the parsed footer message
 * @throws UncheckedIOException wrapping any {@link IOException} raised while reading or parsing
 */
static DwrfProto.Footer readFileFooter(File orcFile)
{
    try (RandomAccessFile file = new RandomAccessFile(orcFile, "r")) {
        // The last byte of the file holds the postscript size
        file.seek(file.length() - 1);
        int postScriptSize = file.read() & 0xff;

        // The postscript sits directly before that size byte
        long postScriptStart = file.length() - postScriptSize - 1;
        byte[] rawPostScript = readBytes(file, postScriptStart, postScriptSize);
        DwrfProto.PostScript postScript = DwrfProto.PostScript.parseFrom(
                CodedInputStream.newInstance(rawPostScript, 0, postScriptSize));

        // The footer sits directly before the postscript
        long declaredFooterLength = postScript.getFooterLength();
        long footerStart = postScriptStart - declaredFooterLength;
        int footerLength = toIntExact(declaredFooterLength);
        byte[] rawFooter = readBytes(file, footerStart, declaredFooterLength);

        int compressionBufferSize = toIntExact(postScript.getCompressionBlockSize());
        OrcDataSourceId dataSourceId = new OrcDataSourceId(orcFile.getName());
        Optional<OrcDecompressor> decompressor = OrcDecompressor.createOrcDecompressor(dataSourceId, ZLIB, compressionBufferSize);

        InputStream footerInputStream = new OrcInputStream(
                dataSourceId,
                new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                Slices.wrappedBuffer(rawFooter).slice(0, footerLength).getInput(),
                decompressor,
                Optional.empty(),
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                footerLength);
        return DwrfProto.Footer.parseFrom(footerInputStream);
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.
Class OrcMetadataReader, method readStripeFooter.
/**
 * Parses an ORC stripe footer message from {@code inputStream}, records the
 * thread CPU time spent parsing under the
 * {@code OrcReadStripeFooterTimeNanos} metric, and converts the message into
 * a {@link StripeFooter} whose third constructor argument is an empty list.
 *
 * @param orcDataSourceId not consulted by this implementation
 * @param types not consulted by this implementation
 * @param inputStream source of the serialized stripe footer message
 * @return the converted stripe footer
 * @throws IOException propagated from reading or parsing the message
 */
@Override
public StripeFooter readStripeFooter(OrcDataSourceId orcDataSourceId, List<OrcType> types, InputStream inputStream)
        throws IOException
{
    long startCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();
    OrcProto.StripeFooter protoFooter = OrcProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));

    // Only the parse itself is timed; conversion below is excluded from the metric
    long elapsedCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime() - startCpuNanos;
    runtimeStats.addMetricValue("OrcReadStripeFooterTimeNanos", elapsedCpuNanos);

    return new StripeFooter(
            toStream(protoFooter.getStreamsList()),
            toColumnEncoding(protoFooter.getColumnsList()),
            ImmutableList.of());
}
Aggregations