Use of io.prestosql.orc.metadata.Footer in project hetu-core by openlookeng.
Class TestOrcReaderPositions, method testReadUserMetadata.
/**
 * Verifies that user metadata written into an ORC file is returned unchanged by the reader's footer.
 *
 * <p>A file is produced with {@code createFileWithOnlyUserMetadata}, opened through a
 * {@code FileOrcDataSource}, and the footer's user metadata values are decoded with
 * {@code Slice::toStringAscii} and compared with the map that was written.
 *
 * @throws Exception if creating, writing, reading or closing the temporary file fails
 */
@Test
public void testReadUserMetadata() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        Map<String, String> metadata = ImmutableMap.of("a", "ala", "b", "ma", "c", "kota");
        createFileWithOnlyUserMetadata(tempFile.getFile(), metadata);

        DataSize oneMegabyte = new DataSize(1, MEGABYTE);
        // The data source is scoped by its own try-with-resources so that it is closed when the
        // assertions are done, and before the enclosing block closes the temp file.
        try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), oneMegabyte, oneMegabyte, oneMegabyte, true, tempFile.getFile().lastModified())) {
            OrcReader orcReader = new OrcReader(orcDataSource, oneMegabyte, oneMegabyte, oneMegabyte);
            Footer footer = orcReader.getFooter();
            Map<String, String> readMetadata = Maps.transformValues(footer.getUserMetadata(), Slice::toStringAscii);
            assertEquals(readMetadata, metadata);
        }
    }
}
Use of io.prestosql.orc.metadata.Footer in project hetu-core by openlookeng.
Class OrcWriter, method bufferFileFooter.
/**
 * Collects the outputs that form the tail of the file. The returned elements are not the
 * actual bytes, but objects that know how to write them.
 *
 * <p>The outputs are added in this order: metadata, footer, postscript, and finally a single
 * byte holding the postscript length. As part of building the footer, {@code closedStripes}
 * is cleared and {@code closedStripesRetainedBytes} is reset to zero.
 *
 * @return the outputs to be written, in write order
 * @throws IOException declared by the signature; raised by the writer calls if they fail
 */
private List<OrcDataOutput> bufferFileFooter() throws IOException {
    // A failing pre-close callback is only logged; footer generation continues regardless.
    if (preCloseCallback.isPresent()) {
        try {
            preCloseCallback.get().call();
        } catch (Exception e) {
            log.debug("Call pre close call back error");
        }
    }

    // Per-stripe statistics feed both the metadata section and the file-level statistics.
    List<StripeStatistics> stripeStatistics = closedStripes.stream()
            .map(ClosedStripe::getStatistics)
            .collect(toList());

    List<OrcDataOutput> footerOutputs = new ArrayList<>();

    // Metadata section.
    Metadata stripeMetadata = new Metadata(stripeStatistics.stream()
            .map(Optional::of)
            .collect(toList()));
    Slice metadataSlice = metadataWriter.writeMetadata(stripeMetadata);
    footerOutputs.add(createDataOutput(metadataSlice));

    // Footer section.
    long totalRows = 0;
    for (ClosedStripe stripe : closedStripes) {
        totalRows += stripe.getStripeInformation().getNumberOfRows();
    }

    Optional<ColumnMetadata<ColumnStatistics>> fileStats = toFileStats(stripeStatistics.stream()
            .map(StripeStatistics::getColumnStatistics)
            .collect(toList()));
    recordValidation(validation -> validation.setFileStatistics(fileStats));

    Map<String, Slice> encodedUserMetadata = this.userMetadata.entrySet().stream()
            .collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));

    Footer footer = new Footer(
            totalRows,
            rowGroupMaxRowCount,
            closedStripes.stream().map(ClosedStripe::getStripeInformation).collect(toImmutableList()),
            orcTypes,
            fileStats,
            encodedUserMetadata);

    // The stripe information has been copied into the footer, so the bookkeeping can be reset.
    closedStripes.clear();
    closedStripesRetainedBytes = 0;

    Slice footerSlice = metadataWriter.writeFooter(footer);
    footerOutputs.add(createDataOutput(footerSlice));

    recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion()));

    // Postscript, followed by one byte carrying the postscript length.
    Slice postscriptSlice = metadataWriter.writePostscript(
            footerSlice.length(),
            metadataSlice.length(),
            compression,
            maxCompressionBufferSize);
    footerOutputs.add(createDataOutput(postscriptSlice));
    byte postscriptLength = UnsignedBytes.checkedCast(postscriptSlice.length());
    footerOutputs.add(createDataOutput(Slices.wrappedBuffer(postscriptLength)));

    return footerOutputs;
}
Use of io.prestosql.orc.metadata.Footer in project hetu-core by openlookeng.
Class TestOrcWriter, method testWriteOutputStreamsInOrder.
/**
 * Verifies the stream layout the writer produces inside each stripe, once per
 * {@code OrcWriteValidationMode}.
 *
 * <p>Five VARCHAR columns with values of different lengths are written as a single page. The
 * file is then re-opened, each stripe footer is read, and the test asserts that no index stream
 * appears after a non-index stream, and that the non-index streams are listed in
 * non-decreasing order of length.
 *
 * @throws IOException if writing, reading or cleaning up the temporary file fails
 */
@Test
public void testWriteOutputStreamsInOrder() throws IOException {
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        // One temp file per validation mode; try-with-resources closes it on every iteration,
        // including when an assertion fails.
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = new OrcWriter(new OutputStreamOrcDataSink(new FileOutputStream(tempFile.getFile())), ImmutableList.of("test1", "test2", "test3", "test4", "test5"), ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR), NONE, new OrcWriterOptions().withStripeMinSize(new DataSize(0, MEGABYTE)).withStripeMaxSize(new DataSize(32, MEGABYTE)).withStripeMaxRowCount(ORC_STRIPE_SIZE).withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE).withDictionaryMaxMemory(new DataSize(32, MEGABYTE)), false, ImmutableMap.of(), true, validationMode, new OrcWriterStats(), Optional.empty(), Optional.empty());

            // write down some data with unsorted streams
            String[] data = new String[] { "a", "bbbbb", "ccc", "dd", "eeee" };
            Block[] blocks = new Block[data.length];
            int entries = 65536;
            BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
            for (int i = 0; i < data.length; i++) {
                byte[] bytes = data[i].getBytes();
                for (int j = 0; j < entries; j++) {
                    // force to write different data: the first byte cycles through 0..127
                    bytes[0] = (byte) ((bytes[0] + 1) % 128);
                    blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                    blockBuilder.closeEntry();
                }
                blocks[i] = blockBuilder.build();
                blockBuilder = blockBuilder.newBlockBuilderLike(null);
            }
            writer.write(new Page(blocks));
            writer.close();

            // read the footer and verify the streams are ordered by size
            DataSize dataSize = new DataSize(1, MEGABYTE);
            // The data source is closed before the temp file it reads from.
            try (OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), dataSize, dataSize, dataSize, true, tempFile.getFile().lastModified())) {
                Footer footer = new OrcReader(orcDataSource, dataSize, dataSize, dataSize).getFooter();
                for (StripeInformation stripe : footer.getStripes()) {
                    // read the stripe footer, located after the stripe's index and data sections
                    Slice tailBuffer = orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), toIntExact(stripe.getFooterLength()));
                    try (InputStream inputStream = new OrcInputStream(OrcChunkLoader.create(orcDataSource.getId(), tailBuffer, Optional.empty(), newSimpleAggregatedMemoryContext()))) {
                        StripeFooter stripeFooter = new OrcMetadataReader().readStripeFooter(footer.getTypes(), inputStream, ZoneId.of("UTC"));
                        int size = 0;
                        boolean dataStreamStarted = false;
                        for (Stream stream : stripeFooter.getStreams()) {
                            if (isIndexStream(stream)) {
                                // index streams must all come before the first data stream
                                assertFalse(dataStreamStarted);
                                continue;
                            }
                            dataStreamStarted = true;
                            // verify sizes in order
                            assertGreaterThanOrEqual(stream.getLength(), size);
                            size = stream.getLength();
                        }
                    }
                }
            }
        }
    }
}
Aggregations