use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.
the class OrcWriteValidation method validateColumnStatisticsEquivalent.
/**
 * Validates that two lists of column statistics are equivalent, position by position.
 * <p>
 * The lists must have the same length; each pair of entries at the same index is then
 * handed to the single-column overload of this method, with {@code " column <index>"}
 * appended to {@code name}.
 *
 * @param orcDataSourceId identifies the data source in any raised {@link OrcCorruptionException}
 * @param name label for the statistics being compared, used in error messages
 * @param actualColumnStatistics the statistics that were observed
 * @param expectedColumnStatistics the statistics that were recorded as expected
 * @throws OrcCorruptionException if the lists differ in size (or if the per-column check raises it)
 * @throws NullPointerException if {@code name} or either list is null
 */
private static void validateColumnStatisticsEquivalent(OrcDataSourceId orcDataSourceId, String name, List<ColumnStatistics> actualColumnStatistics, List<ColumnStatistics> expectedColumnStatistics) throws OrcCorruptionException {
    requireNonNull(name, "name is null");
    requireNonNull(actualColumnStatistics, "actualColumnStatistics is null");
    requireNonNull(expectedColumnStatistics, "expectedColumnStatistics is null");

    int columnCount = actualColumnStatistics.size();
    if (columnCount != expectedColumnStatistics.size()) {
        throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in %s statistics", name);
    }

    // Sizes match, so every index is valid for both lists.
    for (int column = 0; column < columnCount; column++) {
        validateColumnStatisticsEquivalent(
                orcDataSourceId,
                name + " column " + column,
                actualColumnStatistics.get(column),
                expectedColumnStatistics.get(column));
    }
}
use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.
the class OrcWriter method bufferFileFooter.
/**
 * Collects the outputs that make up the tail of the file: the metadata section,
 * the DWRF stripe cache, the footer, the postscript, and a final single byte
 * holding the postscript length. The returned {@link DataOutput}s are not the
 * actual data, but instead are functions that know how to write the data.
 * <p>
 * Side effects: assigns {@code numberOfRows} and {@code unencryptedStats}, passes
 * the file statistics and the metadata version to {@code recordValidation},
 * clears {@code closedStripes} and resets {@code closedStripesRetainedBytes} to 0.
 *
 * @return the outputs in the order they were added: metadata, stripe cache,
 * footer, postscript, postscript-length byte
 * @throws IOException declared by the signature; presumably raised by the
 * metadata writer calls — TODO confirm
 */
private List<DataOutput> bufferFileFooter() throws IOException {
List<DataOutput> outputData = new ArrayList<>();
// The metadata section carries the statistics of every closed stripe and is emitted first.
Metadata metadata = new Metadata(closedStripes.stream().map(ClosedStripe::getStatistics).collect(toList()));
Slice metadataSlice = metadataWriter.writeMetadata(metadata);
outputData.add(createDataOutput(metadataSlice));
// The file row count is the sum of the row counts of all closed stripes.
numberOfRows = closedStripes.stream().mapToLong(stripe -> stripe.getStripeInformation().getNumberOfRows()).sum();
// File-level statistics are derived from the per-stripe column statistics by toFileStats.
List<ColumnStatistics> fileStats = toFileStats(closedStripes.stream().map(ClosedStripe::getStatistics).map(StripeStatistics::getColumnStatistics).collect(toList()));
recordValidation(validation -> validation.setFileStatistics(fileStats));
// User metadata values are converted to UTF-8 slices for the footer.
Map<String, Slice> userMetadata = this.userMetadata.entrySet().stream().collect(Collectors.toMap(Entry::getKey, entry -> utf8Slice(entry.getValue())));
// addStatsRecursive fills both collections starting from index 0; presumably it
// separates plain statistics from per-encryption-group ones — see that method.
unencryptedStats = new ArrayList<>();
Map<Integer, Map<Integer, Slice>> encryptedStats = new HashMap<>();
addStatsRecursive(fileStats, 0, new HashMap<>(), unencryptedStats, encryptedStats);
Optional<DwrfEncryption> dwrfEncryption;
if (dwrfWriterEncryption.isPresent()) {
ImmutableList.Builder<EncryptionGroup> encryptionGroupBuilder = ImmutableList.builder();
List<WriterEncryptionGroup> writerEncryptionGroups = dwrfWriterEncryption.get().getWriterEncryptionGroups();
for (int i = 0; i < writerEncryptionGroups.size(); i++) {
WriterEncryptionGroup group = writerEncryptionGroups.get(i);
// Statistics for a group are looked up by the group's position in the list.
// NOTE(review): if encryptedStats has no entry for i, groupStats is null and
// groupStats::get below throws NullPointerException — confirm addStatsRecursive
// always populates every group index.
Map<Integer, Slice> groupStats = encryptedStats.get(i);
encryptionGroupBuilder.add(new EncryptionGroup(group.getNodes(), // reader will just use key metadata from the stripe
Optional.empty(), group.getNodes().stream().map(groupStats::get).collect(toList())));
}
dwrfEncryption = Optional.of(new DwrfEncryption(dwrfWriterEncryption.get().getKeyProvider(), encryptionGroupBuilder.build()));
} else {
dwrfEncryption = Optional.empty();
}
// Stripe cache data and offsets exist only when a stripe cache writer is configured.
Optional<DwrfStripeCacheData> dwrfStripeCacheData = dwrfStripeCacheWriter.map(DwrfStripeCacheWriter::getDwrfStripeCacheData);
Slice dwrfStripeCacheSlice = metadataWriter.writeDwrfStripeCache(dwrfStripeCacheData);
outputData.add(createDataOutput(dwrfStripeCacheSlice));
Optional<List<Integer>> dwrfStripeCacheOffsets = dwrfStripeCacheWriter.map(DwrfStripeCacheWriter::getOffsets);
Footer footer = new Footer(numberOfRows, rowGroupMaxRowCount, OptionalLong.of(rawSize), closedStripes.stream().map(ClosedStripe::getStripeInformation).collect(toList()), orcTypes, ImmutableList.copyOf(unencryptedStats), userMetadata, dwrfEncryption, dwrfStripeCacheOffsets);
// The stripe information has been collected into the footer above, so the closed
// stripes can be dropped and their retained-bytes counter reset.
closedStripes.clear();
closedStripesRetainedBytes = 0;
Slice footerSlice = metadataWriter.writeFooter(footer);
outputData.add(createDataOutput(footerSlice));
recordValidation(validation -> validation.setVersion(metadataWriter.getOrcMetadataVersion()));
// The postscript records the footer and metadata lengths plus the compression settings.
Slice postscriptSlice = metadataWriter.writePostscript(footerSlice.length(), metadataSlice.length(), columnWriterOptions.getCompressionKind(), columnWriterOptions.getCompressionMaxBufferSize(), dwrfStripeCacheData);
outputData.add(createDataOutput(postscriptSlice));
// The last output is a single byte: the postscript length narrowed to a byte.
outputData.add(createDataOutput(Slices.wrappedBuffer((byte) postscriptSlice.length())));
return outputData;
}
use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.
the class LongColumnWriter method finishRowGroup.
/**
 * Closes out the current row group: builds its column statistics, remembers them,
 * accounts for their retained size, and installs a fresh statistics builder for
 * the next row group.
 *
 * @return a single-entry map from this writer's column to the statistics just built
 * @throws IllegalStateException if the writer is already closed
 */
@Override
public Map<Integer, ColumnStatistics> finishRowGroup() {
    checkState(!closed);

    ColumnStatistics rowGroupStatistics = statisticsBuilder.buildColumnStatistics();

    // Keep the statistics and track the memory they hold on to.
    rowGroupColumnStatistics.add(rowGroupStatistics);
    columnStatisticsRetainedSizeInBytes += rowGroupStatistics.getRetainedSizeInBytes();

    // The next row group starts with a new builder obtained from the supplier.
    statisticsBuilder = statisticsBuilderSupplier.get();

    return ImmutableMap.of(column, rowGroupStatistics);
}
use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.
the class LongDictionaryColumnWriter method createColumnStatistics.
/**
 * Builds the column statistics accumulated so far and then replaces the builder
 * with a new {@code IntegerStatisticsBuilder}.
 *
 * @return the statistics produced by the builder that was active on entry
 */
@Override
protected ColumnStatistics createColumnStatistics() {
    ColumnStatistics builtStatistics = statisticsBuilder.buildColumnStatistics();

    // Swap in a fresh builder only after the current one has produced its result.
    statisticsBuilder = new IntegerStatisticsBuilder();

    return builtStatistics;
}
use of com.facebook.presto.orc.metadata.statistics.ColumnStatistics in project urban-eureka by errir503.
the class MapFlatColumnWriter method getValueColumnStatistics.
/**
 * Gathers column statistics from every value writer and merges them per node index.
 * <p>
 * When there are no value writers, the result of {@code getEmptyValueColumnStatistics()}
 * is returned instead.
 *
 * @param getStats extracts a node-index-to-statistics map from a single column writer
 * @return an immutable map from node index to the merged statistics of that node
 */
private Map<Integer, ColumnStatistics> getValueColumnStatistics(Function<ColumnWriter, Map<Integer, ColumnStatistics>> getStats) {
    if (valueWriters.isEmpty()) {
        return getEmptyValueColumnStatistics();
    }

    // Group the statistics reported by each value writer under their node index.
    ImmutableListMultimap.Builder<Integer, ColumnStatistics> statsByNodeBuilder = ImmutableListMultimap.builder();
    for (MapFlatValueWriter writer : valueWriters) {
        getStats.apply(writer.getValueWriter())
                .forEach((node, stats) -> statsByNodeBuilder.put(node, stats));
    }
    ImmutableListMultimap<Integer, ColumnStatistics> statsByNode = statsByNodeBuilder.build();

    // Collapse each node's list of statistics into one merged entry.
    ImmutableMap.Builder<Integer, ColumnStatistics> mergedByNode = ImmutableMap.builder();
    for (Integer node : statsByNode.keySet()) {
        mergedByNode.put(node, mergeColumnStatistics(statsByNode.get(node)));
    }
    return mergedByNode.build();
}
Aggregations