Use of org.apache.orc.OrcProto.StripeStatistics in project hive by apache.
Class OrcEncodedDataReader, method getFileFooterFromCacheOrDisk:
/**
 * Gets file metadata for the split from cache, or reads it from the file.
 */
private OrcFileMetadata getFileFooterFromCacheOrDisk() throws IOException {
  LlapBufferOrBuffers tailBuffers = null;
  List<StripeStatistics> stats = null;
  List<StripeInformation> stripes = null;
  boolean hasCache = fileKey != null && metadataCache != null;
  if (hasCache) {
    tailBuffers = metadataCache.getFileMetadata(fileKey);
    if (tailBuffers != null) {
      try {
        OrcTail orcTail = getOrcTailFromLlapBuffers(tailBuffers);
        counters.incrCounter(LlapIOCounters.METADATA_CACHE_HIT);
        FileTail tail = orcTail.getFileTail();
        stats = getStripeStatsFromOrcTail(orcTail);
        stripes = new ArrayList<>(tail.getFooter().getStripesCount());
        int stripeIdx = 0;
        for (OrcProto.StripeInformation stripeProto : tail.getFooter().getStripesList()) {
          stripes.add(new ReaderImpl.StripeInformationImpl(stripeProto, stripeIdx++, -1, null));
        }
        return new OrcFileMetadata(fileKey, tail.getFooter(), tail.getPostscript(), stats,
            stripes, ReaderImpl.getFileVersion(tail.getPostscript().getVersionList()));
      } finally {
        // We don't need the buffer anymore.
        metadataCache.decRefBuffer(tailBuffers);
      }
    } else {
      counters.incrCounter(LlapIOCounters.METADATA_CACHE_MISS);
      throwIfCacheOnlyRead(isReadCacheOnly);
    }
  }
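  // Cache miss (or no cache): read the serialized footer from the file and, if possible, cache it.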
  ensureOrcReader();
  ByteBuffer tailBufferBb = orcReader.getSerializedFileFooter();
  if (hasCache) {
    tailBuffers = metadataCache.putFileMetadata(fileKey, tailBufferBb, cacheTag, isStopped);
    // We don't use the cache's copy of the buffer.
    metadataCache.decRefBuffer(tailBuffers);
  }
  FileTail ft = orcReader.getFileTail();
  return new OrcFileMetadata(fileKey, ft.getFooter(), ft.getPostscript(),
      orcReader.getOrcProtoStripeStatistics(), orcReader.getStripes(), orcReader.getFileVersion());
}
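For reference, the same stripe-level metadata that OrcEncodedDataReader assembles here is also reachable through the public ORC reader API. The following is a minimal sketch, not taken from the Hive source: the class name StripeStatsDump and the fallback path /tmp/example.orc are made up, and it assumes an ORC version in which Reader is Closeable and exposes getStripeStatistics().

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.StripeInformation;
import org.apache.orc.StripeStatistics;

public class StripeStatsDump {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args.length > 0 ? args[0] : "/tmp/example.orc"); // hypothetical path
    try (Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf))) {
      List<StripeInformation> stripes = reader.getStripes();
      // Decoded (non-proto) stripe statistics; one entry per stripe.
      List<StripeStatistics> stats = reader.getStripeStatistics();
      for (int i = 0; i < stripes.size(); i++) {
        StripeInformation stripe = stripes.get(i);
        System.out.printf("stripe %d: offset=%d length=%d rows=%d%n",
            i, stripe.getOffset(), stripe.getLength(), stripe.getNumberOfRows());
        ColumnStatistics[] cols = stats.get(i).getColumnStatistics();
        System.out.printf("  columns with statistics: %d%n", cols.length);
      }
    }
  }
}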
Use of org.apache.orc.OrcProto.StripeStatistics in project hive by apache.
Class FixAcidKeyIndex, method recoverFile:
static void recoverFile(Configuration conf, Path inputPath, String backup) throws IOException {
  FileSystem fs = inputPath.getFileSystem(conf);
  Path recoveredPath = getRecoveryFile(inputPath);
  try (Reader reader = OrcFile.createReader(fs, inputPath)) {
    if (OrcInputFormat.isOriginal(reader)) {
      System.out.println(inputPath + " is not an acid file. No need to recover.");
      return;
    }
    AcidKeyIndexValidationResult validationResult = validate(conf, inputPath);
    if (validationResult.isValid) {
      System.out.println(inputPath + " has a valid acid key index. No need to recover.");
      return;
    }
    System.out.println("Recovering " + inputPath);
    // Make sure the recovery file does not already exist.
    try {
      fs.delete(recoveredPath, false);
    } catch (FileNotFoundException e) {
      // No problem, we're just making sure the file doesn't exist.
    }
    // Writer should match the ORC configuration from the original file.
    OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
        .compress(reader.getCompression())
        .version(reader.getFileVersion())
        .rowIndexStride(reader.getRowIndexStride())
        .inspector(reader.getObjectInspector());
    // Compression buffer size should only be set if compression is enabled.
    if (reader.getCompression() != org.apache.hadoop.hive.ql.io.orc.CompressionKind.NONE) {
      writerOptions.bufferSize(reader.getCompressionSize()).enforceBufferSize();
    }
    try (Writer writer = OrcFile.createWriter(recoveredPath, writerOptions)) {
      List<StripeInformation> stripes = reader.getStripes();
      List<StripeStatistics> stripeStats = reader.getOrcProtoStripeStatistics();
      try (FSDataInputStream inputStream = fs.open(inputPath)) {
        for (int idx = 0; idx < stripes.size(); ++idx) {
          // Initialize buffer to read the entire stripe.
          StripeInformation stripe = stripes.get(idx);
          int stripeLength = (int) stripe.getLength();
          byte[] buffer = new byte[stripeLength];
          inputStream.readFully(stripe.getOffset(), buffer, 0, stripeLength);
          // Append the stripe buffer to the new ORC file.
          writer.appendStripe(buffer, 0, buffer.length, stripe, stripeStats.get(idx));
        }
      }
      // Add the rest of the metadata keys.
      for (String metadataKey : reader.getMetadataKeys()) {
        if (!metadataKey.equals(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) {
          writer.addUserMetadata(metadataKey, reader.getMetadataValue(metadataKey));
        }
      }
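      // Rebuild the index: one "writeId,bucketProperty,rowId;" entry per stripe,
      // recording the last record identifier of that stripe.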
      StringBuilder sb = new StringBuilder();
      validationResult.recordIdentifiers.stream().forEach(
          ri -> sb.append(ri.getWriteId()).append(",")
              .append(ri.getBucketProperty()).append(",")
              .append(ri.getRowId()).append(";"));
      // Finally add the fixed acid key index.
      writer.addUserMetadata(OrcRecordUpdater.ACID_KEY_INDEX_NAME, UTF8.encode(sb.toString()));
    }
  }
  // Confirm the file is really fixed, and replace the old file.
  AcidKeyIndexValidationResult fileFixed = validate(conf, recoveredPath);
  if (fileFixed.isValid) {
    Path backupDataPath;
    String scheme = inputPath.toUri().getScheme();
    String authority = inputPath.toUri().getAuthority();
    String filePath = inputPath.toUri().getPath();
    // Use the same filesystem as the input file if the backup path is not explicitly specified.
    if (backup.equals(DEFAULT_BACKUP_PATH)) {
      backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
    } else {
      backupDataPath = Path.mergePaths(new Path(backup), inputPath);
    }
    // Move the data file to the backup path.
    moveFiles(fs, inputPath, backupDataPath);
    // Finally move the recovered file to the actual file.
    moveFiles(fs, recoveredPath, inputPath);
    System.out.println("Fixed acid key index for " + inputPath);
  } else {
    System.out.println("Unable to fix acid key index for " + inputPath);
  }
}
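The index written above is stored as ORC user metadata under OrcRecordUpdater.ACID_KEY_INDEX_NAME, i.e. "hive.acid.key.index". As a sanity check, here is a minimal sketch (not part of FixAcidKeyIndex; the class name AcidKeyIndexDump is hypothetical) that reads the index back through the public ORC reader API and prints one entry per stripe:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;

public class AcidKeyIndexDump {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path(args[0]); // an ACID ORC file, e.g. a delta bucket file
    try (Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf))) {
      String key = "hive.acid.key.index"; // OrcRecordUpdater.ACID_KEY_INDEX_NAME
      if (!reader.hasMetadataValue(key)) {
        System.out.println(path + " has no acid key index (original file?)");
        return;
      }
      String index = StandardCharsets.UTF_8.decode(reader.getMetadataValue(key)).toString();
      // Entries are "writeId,bucketProperty,rowId;", one per stripe,
      // matching the format written by recoverFile above.
      for (String entry : index.split(";")) {
        if (!entry.isEmpty()) {
          System.out.println("last record in stripe: " + entry);
        }
      }
    }
  }
}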