Search in sources :

Example 1 with StripeStatistics

use of org.apache.orc.OrcProto.StripeStatistics in project hive by apache.

In the class OrcEncodedDataReader, the method getFileFooterFromCacheOrDisk:

/**
 * Returns the ORC file metadata for this split, preferring the LLAP metadata
 * cache and falling back to reading the serialized file tail from disk.
 * On a disk read with an available cache, the tail is also published to the
 * cache for subsequent readers.
 */
private OrcFileMetadata getFileFooterFromCacheOrDisk() throws IOException {
    boolean cacheUsable = fileKey != null && metadataCache != null;
    if (cacheUsable) {
        LlapBufferOrBuffers cachedTail = metadataCache.getFileMetadata(fileKey);
        if (cachedTail == null) {
            counters.incrCounter(LlapIOCounters.METADATA_CACHE_MISS);
            throwIfCacheOnlyRead(isReadCacheOnly);
        } else {
            try {
                OrcTail orcTail = getOrcTailFromLlapBuffers(cachedTail);
                counters.incrCounter(LlapIOCounters.METADATA_CACHE_HIT);
                FileTail fileTail = orcTail.getFileTail();
                List<StripeStatistics> stripeStats = getStripeStatsFromOrcTail(orcTail);
                List<StripeInformation> stripeInfos =
                        new ArrayList<>(fileTail.getFooter().getStripesCount());
                int stripeIdx = 0;
                for (OrcProto.StripeInformation proto : fileTail.getFooter().getStripesList()) {
                    stripeInfos.add(new ReaderImpl.StripeInformationImpl(proto, stripeIdx++, -1, null));
                }
                return new OrcFileMetadata(fileKey, fileTail.getFooter(), fileTail.getPostscript(),
                        stripeStats, stripeInfos,
                        ReaderImpl.getFileVersion(fileTail.getPostscript().getVersionList()));
            } finally {
                // Release the cache's reference; we no longer need the buffer.
                metadataCache.decRefBuffer(cachedTail);
            }
        }
    }
    // Cache miss or no cache available: read the footer from the file itself.
    ensureOrcReader();
    ByteBuffer serializedTail = orcReader.getSerializedFileFooter();
    if (cacheUsable) {
        // Publish the tail for future readers; we don't use the cache's copy here.
        LlapBufferOrBuffers putResult =
                metadataCache.putFileMetadata(fileKey, serializedTail, cacheTag, isStopped);
        metadataCache.decRefBuffer(putResult);
    }
    FileTail fileTail = orcReader.getFileTail();
    return new OrcFileMetadata(fileKey, fileTail.getFooter(), fileTail.getPostscript(),
            orcReader.getOrcProtoStripeStatistics(), orcReader.getStripes(),
            orcReader.getFileVersion());
}
Also used : OrcFileMetadata(org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata) OrcProto(org.apache.orc.OrcProto) StripeStatistics(org.apache.orc.OrcProto.StripeStatistics) RecordReaderImpl(org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl) ReaderImpl(org.apache.orc.impl.ReaderImpl) ByteBuffer(java.nio.ByteBuffer) FileTail(org.apache.orc.OrcProto.FileTail) LlapBufferOrBuffers(org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers) StripeInformation(org.apache.orc.StripeInformation) OrcTail(org.apache.orc.impl.OrcTail)

Example 2 with StripeStatistics

use of org.apache.orc.OrcProto.StripeStatistics in project hive by apache.

In the class FixAcidKeyIndex, the method recoverFile:

/**
 * Rewrites an ACID ORC file whose acid key index is invalid. The original
 * stripes and user metadata (except the broken key index) are copied
 * byte-for-byte into a recovery file, a freshly computed acid key index is
 * attached, and — once the recovered file validates — the original is moved
 * to a backup location and replaced by the recovered file.
 */
static void recoverFile(Configuration conf, Path inputPath, String backup) throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);
    Path recoveredPath = getRecoveryFile(inputPath);
    try (Reader reader = OrcFile.createReader(fs, inputPath)) {
        if (OrcInputFormat.isOriginal(reader)) {
            System.out.println(inputPath + " is not an acid file. No need to recover.");
            return;
        }
        AcidKeyIndexValidationResult result = validate(conf, inputPath);
        if (result.isValid) {
            System.out.println(inputPath + " has a valid acid key index. No need to recover.");
            return;
        }
        System.out.println("Recovering " + inputPath);
        // Ensure no stale recovery file is left over from a previous run.
        try {
            fs.delete(recoveredPath, false);
        } catch (FileNotFoundException e) {
        // no problem, we're just making sure the file doesn't exist
        }
        // Mirror the original file's writer configuration.
        OrcFile.WriterOptions writerOptions = OrcFile.writerOptions(conf)
                .compress(reader.getCompression())
                .version(reader.getFileVersion())
                .rowIndexStride(reader.getRowIndexStride())
                .inspector(reader.getObjectInspector());
        // Buffer size only applies when compression is enabled.
        if (reader.getCompression() != org.apache.hadoop.hive.ql.io.orc.CompressionKind.NONE) {
            writerOptions.bufferSize(reader.getCompressionSize()).enforceBufferSize();
        }
        try (Writer writer = OrcFile.createWriter(recoveredPath, writerOptions)) {
            List<StripeInformation> stripes = reader.getStripes();
            List<StripeStatistics> stripeStats = reader.getOrcProtoStripeStatistics();
            try (FSDataInputStream inputStream = fs.open(inputPath)) {
                for (int i = 0; i < stripes.size(); ++i) {
                    StripeInformation stripe = stripes.get(i);
                    // Read the entire stripe into memory...
                    int length = (int) stripe.getLength();
                    byte[] stripeBytes = new byte[length];
                    inputStream.readFully(stripe.getOffset(), stripeBytes, 0, length);
                    // ...and append it verbatim to the recovery file.
                    writer.appendStripe(stripeBytes, 0, stripeBytes.length, stripe, stripeStats.get(i));
                }
            }
            // Carry over user metadata, omitting the broken acid key index.
            for (String metadataKey : reader.getMetadataKeys()) {
                if (!metadataKey.equals(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) {
                    writer.addUserMetadata(metadataKey, reader.getMetadataValue(metadataKey));
                }
            }
            // Serialize the recomputed record identifiers as writeId,bucket,rowId;...
            StringBuilder sb = new StringBuilder();
            result.recordIdentifiers.forEach(ri ->
                    sb.append(ri.getWriteId()).append(",")
                      .append(ri.getBucketProperty()).append(",")
                      .append(ri.getRowId()).append(";"));
            writer.addUserMetadata(OrcRecordUpdater.ACID_KEY_INDEX_NAME, UTF8.encode(sb.toString()));
        }
    }
    // Only swap files in once the recovered file passes validation.
    AcidKeyIndexValidationResult fileFixed = validate(conf, recoveredPath);
    if (!fileFixed.isValid) {
        System.out.println("Unable to fix acid key index for " + inputPath);
    } else {
        Path backupDataPath;
        String scheme = inputPath.toUri().getScheme();
        String authority = inputPath.toUri().getAuthority();
        String filePath = inputPath.toUri().getPath();
        // Default: back up on the same filesystem as the input file.
        if (backup.equals(DEFAULT_BACKUP_PATH)) {
            backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
        } else {
            backupDataPath = Path.mergePaths(new Path(backup), inputPath);
        }
        // Preserve the original, then promote the recovered file in its place.
        moveFiles(fs, inputPath, backupDataPath);
        moveFiles(fs, recoveredPath, inputPath);
        System.out.println("Fixed acid key index for " + inputPath);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileNotFoundException(java.io.FileNotFoundException) StripeStatistics(org.apache.orc.OrcProto.StripeStatistics) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) StripeInformation(org.apache.orc.StripeInformation)

Aggregations

StripeStatistics (org.apache.orc.OrcProto.StripeStatistics)2 StripeInformation (org.apache.orc.StripeInformation)2 FileNotFoundException (java.io.FileNotFoundException)1 ByteBuffer (java.nio.ByteBuffer)1 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 LlapBufferOrBuffers (org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers)1 OrcFileMetadata (org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata)1 RecordReaderImpl (org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl)1 OrcProto (org.apache.orc.OrcProto)1 FileTail (org.apache.orc.OrcProto.FileTail)1 OrcTail (org.apache.orc.impl.OrcTail)1 ReaderImpl (org.apache.orc.impl.ReaderImpl)1