
Example 1 with MetadataPpdResult

Use of org.apache.hadoop.hive.metastore.api.MetadataPpdResult in project hive by apache.

The ExternalCache class, method getAndValidate.

@Override
public void getAndValidate(List<HdfsFileStatusWithId> files, boolean isOriginal, OrcTail[] result, ByteBuffer[] ppdResult) throws IOException, HiveException {
    assert result.length == files.size();
    assert ppdResult == null || ppdResult.length == files.size();
    // First, check the local cache.
    localCache.getAndValidate(files, isOriginal, result, ppdResult);
    // posMap is an unfortunate consequence of batching/iterating thru MS results.
    HashMap<Long, Integer> posMap = new HashMap<Long, Integer>();
    // We won't do metastore-side PPD for the things we have locally.
    List<Long> fileIds = determineFileIdsToQuery(files, result, posMap);
    // Need to get a new one, see the comment wrt threadlocals.
    ExternalFooterCachesByConf.Cache cache = externalCacheSrc.getCache(conf);
    ByteBuffer serializedSarg = null;
    if (isPpdEnabled) {
        serializedSarg = getSerializedSargForMetastore(isOriginal);
    }
    if (serializedSarg != null) {
        // Don't fetch the footer; PPD happens in the metastore (MS).
        Iterator<Entry<Long, MetadataPpdResult>> iter =
            cache.getFileMetadataByExpr(fileIds, serializedSarg, false);
        while (iter.hasNext()) {
            Entry<Long, MetadataPpdResult> e = iter.next();
            int ix = getAndVerifyIndex(posMap, files, result, e.getKey());
            processPpdResult(e.getValue(), files.get(ix), ix, result, ppdResult);
        }
    } else {
        // Only populate corrupt IDs for the things we couldn't deserialize if we are not using
        // ppd. We assume that PPD makes sure the cached values are correct (or fails otherwise);
        // also, we don't use the footers in PPD case.
        List<Long> corruptIds = null;
        Iterator<Entry<Long, ByteBuffer>> iter = cache.getFileMetadata(fileIds);
        while (iter.hasNext()) {
            Entry<Long, ByteBuffer> e = iter.next();
            int ix = getAndVerifyIndex(posMap, files, result, e.getKey());
            if (!processBbResult(e.getValue(), ix, files.get(ix), result)) {
                if (corruptIds == null) {
                    corruptIds = new ArrayList<>();
                }
                corruptIds.add(e.getKey());
            }
        }
        if (corruptIds != null) {
            cache.clearFileMetadata(corruptIds);
        }
    }
}
Also used : HashMap (java.util.HashMap), MetadataPpdResult (org.apache.hadoop.hive.metastore.api.MetadataPpdResult), ByteBuffer (java.nio.ByteBuffer), Entry (java.util.Map.Entry)
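
The posMap built in getAndValidate records, for each file ID sent to the metastore, the index of that file in the original batch; because the metastore returns entries keyed by file ID and in no guaranteed order, the map is what routes each returned entry back to the correct slot of the result arrays. The following is a minimal, self-contained sketch of that technique only, not Hive's implementation: all names are hypothetical, and a plain Map stands in for the metastore-backed cache.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BatchedLookupSketch {

    /**
     * Fills 'results' for every id in 'ids' by querying a batched, unordered
     * lookup (here a plain Map standing in for the remote cache). A position
     * map from id to original index routes each returned entry back to the
     * slot it belongs to, which is the role posMap plays in getAndValidate().
     */
    static void getAndFill(List<Long> ids, Map<Long, String> backend, String[] results) {
        assert results.length == ids.size();
        // Remember where each queried id came from so unordered results can be placed.
        Map<Long, Integer> posMap = new HashMap<>();
        List<Long> toQuery = new ArrayList<>();
        for (int i = 0; i < ids.size(); ++i) {
            if (results[i] != null) continue; // already satisfied locally
            posMap.put(ids.get(i), i);
            toQuery.add(ids.get(i));
        }
        // The "remote" lookup returns values keyed by id, not by position.
        for (Long id : toQuery) {
            String value = backend.get(id);
            if (value == null) continue; // miss; the caller falls back elsewhere
            Integer ix = posMap.get(id);
            if (ix == null) {
                throw new AssertionError("Unexpected id in result: " + id);
            }
            results[ix] = value;
        }
    }

    public static void main(String[] args) {
        Map<Long, String> backend = new HashMap<>();
        backend.put(10L, "footer-A");
        backend.put(30L, "footer-C");
        String[] results = new String[3];
        getAndFill(List.of(10L, 20L, 30L), backend, results);
        System.out.println(java.util.Arrays.toString(results)); // [footer-A, null, footer-C]
    }
}

In the real method the same idea is applied twice: once for the PPD path (getFileMetadataByExpr, consumed by processPpdResult) and once for the plain footer path (getFileMetadata, consumed by processBbResult), with getAndVerifyIndex doing the posMap lookup and the sanity check.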

Aggregations

ByteBuffer (java.nio.ByteBuffer): 1
HashMap (java.util.HashMap): 1
Entry (java.util.Map.Entry): 1
MetadataPpdResult (org.apache.hadoop.hive.metastore.api.MetadataPpdResult): 1