Search in sources :

Example 1 with CacheTag

Use of org.apache.hadoop.hive.common.io.CacheTag in the project hive by apache.

From the class VectorizedReadUtils, method getSerializedOrcTail.

/**
 * Opens the ORC inputFile and reads the metadata information to construct a byte buffer with OrcTail content.
 * @param inputFile - the original ORC file - this needs to be accessed to retrieve the original schema for mapping
 * @param job - JobConf instance to adjust
 * @param fileId - FileID for the input file, serves as cache key in an LLAP setup
 * @throws IOException - errors relating to accessing the ORC file
 */
public static ByteBuffer getSerializedOrcTail(InputFile inputFile, SyntheticFileId fileId, JobConf job) throws IOException {
    ByteBuffer serializedTail = null;
    boolean useLlapCache =
        HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) && LlapProxy.getIo() != null;
    if (useLlapCache) {
        Path orcFilePath = new Path(inputFile.location());
        MapWork mapWork = LlapHiveUtils.findMapWork(job);
        PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(orcFilePath, mapWork.getPathToPartitionInfo());
        // Note: Hive doesn't track partition information for Iceberg tables, so partitionDesc is only
        // used to deduce the table (and DB) name here.
        CacheTag cacheTag = null;
        if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_TRACK_CACHE_USAGE)) {
            cacheTag = LlapHiveUtils.getDbAndTableNameForMetrics(orcFilePath, true, partitionDesc);
        }
        try {
            // The schema has to be serialized and deserialized because it is passed between different
            // TypeDescription packages: Iceberg expects the shaded
            // org.apache.hive.iceberg.org.apache.orc.TypeDescription, while LLAP provides the unshaded
            // org.apache.orc.TypeDescription type.
            BufferChunk tailBuffer = LlapProxy.getIo().getOrcTailFromCache(orcFilePath, job, cacheTag, fileId).getTailBuffer();
            serializedTail = tailBuffer.getData();
        } catch (IOException ioe) {
            LOG.warn("LLAP is turned on but was unable to get file metadata information through its cache for {}", orcFilePath, ioe);
        }
    }
    // Fall back to the plain ORC reader when LLAP is absent or its cache lookup failed.
    if (serializedTail == null) {
        try (ReaderImpl orcFileReader = (ReaderImpl) ORC.newFileReader(inputFile, job)) {
            serializedTail = orcFileReader.getSerializedFileFooter();
        }
    }
    return serializedTail;
}
Also used : Path(org.apache.hadoop.fs.Path) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) ReaderImpl(org.apache.hive.iceberg.org.apache.orc.impl.ReaderImpl) CacheTag(org.apache.hadoop.hive.common.io.CacheTag) IOException(java.io.IOException) BufferChunk(org.apache.orc.impl.BufferChunk) ByteBuffer(java.nio.ByteBuffer)

Example 2 with CacheTag

use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.

the class TestOrcMetadataCache method testGetOrcTailForPath.

@Test
public void testGetOrcTailForPath() throws Exception {
    // Build a minimal LLAP metadata cache on top of a small buddy allocator.
    DummyMemoryManager memMgr = new DummyMemoryManager();
    DummyCachePolicy policy = new DummyCachePolicy();
    final int maxAlloc = 64;
    LlapDaemonCacheMetrics cacheMetrics = LlapDaemonCacheMetrics.create("", "");
    BuddyAllocator allocator = new BuddyAllocator(false, false, 8, maxAlloc, 1, 4 * 4096, 0, null, memMgr, cacheMetrics, null, true);
    MetadataCache metadataCache = new MetadataCache(allocator, memMgr, policy, true, cacheMetrics);
    Path orcFile = new Path("../data/files/alltypesorc");
    Configuration jobConf = new Configuration();
    Configuration daemonConf = new Configuration();
    CacheTag cacheTag = CacheTag.build("test-table");
    // First lookup reads from disk and populates the cache.
    OrcTail uncached = OrcEncodedDataReader.getOrcTailForPath(orcFile, jobConf, cacheTag, daemonConf, metadataCache, null);
    // Second lookup is restricted to cache-only mode, so it must be served from the cache.
    jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
    OrcTail cached = OrcEncodedDataReader.getOrcTailForPath(orcFile, jobConf, cacheTag, daemonConf, metadataCache, null);
    // The cached tail must match the one originally read from disk.
    assertEquals(uncached.getSerializedTail(), cached.getSerializedTail());
    assertEquals(uncached.getFileTail(), cached.getFileTail());
}
Also used : Path(org.apache.hadoop.fs.Path) LlapDaemonCacheMetrics(org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics) Configuration(org.apache.hadoop.conf.Configuration) MetadataCache(org.apache.hadoop.hive.llap.io.metadata.MetadataCache) CacheTag(org.apache.hadoop.hive.common.io.CacheTag) OrcTail(org.apache.orc.impl.OrcTail) Test(org.junit.Test)

Example 3 with CacheTag

use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.

the class TestOrcMetadataCache method testGetOrcTailForPathWithFileIdChange.

@Test
public void testGetOrcTailForPathWithFileIdChange() throws Exception {
    // Build a minimal LLAP metadata cache on top of a small buddy allocator.
    DummyMemoryManager memMgr = new DummyMemoryManager();
    DummyCachePolicy policy = new DummyCachePolicy();
    final int maxAlloc = 64;
    LlapDaemonCacheMetrics cacheMetrics = LlapDaemonCacheMetrics.create("", "");
    BuddyAllocator allocator = new BuddyAllocator(false, false, 8, maxAlloc, 1, 4 * 4096, 0, null, memMgr, cacheMetrics, null, true);
    MetadataCache metadataCache = new MetadataCache(allocator, memMgr, policy, true, cacheMetrics);
    Path orcFile = new Path("../data/files/alltypesorc");
    Configuration jobConf = new Configuration();
    Configuration daemonConf = new Configuration();
    CacheTag cacheTag = CacheTag.build("test-table");
    // Populate the cache under one synthetic file id.
    OrcEncodedDataReader.getOrcTailForPath(orcFile, jobConf, cacheTag, daemonConf, metadataCache, new SyntheticFileId(orcFile, 100, 100));
    jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
    Exception caught = null;
    try {
        // A different fileKey must miss the cache, and cache-only mode turns that miss into an IOException.
        OrcEncodedDataReader.getOrcTailForPath(orcFile, jobConf, cacheTag, daemonConf, metadataCache, new SyntheticFileId(orcFile, 100, 101));
        fail();
    } catch (IOException e) {
        caught = e;
    }
    Assert.assertTrue(caught.getMessage().contains(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname));
}
Also used : Path(org.apache.hadoop.fs.Path) LlapDaemonCacheMetrics(org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics) Configuration(org.apache.hadoop.conf.Configuration) SyntheticFileId(org.apache.hadoop.hive.ql.io.SyntheticFileId) MetadataCache(org.apache.hadoop.hive.llap.io.metadata.MetadataCache) CacheTag(org.apache.hadoop.hive.common.io.CacheTag) IOException(java.io.IOException) IllegalCacheConfigurationException(org.apache.hadoop.hive.llap.IllegalCacheConfigurationException) IOException(java.io.IOException) Test(org.junit.Test)

Example 4 with CacheTag

use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.

the class TestOrcMetadataCache method testProactiveEvictionMark.

@Test
public void testProactiveEvictionMark() throws Exception {
    // Build a minimal LLAP metadata cache backed by a tiny buddy allocator (64-byte max allocation,
    // so payloads longer than 64 bytes are split across multiple buffers).
    DummyMemoryManager mm = new DummyMemoryManager();
    DummyCachePolicy cp = new DummyCachePolicy();
    final int MAX_ALLOC = 64;
    LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
    BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4096, 0, null, mm, metrics, null, true);
    MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
    // Synthetic file keys for three distinct files.
    long fn1 = 1;
    long fn2 = 2;
    long fn3 = 3;
    AtomicBoolean isStopped = new AtomicBoolean(false);
    // Case for when metadata consists of just 1 buffer (most of the realworld cases)
    ByteBuffer bb = ByteBuffer.wrap("small-meta-data-content".getBytes());
    // Case for when metadata consists of multiple buffers (rare case), (max allocation is 64 hence the test data
    // below is of length 65
    ByteBuffer bb2 = ByteBuffer.wrap("-large-meta-data-content-large-meta-data-content-large-meta-data-".getBytes());
    // file1 and file2 both belong to default.table1; file2's 65-byte payload yields two buffers.
    LlapBufferOrBuffers table1Buffers1 = cache.putFileMetadata(fn1, bb, CacheTag.build("default.table1"), isStopped);
    assertNotNull(table1Buffers1.getSingleLlapBuffer());
    LlapBufferOrBuffers table1Buffers2 = cache.putFileMetadata(fn2, bb2, CacheTag.build("default.table1"), isStopped);
    assertNotNull(table1Buffers2.getMultipleLlapBuffers());
    assertEquals(2, table1Buffers2.getMultipleLlapBuffers().length);
    // Case for when metadata consists of just 1 buffer (most of the realworld cases)
    ByteBuffer bb3 = ByteBuffer.wrap("small-meta-data-content-for-otherFile".getBytes());
    // file3 belongs to a different table (default.table2) and must not match the predicate below.
    LlapBufferOrBuffers table2Buffers1 = cache.putFileMetadata(fn3, bb3, CacheTag.build("default.table2"), isStopped);
    assertNotNull(table2Buffers1.getSingleLlapBuffer());
    // Proactive eviction targets only buffers tagged with default.table1.
    Predicate<CacheTag> predicate = tag -> "default.table1".equals(tag.getTableName());
    // Simulating eviction on some buffers: the second chunk of file2 is released and invalidated up
    // front, so the proactive mark pass must skip it.
    table1Buffers2.getMultipleLlapBuffers()[1].decRef();
    assertEquals(INVALIDATE_OK, table1Buffers2.getMultipleLlapBuffers()[1].invalidate());
    // Expected marked total: table1Buffers1:27 (allocated as 32) + table1Buffers2[0]:64 (also allocated as 64)
    assertEquals(96, cache.markBuffersForProactiveEviction(predicate, false));
    // Single buffer for file1 should be marked as per predicate
    assertTrue(table1Buffers1.getSingleLlapBuffer().isMarkedForEviction());
    // Multi buffer for file2 should be partially marked as per predicate and prior eviction
    assertTrue(table1Buffers2.getMultipleLlapBuffers()[0].isMarkedForEviction());
    assertFalse(table1Buffers2.getMultipleLlapBuffers()[1].isMarkedForEviction());
    // Single buffer for file3 should not be marked as per predicate
    assertFalse(table2Buffers1.getSingleLlapBuffer().isMarkedForEviction());
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) OrcEncodedDataReader(org.apache.hadoop.hive.llap.io.encoded.OrcEncodedDataReader) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Random(java.util.Random) LlapBufferOrBuffers(org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers) FileStatus(org.apache.hadoop.fs.FileStatus) Priority(org.apache.hadoop.hive.llap.cache.LowLevelCache.Priority) ByteBuffer(java.nio.ByteBuffer) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) SyntheticFileId(org.apache.hadoop.hive.ql.io.SyntheticFileId) CacheTag(org.apache.hadoop.hive.common.io.CacheTag) DiskRange(org.apache.hadoop.hive.common.io.DiskRange) DiskRangeList(org.apache.hadoop.hive.common.io.DiskRangeList) IncompleteCb(org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb) MetadataCache(org.apache.hadoop.hive.llap.io.metadata.MetadataCache) INVALIDATE_OK(org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer.INVALIDATE_OK) Predicate(java.util.function.Predicate) IllegalCacheConfigurationException(org.apache.hadoop.hive.llap.IllegalCacheConfigurationException) HiveConf(org.apache.hadoop.hive.conf.HiveConf) OrcTail(org.apache.orc.impl.OrcTail) IOException(java.io.IOException) Test(org.junit.Test) DataCache(org.apache.hadoop.hive.common.io.DataCache) LlapMetadataBuffer(org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapMetadataBuffer) LlapDaemonCacheMetrics(org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics) Assert(org.junit.Assert) MetadataCache(org.apache.hadoop.hive.llap.io.metadata.MetadataCache) ByteBuffer(java.nio.ByteBuffer) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) LlapDaemonCacheMetrics(org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics) CacheTag(org.apache.hadoop.hive.common.io.CacheTag) LlapBufferOrBuffers(org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers) Test(org.junit.Test)

Example 5 with CacheTag

use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.

the class TestCacheContentsTracker method testParentCacheTagGeneration.

/**
 * Tests parent CacheTag generation by checking each step when traversing from a 3rd level
 * partition up to the DB level.
 */
@Test
public void testParentCacheTagGeneration() {
    // Explicitly built tags for each level of the hierarchy.
    CacheTag dbTag = cacheTagBuilder("dbname");
    CacheTag tableTag = cacheTagBuilder("dbname.tablename");
    CacheTag partLvl1 = cacheTagBuilder("dbname.tablename", "p=v1");
    CacheTag partLvl2 = cacheTagBuilder("dbname.tablename", "p=v1", "pp=vv1");
    CacheTag partLvl3 = cacheTagBuilder("dbname.tablename", "p=v1", "pp=vv1", "ppp=vvv1");
    // The parent derived from each child must compare equal to the explicitly built tag one level up.
    assertTrue(partLvl2.compareTo(CacheTag.createParentCacheTag(partLvl3)) == 0);
    assertTrue(partLvl1.compareTo(CacheTag.createParentCacheTag(partLvl2)) == 0);
    assertTrue(tableTag.compareTo(CacheTag.createParentCacheTag(partLvl1)) == 0);
    assertTrue(dbTag.compareTo(CacheTag.createParentCacheTag(tableTag)) == 0);
    // The DB level is the root of the hierarchy and has no parent.
    assertNull(CacheTag.createParentCacheTag(dbTag));
}
Also used : CacheTag(org.apache.hadoop.hive.common.io.CacheTag) Test(org.junit.Test)

Aggregations

CacheTag (org.apache.hadoop.hive.common.io.CacheTag)12 Test (org.junit.Test)8 Path (org.apache.hadoop.fs.Path)7 LlapDaemonCacheMetrics (org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics)5 Configuration (org.apache.hadoop.conf.Configuration)4 MetadataCache (org.apache.hadoop.hive.llap.io.metadata.MetadataCache)4 IOException (java.io.IOException)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 DiskRangeList (org.apache.hadoop.hive.common.io.DiskRangeList)3 SyntheticFileId (org.apache.hadoop.hive.ql.io.SyntheticFileId)3 OrcTail (org.apache.orc.impl.OrcTail)3 ByteBuffer (java.nio.ByteBuffer)2 Random (java.util.Random)2 Predicate (java.util.function.Predicate)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 DiskRange (org.apache.hadoop.hive.common.io.DiskRange)2 IllegalCacheConfigurationException (org.apache.hadoop.hive.llap.IllegalCacheConfigurationException)2 INVALIDATE_OK (org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer.INVALIDATE_OK)2 Priority (org.apache.hadoop.hive.llap.cache.LowLevelCache.Priority)2 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)2