use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.
the class VectorizedReadUtils method getSerializedOrcTail.
/**
 * Opens the ORC inputFile and reads its metadata to construct a ByteBuffer with the OrcTail content.
 * @param inputFile - the original ORC file - this needs to be accessed to retrieve the original schema for mapping
 * @param fileId - file ID of the input file; serves as the cache key in an LLAP setup
 * @param job - JobConf instance to adjust
 * @throws IOException - errors relating to accessing the ORC file
 */
public static ByteBuffer getSerializedOrcTail(InputFile inputFile, SyntheticFileId fileId, JobConf job) throws IOException {
  ByteBuffer result = null;
  if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) && LlapProxy.getIo() != null) {
    MapWork mapWork = LlapHiveUtils.findMapWork(job);
    Path path = new Path(inputFile.location());
    PartitionDesc partitionDesc = LlapHiveUtils.partitionDescForPath(path, mapWork.getPathToPartitionInfo());
    // Note: since Hive doesn't know about the partition information of Iceberg tables, partitionDesc is only
    // used to deduce the table (and DB) name here.
    CacheTag cacheTag = HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_TRACK_CACHE_USAGE)
        ? LlapHiveUtils.getDbAndTableNameForMetrics(path, true, partitionDesc) : null;
    try {
      // The schema has to be serialized and deserialized because the two sides use different packagings of
      // TypeDescription: Iceberg expects org.apache.hive.iceberg.org.apache.orc.TypeDescription as it shades
      // ORC, while LLAP provides the unshaded org.apache.orc.TypeDescription type.
      BufferChunk tailBuffer = LlapProxy.getIo().getOrcTailFromCache(path, job, cacheTag, fileId).getTailBuffer();
      result = tailBuffer.getData();
    } catch (IOException ioe) {
      LOG.warn("LLAP is turned on but was unable to get file metadata information through its cache for {}", path, ioe);
    }
  }
  // Fall back to the plain ORC reader file opening method in the absence, or on failure, of LLAP.
  if (result == null) {
    try (ReaderImpl orcFileReader = (ReaderImpl) ORC.newFileReader(inputFile, job)) {
      result = orcFileReader.getSerializedFileFooter();
    }
  }
  return result;
}
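For context, a hedged sketch of how a caller might assemble the arguments for getSerializedOrcTail. The class name, the readTail method, and the import location of VectorizedReadUtils are assumptions for illustration; the SyntheticFileId construction mirrors the tests below.
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.SyntheticFileId;
import org.apache.hadoop.mapred.JobConf;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.mr.hive.vector.VectorizedReadUtils; // assumed package location

public class OrcTailReadSketch {
  // Builds the InputFile and SyntheticFileId for a path, then delegates to getSerializedOrcTail.
  public static ByteBuffer readTail(Path path, JobConf job) throws IOException {
    FileSystem fs = path.getFileSystem(job);
    FileStatus status = fs.getFileStatus(path);
    // The synthetic file ID is derived from path, length and modification time.
    SyntheticFileId fileId = new SyntheticFileId(path, status.getLen(), status.getModificationTime());
    return VectorizedReadUtils.getSerializedOrcTail(HadoopInputFile.fromStatus(status, job), fileId, job);
  }
}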
use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.
the class TestOrcMetadataCache method testGetOrcTailForPath.
@Test
public void testGetOrcTailForPath() throws Exception {
  DummyMemoryManager mm = new DummyMemoryManager();
  DummyCachePolicy cp = new DummyCachePolicy();
  final int MAX_ALLOC = 64;
  LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
  BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4 * 4096, 0, null, mm, metrics, null, true);
  MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
  Path path = new Path("../data/files/alltypesorc");
  Configuration jobConf = new Configuration();
  Configuration daemonConf = new Configuration();
  CacheTag tag = CacheTag.build("test-table");
  // The first call reads the tail from the file system and populates the metadata cache.
  OrcTail uncached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, null);
  // With the cache-only flag set, the second call can only succeed as a cache hit.
  jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
  OrcTail cached = OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, null);
  assertEquals(uncached.getSerializedTail(), cached.getSerializedTail());
  assertEquals(uncached.getFileTail(), cached.getFileTail());
}
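The populate-then-verify pattern above distills into a small helper; this sketch only restates the calls already shown, reusing the test's imports.
// Reads the tail twice: the first call populates the metadata cache, then the
// cache-only flag forbids file system access, so the second call must be a cache hit.
private static OrcTail readTailTwiceExpectingCacheHit(Path path, Configuration jobConf,
    Configuration daemonConf, CacheTag tag, MetadataCache cache) throws Exception {
  OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, null);
  jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
  return OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, null);
}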
use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.
the class TestOrcMetadataCache method testGetOrcTailForPathWithFileIdChange.
@Test
public void testGetOrcTailForPathWithFileIdChange() throws Exception {
  DummyMemoryManager mm = new DummyMemoryManager();
  DummyCachePolicy cp = new DummyCachePolicy();
  final int MAX_ALLOC = 64;
  LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
  BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4 * 4096, 0, null, mm, metrics, null, true);
  MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
  Path path = new Path("../data/files/alltypesorc");
  Configuration jobConf = new Configuration();
  Configuration daemonConf = new Configuration();
  CacheTag tag = CacheTag.build("test-table");
  OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, new SyntheticFileId(path, 100, 100));
  jobConf.set(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname, "true");
  Exception ex = null;
  try {
    // This should miss the cache, since the fileKey changed.
    OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache, new SyntheticFileId(path, 100, 101));
    fail();
  } catch (IOException e) {
    ex = e;
  }
  Assert.assertTrue(ex.getMessage().contains(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname));
}
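The manual try/catch/fail sequence can also be written with Assert.assertThrows; a sketch with identical semantics, assuming JUnit 4.13+ is on the classpath and reusing the test's local variables.
IOException e = Assert.assertThrows(IOException.class, () ->
    OrcEncodedDataReader.getOrcTailForPath(path, jobConf, tag, daemonConf, cache,
        new SyntheticFileId(path, 100, 101)));
Assert.assertTrue(e.getMessage().contains(HiveConf.ConfVars.LLAP_IO_CACHE_ONLY.varname));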
use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.
the class TestOrcMetadataCache method testProactiveEvictionMark.
@Test
public void testProactiveEvictionMark() throws Exception {
  DummyMemoryManager mm = new DummyMemoryManager();
  DummyCachePolicy cp = new DummyCachePolicy();
  final int MAX_ALLOC = 64;
  LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("", "");
  BuddyAllocator alloc = new BuddyAllocator(false, false, 8, MAX_ALLOC, 1, 4096, 0, null, mm, metrics, null, true);
  MetadataCache cache = new MetadataCache(alloc, mm, cp, true, metrics);
  long fn1 = 1;
  long fn2 = 2;
  long fn3 = 3;
  AtomicBoolean isStopped = new AtomicBoolean(false);
  // Case where the metadata consists of just 1 buffer (most real-world cases).
  ByteBuffer bb = ByteBuffer.wrap("small-meta-data-content".getBytes());
  // Case where the metadata consists of multiple buffers (rare case): max allocation is 64, hence the test
  // data below is of length 65.
  ByteBuffer bb2 = ByteBuffer.wrap("-large-meta-data-content-large-meta-data-content-large-meta-data-".getBytes());
  LlapBufferOrBuffers table1Buffers1 = cache.putFileMetadata(fn1, bb, CacheTag.build("default.table1"), isStopped);
  assertNotNull(table1Buffers1.getSingleLlapBuffer());
  LlapBufferOrBuffers table1Buffers2 = cache.putFileMetadata(fn2, bb2, CacheTag.build("default.table1"), isStopped);
  assertNotNull(table1Buffers2.getMultipleLlapBuffers());
  assertEquals(2, table1Buffers2.getMultipleLlapBuffers().length);
  // Case where the metadata consists of just 1 buffer (most real-world cases).
  ByteBuffer bb3 = ByteBuffer.wrap("small-meta-data-content-for-otherFile".getBytes());
  LlapBufferOrBuffers table2Buffers1 = cache.putFileMetadata(fn3, bb3, CacheTag.build("default.table2"), isStopped);
  assertNotNull(table2Buffers1.getSingleLlapBuffer());
  Predicate<CacheTag> predicate = tag -> "default.table1".equals(tag.getTableName());
  // Simulate eviction of one of the buffers.
  table1Buffers2.getMultipleLlapBuffers()[1].decRef();
  assertEquals(INVALIDATE_OK, table1Buffers2.getMultipleLlapBuffers()[1].invalidate());
  // table1Buffers1: 27 (allocated as 32) + table1Buffers2[0]: 64 (also allocated as 64)
  assertEquals(96, cache.markBuffersForProactiveEviction(predicate, false));
  // The single buffer of file1 should be marked, as per the predicate.
  assertTrue(table1Buffers1.getSingleLlapBuffer().isMarkedForEviction());
  // The multi-buffer of file2 should be partially marked, as per the predicate and the prior eviction.
  assertTrue(table1Buffers2.getMultipleLlapBuffers()[0].isMarkedForEviction());
  assertFalse(table1Buffers2.getMultipleLlapBuffers()[1].isMarkedForEviction());
  // The single buffer of file3 should not be marked, as per the predicate.
  assertFalse(table2Buffers1.getSingleLlapBuffer().isMarkedForEviction());
}
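Predicates are not limited to exact table matches. Since getTableName() carries the "db.table" string, as the test's own predicate shows, a coarser DB-wide mark is a one-liner; a hedged sketch reusing the test's cache variable:
// Marks every buffer whose tag belongs to any table of the "default" database.
Predicate<CacheTag> wholeDbPredicate = tag -> tag.getTableName().startsWith("default.");
long markedBytes = cache.markBuffersForProactiveEviction(wholeDbPredicate, false);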
use of org.apache.hadoop.hive.common.io.CacheTag in project hive by apache.
the class TestCacheContentsTracker method testParentCacheTagGeneration.
/**
 * Tests parent CacheTag generation by checking each step while traversing from a 3rd-level
 * partition up to the DB level.
 */
@Test
public void testParentCacheTagGeneration() {
  CacheTag db = cacheTagBuilder("dbname");
  CacheTag table = cacheTagBuilder("dbname.tablename");
  CacheTag p = cacheTagBuilder("dbname.tablename", "p=v1");
  CacheTag pp = cacheTagBuilder("dbname.tablename", "p=v1", "pp=vv1");
  CacheTag ppp = cacheTagBuilder("dbname.tablename", "p=v1", "pp=vv1", "ppp=vvv1");
  assertTrue(pp.compareTo(CacheTag.createParentCacheTag(ppp)) == 0);
  assertTrue(p.compareTo(CacheTag.createParentCacheTag(pp)) == 0);
  assertTrue(table.compareTo(CacheTag.createParentCacheTag(p)) == 0);
  assertTrue(db.compareTo(CacheTag.createParentCacheTag(table)) == 0);
  assertNull(CacheTag.createParentCacheTag(db));
}
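The cacheTagBuilder helper used above is not part of this snippet; the following is a plausible reconstruction, assuming a CacheTag.build overload that accepts a partition-spec map as in current Hive sources.
import java.util.LinkedHashMap;
import org.apache.hadoop.hive.common.io.CacheTag;

// Builds a table-level tag, or a partition-level one from "key=value" strings.
private static CacheTag cacheTagBuilder(String dbAndTable, String... partitions) {
  if (partitions == null || partitions.length == 0) {
    return CacheTag.build(dbAndTable);
  }
  LinkedHashMap<String, String> partDescs = new LinkedHashMap<>();
  for (String partition : partitions) {
    String[] kv = partition.split("=");
    partDescs.put(kv[0], kv[1]);
  }
  return CacheTag.build(dbAndTable, partDescs);
}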