Use of org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl in project hive by apache.
The class TestLimitOperator, method testGlobalLimitReachedInDaemonOrContainer.
/**
 * Runs two {@link LimitOperator} instances that report the same operator id and verifies how the
 * counter returned by {@code getCurrentCount()} and each operator's own {@code currCount} evolve
 * while rows are pushed through them one after the other.
 *
 * @param isDaemon value handed to {@code LlapProxy.setDaemon}; when {@code false} an
 *                 {@code ObjectRegistryImpl} is installed through {@code ObjectCache} first
 * @param offset   offset used to build the {@code LimitDesc} of both operators
 * @param limit    limit used to build the {@code LimitDesc} of both operators
 * @throws HiveException declared because the operator calls made here may throw it
 */
private void testGlobalLimitReachedInDaemonOrContainer(boolean isDaemon, int offset, int limit) throws HiveException {
  LlapProxy.setDaemon(isDaemon);
  if (!isDaemon) {
    // container mode: the tez object registry has to be set up by hand
    ObjectCache.setupObjectRegistry(new ObjectRegistryImpl());
  }

  HiveConf conf = new HiveConf();
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, "query-" + random.nextInt(10000));
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez");
  conf.set(TezProcessor.HIVE_TEZ_VERTEX_NAME, "Map 1");

  LimitOperator lo1 = new LimitOperator(new CompilationOpContext());
  lo1.setConf(new LimitDesc(offset, limit));
  lo1.initialize(conf, null);
  lo1.initializeOp(conf);

  LimitOperator lo2 = new LimitOperator(new CompilationOpContext());
  lo2.setConf(new LimitDesc(offset, limit));
  lo2.initialize(conf, null);
  lo2.initializeOp(conf);

  Assert.assertEquals(0, lo1.currCount);
  Assert.assertEquals(0, lo2.currCount);

  // Both instances stand for the same operator running in different tasks; the operator id
  // matters because the limit cache key is derived from it.
  Assert.assertEquals("LIM_0", lo1.getOperatorId());
  Assert.assertEquals("LIM_0", lo2.getOperatorId());

  // total number of rows an operator may consume before the limit counts as reached
  final int rowsAllowed = limit + offset;
  int numProcessedElements = 0;

  // Feed lo1 four rows in steps of 2, 1 and 1. After every step:
  //  1. the limit is checked before each row (unlike VectorLimitOperator), so reaching it is
  //     only noticed on row offset+limit+1; getDone() is therefore true only once more than
  //     limit + offset rows have been pushed
  //  2. the shared counter equals the number of rows pushed, capped at limit + offset
  //  3. the local counter matches the shared one (no other task is working concurrently here)
  for (int rowsInStep : new int[] {2, 1, 1}) {
    processRowNTimes(lo1, rowsInStep);
    numProcessedElements += rowsInStep;
    Assert.assertEquals(numProcessedElements > rowsAllowed, lo1.getDone());
    Assert.assertEquals(Math.min(numProcessedElements, rowsAllowed), lo1.getCurrentCount().get());
    Assert.assertEquals(lo1.getCurrentCount().get(), lo1.currCount);
  }

  // lo2 reads the same shared counter as lo1, although it has not been asked to process a row
  // yet: its local count is still untouched and it is not done before processing.
  Assert.assertEquals(Math.min(numProcessedElements, rowsAllowed), lo2.getCurrentCount().get());
  Assert.assertEquals(0, lo2.currCount);
  Assert.assertEquals(false, lo2.getDone());

  // Hand one row to lo2. It is flagged done only when lo1 already pushed more than
  // limit + offset + 1 rows, because then nothing is left for lo2 to do.
  processRowNTimes(lo2, 1);
  boolean lo2DoneExpected = numProcessedElements > rowsAllowed + 1;
  Assert.assertEquals(lo2DoneExpected, lo2.getDone());

  // a done lo2 consumed nothing (count 0); otherwise it consumed the new row (count 1)
  int countAfterFirstRow = lo2.currCount;
  Assert.assertEquals(lo2DoneExpected ? 0 : 1, countAfterFirstRow);

  // Second attempt, covering the cases where limit+offset+1 is below the total number of rows.
  processRowNTimes(lo2, 1);
  if (lo2DoneExpected) {
    // lo2 was already done: its local count must not have moved
    Assert.assertEquals(countAfterFirstRow, lo2.currCount);
  } else {
    // lo2 had the chance to take one more row, so count that attempt in
    numProcessedElements += 1;
    // turned done during this call => nothing consumed; still running => one more row consumed
    int expectedCount = lo2.getDone() ? countAfterFirstRow : countAfterFirstRow + 1;
    Assert.assertEquals(expectedCount, lo2.currCount);
  }

  lo2DoneExpected = numProcessedElements > rowsAllowed + 1;
  Assert.assertEquals(lo2DoneExpected, lo2.getDone());
}
Use of org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl in project hive by apache.
The class TestVectorLimitOperator, method testGlobalLimitReachedInDaemonOrContainer.
/**
 * Runs two {@link VectorLimitOperator} instances that report the same operator id and verifies
 * the counter returned by {@code getCurrentCount()} and the done flags after batches are pushed
 * through them.
 *
 * @param isDaemon value handed to {@code LlapProxy.setDaemon}; when {@code false} an
 *                 {@code ObjectRegistryImpl} is installed through {@code ObjectCache} first
 * @param offset   offset used to build the {@code LimitDesc} of both operators
 * @param limit    limit used to build the {@code LimitDesc} of both operators
 * @throws HiveException declared because the operator calls made here may throw it
 */
private void testGlobalLimitReachedInDaemonOrContainer(boolean isDaemon, int offset, int limit) throws HiveException {
  // number of rows in a batch, taken from FakeVectorRowBatchFromObjectIterables
  final int batchRowCount = 4;
  // total number of rows an operator may consume before the limit counts as reached
  final int rowsAllowed = limit + offset;

  LlapProxy.setDaemon(isDaemon);
  if (!isDaemon) {
    // container mode: the tez object registry has to be set up by hand
    ObjectCache.setupObjectRegistry(new ObjectRegistryImpl());
  }

  HiveConf conf = new HiveConf();
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, "query-" + random.nextInt(10000));
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez");
  conf.set(TezProcessor.HIVE_TEZ_VERTEX_NAME, "Map 1");

  VectorLimitOperator lo1 = new VectorLimitOperator(
      new CompilationOpContext(), new LimitDesc(offset, limit), null, new VectorLimitDesc());
  lo1.initialize(conf, null);
  lo1.initializeOp(conf);

  VectorLimitOperator lo2 = new VectorLimitOperator(
      new CompilationOpContext(), new LimitDesc(offset, limit), null, new VectorLimitDesc());
  lo2.initialize(conf, null);
  lo2.initializeOp(conf);

  // Both instances stand for the same operator running in different tasks; the operator id
  // matters because the limit cache key is derived from it.
  Assert.assertEquals("LIM_0", lo1.getOperatorId());
  Assert.assertEquals("LIM_0", lo2.getOperatorId());

  // First batch through lo1: the done flag is only evaluated before a batch, never after
  // forwarding, so lo1 cannot be done yet.
  lo1.process(getBatch(500).produceNextBatch(), 0);
  Assert.assertFalse(lo1.getDone());

  // The shared counter holds limit+offset, or the batch size when limit+offset exceeds it,
  // since the operator cannot read past the current batch.
  Assert.assertEquals(Math.min(rowsAllowed, batchRowCount), lo1.getCurrentCount().get());

  // If lo1 already consumed enough rows, lo2 turns done without consuming anything itself.
  lo2.process(getBatch(500).produceNextBatch(), 0);
  boolean lo2ShouldBeDone = rowsAllowed <= batchRowCount;
  Assert.assertEquals(lo2ShouldBeDone, lo2.getDone());

  // A further batch makes lo1 check the limit up front, so it is done afterwards.
  lo1.process(getBatch(500).produceNextBatch(), 0);
  Assert.assertTrue(lo1.getDone());
}
Aggregations