
Example 6 with ObjectRegistryImpl

use of org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl in project hive by apache.

the class TestLimitOperator method testGlobalLimitReachedInDaemonOrContainer.

private void testGlobalLimitReachedInDaemonOrContainer(boolean isDaemon, int offset, int limit) throws HiveException {
    // from FakeVectorRowBatchFromObjectIterables
    int numProcessedElements = 0;
    LlapProxy.setDaemon(isDaemon);
    if (!isDaemon) {
        // init tez object registry
        ObjectCache.setupObjectRegistry(new ObjectRegistryImpl());
    }
    HiveConf conf = new HiveConf();
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, "query-" + random.nextInt(10000));
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez");
    conf.set(TezProcessor.HIVE_TEZ_VERTEX_NAME, "Map 1");
    LimitOperator lo1 = new LimitOperator(new CompilationOpContext());
    lo1.setConf(new LimitDesc(offset, limit));
    lo1.initialize(conf, null);
    lo1.initializeOp(conf);
    LimitOperator lo2 = new LimitOperator(new CompilationOpContext());
    lo2.setConf(new LimitDesc(offset, limit));
    lo2.initialize(conf, null);
    lo2.initializeOp(conf);
    Assert.assertEquals(0, lo1.currCount);
    Assert.assertEquals(0, lo2.currCount);
    // operator id is important, as it's the base of the limit cache key
    // these operator instances represent the same operator running in different tasks
    Assert.assertEquals("LIM_0", lo1.getOperatorId());
    Assert.assertEquals("LIM_0", lo2.getOperatorId());
    // assertion no.1: unlike VectorLimitOperator, op.process checks the limit before every element,
    // so reaching the limit is noticed while processing the (offset+limit+1)th element, and
    // op.getDone() is true once at least limit + offset elements have been processed
    // assertion no.2: the number of processed rows is properly written to the global cache and
    // equals the number of times op.process was called (capped at limit + offset)
    // assertion no.3: the local counter is in sync with the global counter (in this test case, no
    // other tasks work concurrently)
    // element: 1,2
    processRowNTimes(lo1, 2);
    numProcessedElements += 2;
    Assert.assertEquals(numProcessedElements > limit + offset, lo1.getDone());
    Assert.assertEquals(Math.min(numProcessedElements, limit + offset), lo1.getCurrentCount().get());
    Assert.assertEquals(lo1.getCurrentCount().get(), lo1.currCount);
    // element: 3
    processRowNTimes(lo1, 1);
    numProcessedElements += 1;
    Assert.assertEquals(numProcessedElements > limit + offset, lo1.getDone());
    Assert.assertEquals(Math.min(numProcessedElements, limit + offset), lo1.getCurrentCount().get());
    Assert.assertEquals(lo1.getCurrentCount().get(), lo1.currCount);
    // element: 4
    processRowNTimes(lo1, 1);
    numProcessedElements += 1;
    Assert.assertEquals(numProcessedElements > limit + offset, lo1.getDone());
    Assert.assertEquals(Math.min(numProcessedElements, limit + offset), lo1.getCurrentCount().get());
    Assert.assertEquals(lo1.getCurrentCount().get(), lo1.currCount);
    // if lo1 already processed enough rows, lo2 will be marked done without processing any elements;
    // lo2.getCurrentCount().get() should return the same value as lo1.getCurrentCount().get()
    Assert.assertEquals(Math.min(numProcessedElements, limit + offset), lo2.getCurrentCount().get());
    // ...but lo2's current count hasn't been touched yet, as process hasn't been called
    Assert.assertEquals(0, lo2.currCount);
    // getDone() = false before processing
    Assert.assertEquals(false, lo2.getDone());
    // try to process one more element with op2
    processRowNTimes(lo2, 1);
    // lo2 will only be seen as done if "numProcessedElements" (the number of elements processed
    // by lo1) is more than limit + offset + 1, because in that case lo2 has nothing left to do
    boolean lo2DoneExpected = numProcessedElements > limit + offset + 1;
    Assert.assertEquals(lo2DoneExpected, lo2.getDone());
    // if lo2 is done, it hasn't processed any elements (currCount=0), otherwise it processed the
    // new element
    int lo2Count = lo2.currCount;
    Assert.assertEquals(lo2DoneExpected ? 0 : 1, lo2.currCount);
    // repeat once more (to test cases where limit+offset+1 < number of all elements to process)
    processRowNTimes(lo2, 1);
    if (!lo2DoneExpected) {
        // if lo2 had the chance to process one more element (!done) ...
        // ... let's count that in
        numProcessedElements += 1;
        if (lo2.getDone()) {
            // turned to done during this process call => it didn't process the new element
            Assert.assertEquals(lo2Count, lo2.currCount);
        } else {
            // didn't turn to done during this process call => it processed 1 more element
            Assert.assertEquals(lo2Count + 1, lo2.currCount);
        }
    } else {
        // current count hasn't changed
        Assert.assertEquals(lo2Count, lo2.currCount);
    }
    lo2DoneExpected = numProcessedElements > limit + offset + 1;
    Assert.assertEquals(lo2DoneExpected, lo2.getDone());
}
Also used : CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ObjectRegistryImpl(org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl) HiveConf(org.apache.hadoop.hive.conf.HiveConf) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc)
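
The processRowNTimes helper is not shown in this snippet. A minimal sketch of what it presumably does, assuming LimitOperator accepts an arbitrary row object via Operator.process(Object, int) and that the row's content is irrelevant to the counting (the helper body and the dummy row value are assumptions, not the project's actual code):

// Hypothetical helper: pushes the same dummy row through the operator n times, so each call
// lets the operator update its local currCount and the shared counter in the global cache.
private void processRowNTimes(LimitOperator operator, int n) throws HiveException {
    for (int i = 0; i < n; i++) {
        // tag 0: the row is fed as if coming from the operator's single (0th) parent
        operator.process(new Object[] { "dummy" }, 0);
    }
}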

Example 7 with ObjectRegistryImpl

use of org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl in project hive by apache.

the class TestVectorLimitOperator method testGlobalLimitReachedInDaemonOrContainer.

private void testGlobalLimitReachedInDaemonOrContainer(boolean isDaemon, int offset, int limit) throws HiveException {
    // from FakeVectorRowBatchFromObjectIterables
    int actualNumberOfElements = 4;
    LlapProxy.setDaemon(isDaemon);
    if (!isDaemon) {
        // init tez object registry
        ObjectCache.setupObjectRegistry(new ObjectRegistryImpl());
    }
    HiveConf conf = new HiveConf();
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVEQUERYID, "query-" + random.nextInt(10000));
    HiveConf.setVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez");
    conf.set(TezProcessor.HIVE_TEZ_VERTEX_NAME, "Map 1");
    VectorLimitOperator lo1 = new VectorLimitOperator(new CompilationOpContext(), new LimitDesc(offset, limit), null, new VectorLimitDesc());
    lo1.initialize(conf, null);
    lo1.initializeOp(conf);
    VectorLimitOperator lo2 = new VectorLimitOperator(new CompilationOpContext(), new LimitDesc(offset, limit), null, new VectorLimitDesc());
    lo2.initialize(conf, null);
    lo2.initializeOp(conf);
    // operator id is important, as it's the base of the limit cache key
    // these operator instances represent the same operator running in different tasks
    Assert.assertEquals("LIM_0", lo1.getOperatorId());
    Assert.assertEquals("LIM_0", lo2.getOperatorId());
    lo1.process(getBatch(500).produceNextBatch(), 0);
    // lo1 is not done yet, as done is not checked after forwarding, only before the next batch
    Assert.assertFalse(lo1.getDone());
    // the number of processed rows is properly written to the global cache and equals limit+offset,
    // or the batch size when limit+offset > batch size (the operator obviously cannot read beyond
    // the current batch)
    Assert.assertEquals(Math.min(limit + offset, actualNumberOfElements), lo1.getCurrentCount().get());
    // if lo1 already processed enough rows, lo2 will turn to done without processing any elements
    lo2.process(getBatch(500).produceNextBatch(), 0);
    Assert.assertEquals(limit + offset <= actualNumberOfElements, lo2.getDone());
    // lo1 is done now, as the limit is checked before processing the batch
    lo1.process(getBatch(500).produceNextBatch(), 0);
    Assert.assertTrue(lo1.getDone());
}
Also used : CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ObjectRegistryImpl(org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc)
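
The getBatch helper is likewise omitted here; the comment "from FakeVectorRowBatchFromObjectIterables" and actualNumberOfElements = 4 suggest it builds a fake vectorized source containing four rows. A rough sketch under that assumption (the column type and values are made up, and the FakeVectorRowBatchFromObjectIterables constructor is assumed to take a batch size, column type names, and one Iterable of values per column):

// Hypothetical helper: a fake vectorized row source with 4 rows in a single bigint column,
// so limit/offset values around 4 exercise the "done" transition within one batch.
private FakeVectorRowBatchFromObjectIterables getBatch(int batchSize) throws HiveException {
    return new FakeVectorRowBatchFromObjectIterables(batchSize, new String[] { "bigint" },
        java.util.Arrays.asList(new Object[] { 1L, 2L, 3L, 4L }));
}

Each produceNextBatch() call on this source yields the next VectorizedRowBatch, which is what the test feeds into lo1.process(...) and lo2.process(...).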

Aggregations

ObjectRegistryImpl (org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl): 7
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 3
LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc): 3
Configuration (org.apache.hadoop.conf.Configuration): 2
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 2
VectorLimitDesc (org.apache.hadoop.hive.ql.plan.VectorLimitDesc): 2
Test (org.junit.Test): 2
ByteBuffer (java.nio.ByteBuffer): 1
ArrayList (java.util.ArrayList): 1
VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource): 1
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1
MapJoinTestData (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData): 1
MapJoinTestDescription (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription): 1
FakeVectorRowBatchFromObjectIterables (org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables): 1
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 1
TezSharedExecutor (org.apache.tez.common.TezSharedExecutor): 1
ProcessorDescriptor (org.apache.tez.dag.api.ProcessorDescriptor): 1
TezDAGID (org.apache.tez.dag.records.TezDAGID): 1
TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 1
TezTaskID (org.apache.tez.dag.records.TezTaskID): 1