
Example 6 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestVectorGroupByOperator, method testAggregateLongIterable:

public void testAggregateLongIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("A");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo);
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(1, outBatchList.size());
    Object result = outBatchList.get(0);
    Validator validator = getValidator(aggregateName);
    validator.validate("_total", expected, result);
}
Also used: ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc), FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator)
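
For reference, a driver test would feed this helper a fake batch source and an expected total. The sketch below is hypothetical: it assumes the FakeVectorRowBatchFromLongIterables test utility (batch size first, then one Iterable per column) seen elsewhere in Hive's vectorization tests, and uses "sum" purely as an illustration.

@Test
public void testSumLongSimple() throws HiveException {
    // Hypothetical data: 1 + 2 + ... + 6 = 21, delivered in batches of 2 rows.
    testAggregateLongIterable("sum",
            new FakeVectorRowBatchFromLongIterables(2,
                    Arrays.asList(1L, 2L, 3L, 4L, 5L, 6L)),
            21L);
}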

Example 7 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestVectorGroupByOperator, method testAggregateLongKeyIterable:

public void testAggregateLongKeyIterable(String aggregateName, Iterable<VectorizedRowBatch> data, HashMap<Object, Object> expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    Set<Object> keys = new HashSet<Object>();
    GroupByDesc desc = buildKeyGroupByDesc(ctx, aggregateName, "Value", TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo);
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

        private String aggregateName;

        private HashMap<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(2, fields.length);
            Object key = fields[0];
            Long keyValue = null;
            if (null != key) {
                assertTrue(key instanceof LongWritable);
                LongWritable lwKey = (LongWritable) key;
                keyValue = lwKey.get();
            }
            assertTrue(expected.containsKey(keyValue));
            String keyAsString = String.format("%s", key);
            Object expectedValue = expected.get(keyValue);
            Object value = fields[1];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        private FakeCaptureOutputOperator.OutputInspector init(String aggregateName, HashMap<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used: Set (java.util.Set), HashSet (java.util.HashSet), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), LongWritable (org.apache.hadoop.io.LongWritable), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc), FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator)
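
A plausible caller builds a two-column (key, value) batch source and a map of per-key expectations. Everything named below is an assumption patterned on the other examples on this page (notably the FakeVectorRowBatchFromObjectIterables constructor shape from Example 10), not code quoted from the test class.

@Test
public void testMaxLongKeyed() throws HiveException {
    // Hypothetical: two distinct keys; "max" should keep the largest value per key.
    HashMap<Object, Object> expected = new HashMap<Object, Object>();
    expected.put(1L, 30L);  // rows (1, 10) and (1, 30)
    expected.put(2L, 40L);  // rows (2, 20) and (2, 40)
    testAggregateLongKeyIterable("max",
            new FakeVectorRowBatchFromObjectIterables(2,
                    new String[] { "long", "long" },
                    Arrays.asList(new Object[] { 1L, 2L, 1L, 2L }),
                    Arrays.asList(new Object[] { 10L, 20L, 30L, 40L })),
            expected);
}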

Example 8 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestVectorGroupByOperator, method testMemoryPressureFlush:

@Test
public void testMemoryPressureFlush() throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    GroupByDesc desc = buildKeyGroupByDesc(ctx, "max", "Value", TypeInfoFactory.longTypeInfo, "Key", TypeInfoFactory.longTypeInfo);
    // Set the memory threshold so that we get 100 KB before we need to flush.
    MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
    long maxMemory = memoryMXBean.getHeapMemoryUsage().getMax();
    float threshold = 100.0f * 1024.0f / maxMemory;
    desc.setMemoryThreshold(threshold);
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    this.outputRowCount = 0;
    out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            ++outputRowCount;
        }
    });
    Iterable<Object> it = new Iterable<Object>() {

        @Override
        public Iterator<Object> iterator() {
            return new Iterator<Object>() {

                long value = 0;

                @Override
                public boolean hasNext() {
                    return true;
                }

                @Override
                public Object next() {
                    return ++value;
                }

                @Override
                public void remove() {
                }
            };
        }
    };
    FakeVectorRowBatchFromObjectIterables data = new FakeVectorRowBatchFromObjectIterables(100, new String[] { "long", "long" }, it, it);
    // The 'it' data source produces rows without ever ending.
    // We want to see memory pressure kick in and force some
    // entries in the VGBY hash map to flush.
    long countRowsProduced = 0;
    for (VectorizedRowBatch unit : data) {
        countRowsProduced += 100;
        vgo.process(unit, 0);
        if (0 < outputRowCount) {
            break;
        }
        // Upper bound on how many rows we're willing to push before a flush must occur:
        // the memory threshold is 100 KB and each distinct key costs roughly
        // 16 bytes (key + data), so the map should stay under 100 * 1024 / 16 entries.
        assertTrue(countRowsProduced < 100 * 1024 / 16);
    }
    assertTrue(0 < outputRowCount);
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), ArrayList (java.util.ArrayList), MemoryMXBean (java.lang.management.MemoryMXBean), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), Iterator (java.util.Iterator), FakeVectorRowBatchFromObjectIterables (org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc), FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator), Test (org.junit.Test)
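
The threshold arithmetic in this test deserves a note: setMemoryThreshold takes a fraction of the maximum heap, so the test converts its 100 KB budget into that fraction. A minimal sketch of the same arithmetic, assuming a 1 GB heap for concreteness:

// Illustrative numbers only: pretend getHeapMemoryUsage().getMax() returned 1 GB.
long maxMemory = 1024L * 1024L * 1024L;
float threshold = 100.0f * 1024.0f / maxMemory;  // ~9.54e-5, i.e. 100 KB of 1 GB
// At roughly 16 bytes per distinct (key, value) entry, a 100 KB budget caps the
// hash map at 100 * 1024 / 16 = 6400 entries -- the bound the loop asserts on.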

Example 9 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestVectorGroupByOperator, method testAggregateDecimalIterable:

public void testAggregateDecimalIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("A");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    GroupByDesc desc = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4));
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(1, outBatchList.size());
    Object result = outBatchList.get(0);
    Validator validator = getValidator(aggregateName);
    validator.validate("_total", expected, result);
}
Also used: ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc), FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator)
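
Invocation mirrors the long-typed variant, except the column is decimal(30,4) and the expected total is a HiveDecimal. In the sketch below, buildDecimalBatches is a hypothetical stand-in for whatever produces the decimal batches; only HiveDecimal.create is a real Hive API.

@Test
public void testSumDecimalSimple() throws HiveException {
    // buildDecimalBatches is hypothetical: assume it yields an
    // Iterable<VectorizedRowBatch> with a single decimal(30,4) column.
    Iterable<VectorizedRowBatch> batches =
            buildDecimalBatches("1.1001", "2.2002", "3.3003");
    testAggregateDecimalIterable("sum", batches, HiveDecimal.create("6.6006"));
}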

Example 10 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From class TestVectorLimitOperator, method validateVectorLimitOperator:

private void validateVectorLimitOperator(int limit, int batchSize, int expectedBatchSize) throws HiveException {
    @SuppressWarnings("unchecked")
    FakeVectorRowBatchFromObjectIterables frboi = new FakeVectorRowBatchFromObjectIterables(
            batchSize,
            new String[] { "tinyint", "double" },
            Arrays.asList(new Object[] { 1, 2, 3, 4 }),
            Arrays.asList(new Object[] { 323.0, 34.5, null, 89.3 }));
    // Get next batch
    VectorizedRowBatch vrb = frboi.produceNextBatch();
    // Create limit desc with limit value
    LimitDesc ld = new LimitDesc(limit);
    VectorLimitOperator lo = new VectorLimitOperator(new CompilationOpContext(), null, ld);
    lo.initialize(new Configuration(), null);
    // Process the batch
    lo.process(vrb, 0);
    // Verify batch size
    Assert.assertEquals(expectedBatchSize, vrb.size);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), FakeVectorRowBatchFromObjectIterables (org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables), LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc)
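
Driver tests can then vary the limit against the four-row fixture above. The two invocations below are illustrative (the test method names are assumptions), covering the truncating case and the pass-through case:

@Test
public void testLimitLessThanBatchSize() throws HiveException {
    validateVectorLimitOperator(2, 4, 2);    // limit 2 truncates a 4-row batch
}

@Test
public void testLimitGreaterThanBatchSize() throws HiveException {
    validateVectorLimitOperator(100, 4, 4);  // limit above batch size leaves it intact
}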

Aggregations

CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 40
ArrayList (java.util.ArrayList): 25
JobConf (org.apache.hadoop.mapred.JobConf): 12
FakeCaptureOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator): 10
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 10
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 10
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 10
Operator (org.apache.hadoop.hive.ql.exec.Operator): 8
HashMap (java.util.HashMap): 7
LinkedHashMap (java.util.LinkedHashMap): 7
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 7
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 7
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5
HashSet (java.util.HashSet): 5
Path (org.apache.hadoop.fs.Path): 5
ByteArrayInputStream (java.io.ByteArrayInputStream): 4
Set (java.util.Set): 4
Configuration (org.apache.hadoop.conf.Configuration): 4
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 4
Test (org.junit.Test): 4