Search in sources :

Example 11 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testAggregateDouble.

/**
 * Runs a double-typed aggregate check over the supplied values.
 *
 * <p>Wraps {@code values} in a fake row-batch source with a single column
 * declared as {@code "double"} and hands it to
 * {@code testAggregateDoubleIterable} together with the expected result.
 *
 * @param aggregateName name of the aggregate to exercise
 * @param batchSize     batch size passed to the fake row-batch source
 * @param values        input values for the single double column
 * @param expected      expected result, forwarded unchanged to the delegate
 * @throws HiveException propagated from the delegate
 */
public void testAggregateDouble(String aggregateName, int batchSize, Iterable<Object> values, Object expected) throws HiveException {
    final String[] columnTypes = new String[] { "double" };
    @SuppressWarnings("unchecked")
    FakeVectorRowBatchFromObjectIterables batchSource =
        new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, values);
    testAggregateDoubleIterable(aggregateName, batchSource, expected);
}
Also used : FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables)

Example 12 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testRollupAggregation.

/**
 * Exercises the vectorized group-by operator with grouping sets enabled,
 * modelling {@code select count(v) from name group by rollup (k1,k2)}.
 *
 * <p>The test feeds batches from {@code getDataForRollup()} until at least
 * 100 rows have been processed, counts every row emitted by the operator,
 * and asserts that exactly {@code 1 + 3 + 10} rows come out.
 *
 * @throws HiveException propagated from operator setup or processing
 */
@Test
public void testRollupAggregation() throws HiveException {
    // Input schema: two key columns followed by the value column.
    List<String> columnNames = new ArrayList<>();
    columnNames.add("k1");
    columnNames.add("k2");
    columnNames.add("v");
    VectorizationContext ctx = new VectorizationContext("name", columnNames);

    // select count(v) from name group by rollup (k1,k2);
    String[] keyColumns = new String[] { "k1", "k2" };
    TypeInfo[] keyTypes = new TypeInfo[] { TypeInfoFactory.longTypeInfo, TypeInfoFactory.longTypeInfo };
    Pair<GroupByDesc, VectorGroupByDesc> descPair =
        buildKeyGroupByDesc(ctx, "count", "v", TypeInfoFactory.longTypeInfo, keyColumns, keyTypes);
    GroupByDesc desc = descPair.left;
    VectorGroupByDesc vectorDesc = descPair.right;

    // Enable grouping sets and register the ids 0, 1 and 2.
    desc.setGroupingSetsPresent(true);
    ArrayList<Long> groupingSets = new ArrayList<>();
    for (long setId = 0L; setId <= 2L; setId++) {
        groupingSets.add(setId);
    }
    desc.setListGroupingSets(groupingSets);

    // Append the grouping-sets dummy key. Mutating the key list in place only
    // works because buildKeyGroupByDesc used an ArrayList; the real compiler
    // must not do this.
    ExprNodeDesc dummyGroupingKey = new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, 0L);
    desc.getKeys().add(dummyGroupingKey);
    // Position of the grouping-set key among the keys.
    desc.setGroupingSetPosition(2);

    CompilationOpContext cCtx = new CompilationOpContext();
    desc.setMinReductionHashAggr(0.5f);

    // Use a really low check interval.
    hconf.set("hive.groupby.mapaggr.checkinterval", "10");
    hconf.set("hive.vectorized.groupby.checkinterval", "10");

    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
    FakeCaptureVectorToRowOutputOperator capture =
        FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);

    // Count every row the operator emits.
    outputRowCount = 0;
    capture.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            outputRowCount++;
        }
    });

    // Batches of one row each.
    FakeVectorRowBatchFromObjectIterables data = getDataForRollup();
    long rowsFed = 0;
    for (VectorizedRowBatch batch : data) {
        // After 24 rows every key has been seen and the hash map holds 14
        // entries, while 24 * 0.5 = 12. Hash mode still must not be switched
        // off because of the 3 grouping sets; if it were, the output would be
        // 14 + 3 * (100 - 24) rows.
        rowsFed += batch.size;
        vgo.process(batch, 0);
        if (rowsFed >= 100) {
            break;
        }
    }
    vgo.close(false);

    // All groupings: 10 keys yield 14 rows with the rollup.
    assertEquals(1 + 3 + 10, outputRowCount);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables) FakeCaptureVectorToRowOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) Test(org.junit.Test)

Example 13 with FakeVectorRowBatchFromObjectIterables

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables in project hive by apache.

the class TestVectorGroupByOperator method testAggregateStringKeyAggregate.

/**
 * Runs a keyed aggregate check with a string key column and a long value column.
 *
 * <p>Wraps {@code list} and {@code values} in a fake row-batch source whose
 * columns are declared as {@code "string"} and {@code "long"}, then delegates
 * to {@code testAggregateStringKeyIterable} with
 * {@code TypeInfoFactory.longTypeInfo}.
 *
 * @param aggregateName name of the aggregate to exercise
 * @param batchSize     batch size passed to the fake row-batch source
 * @param list          values for the first ("string") column
 * @param values        values for the second ("long") column
 * @param expected      expected results, forwarded unchanged to the delegate
 * @throws HiveException propagated from the delegate
 */
public void testAggregateStringKeyAggregate(String aggregateName, int batchSize, Iterable<Object> list, Iterable<Object> values, HashMap<Object, Object> expected) throws HiveException {
    final String[] columnTypes = new String[] { "string", "long" };
    @SuppressWarnings("unchecked")
    FakeVectorRowBatchFromObjectIterables batchSource =
        new FakeVectorRowBatchFromObjectIterables(batchSize, columnTypes, list, values);
    testAggregateStringKeyIterable(aggregateName, batchSource, TypeInfoFactory.longTypeInfo, expected);
}
Also used : FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables)

Aggregations

FakeVectorRowBatchFromObjectIterables (org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables): 13; CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 7; Test (org.junit.Test): 7; ArrayList (java.util.ArrayList): 6; FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator): 6; GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 6; VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 6; HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 5; Iterator (java.util.Iterator): 3; ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 3; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 3; MemoryMXBean (java.lang.management.MemoryMXBean): 2; Configuration (org.apache.hadoop.conf.Configuration): 1; VectorAggregateExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression): 1; LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc): 1; VectorLimitDesc (org.apache.hadoop.hive.ql.plan.VectorLimitDesc): 1; ObjectRegistryImpl (org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl): 1