Search in sources :

Example 1 with FakeCaptureVectorToRowOutputOperator

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator in project hive by apache.

Found in the class TestVectorGroupByOperator, method testAggregateLongIterable.

/**
 * Drives a vectorized group-by over the single column "A", typed as long, with the
 * aggregate evaluated in PARTIAL1 mode, then checks the one captured output row.
 *
 * @param aggregateName name of the aggregate passed to the descriptor builder and
 *                      used to look up the validator
 * @param data          batches handed to the operator one at a time, in iteration order
 * @param expected      expected value forwarded to the validator
 * @throws HiveException propagated from operator construction, processing, or validation
 */
public void testAggregateLongIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> columnNames = new ArrayList<>();
    columnNames.add("A");
    VectorizationContext vectorizationContext = new VectorizationContext("name", columnNames);

    Pair<GroupByDesc, VectorGroupByDesc> descPair = buildGroupByDescType(
        vectorizationContext, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.longTypeInfo);
    VectorGroupByDesc vectorGroupByDesc = descPair.snd;

    // The same compilation context is shared by the group-by operator and the capture child.
    CompilationOpContext opContext = new CompilationOpContext();
    Operator<? extends OperatorDesc> baseGroupBy = OperatorFactory.get(opContext, descPair.fst);
    VectorGroupByOperator vectorGroupBy =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(baseGroupBy, vectorizationContext, vectorGroupByDesc);
    FakeCaptureVectorToRowOutputOperator capture =
        FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(opContext, vectorGroupBy);
    vectorGroupBy.initialize(hconf, null);

    for (VectorizedRowBatch batch : data) {
        vectorGroupBy.process(batch, 0);
    }
    vectorGroupBy.close(false);

    // Exactly one row is expected from this key-less aggregation.
    List<Object> capturedRows = capture.getCapturedRows();
    assertNotNull(capturedRows);
    assertEquals(1, capturedRows.size());
    Object onlyRow = capturedRows.get(0);
    getValidator(aggregateName).validate("_total", expected, onlyRow);
}
Also used: ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 2 with FakeCaptureVectorToRowOutputOperator

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator in project hive by apache.

Found in the class TestVectorGroupByOperator, method testAggregateDecimalIterable.

/**
 * Drives a vectorized group-by over the single column "A", typed as decimal(30, 4),
 * with the aggregate evaluated in PARTIAL1 mode, then checks the one captured output row.
 *
 * @param aggregateName name of the aggregate passed to the descriptor builder and
 *                      used to look up the validator
 * @param data          batches handed to the operator one at a time, in iteration order
 * @param expected      expected value forwarded to the validator
 * @throws HiveException propagated from operator construction, processing, or validation
 */
public void testAggregateDecimalIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> columnNames = new ArrayList<>();
    columnNames.add("A");
    VectorizationContext vectorizationContext = new VectorizationContext("name", columnNames);

    Pair<GroupByDesc, VectorGroupByDesc> descPair = buildGroupByDescType(
        vectorizationContext, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.getDecimalTypeInfo(30, 4));
    VectorGroupByDesc vectorGroupByDesc = descPair.snd;

    // The same compilation context is shared by the group-by operator and the capture child.
    CompilationOpContext opContext = new CompilationOpContext();
    Operator<? extends OperatorDesc> baseGroupBy = OperatorFactory.get(opContext, descPair.fst);
    VectorGroupByOperator vectorGroupBy =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(baseGroupBy, vectorizationContext, vectorGroupByDesc);
    FakeCaptureVectorToRowOutputOperator capture =
        FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(opContext, vectorGroupBy);
    vectorGroupBy.initialize(hconf, null);

    for (VectorizedRowBatch batch : data) {
        vectorGroupBy.process(batch, 0);
    }
    vectorGroupBy.close(false);

    // Exactly one row is expected from this key-less aggregation.
    List<Object> capturedRows = capture.getCapturedRows();
    assertNotNull(capturedRows);
    assertEquals(1, capturedRows.size());
    Object onlyRow = capturedRows.get(0);
    getValidator(aggregateName).validate("_total", expected, onlyRow);
}
Also used: ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 3 with FakeCaptureVectorToRowOutputOperator

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator in project hive by apache.

Found in the class TestVectorGroupByOperator, method testAggregateCountReduceIterable.

/**
 * Drives a vectorized "count" group-by over the single long column "A" in FINAL mode
 * with GLOBAL processing, then checks the one captured output row.
 *
 * @param data     batches handed to the operator one at a time, in iteration order
 * @param expected expected value forwarded to the "count" validator
 * @throws HiveException propagated from operator construction, processing, or validation
 */
public void testAggregateCountReduceIterable(Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> columnNames = new ArrayList<>();
    columnNames.add("A");
    VectorizationContext vectorizationContext = new VectorizationContext("name", columnNames);

    Pair<GroupByDesc, VectorGroupByDesc> descPair = buildGroupByDescType(
        vectorizationContext, "count", GenericUDAFEvaluator.Mode.FINAL, "A", TypeInfoFactory.longTypeInfo);
    VectorGroupByDesc vectorGroupByDesc = descPair.snd;
    // Use GLOBAL when no key for Reduce.
    vectorGroupByDesc.setProcessingMode(ProcessingMode.GLOBAL);

    // The same compilation context is shared by the group-by operator and the capture child.
    CompilationOpContext opContext = new CompilationOpContext();
    Operator<? extends OperatorDesc> baseGroupBy = OperatorFactory.get(opContext, descPair.fst);
    VectorGroupByOperator vectorGroupBy =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(baseGroupBy, vectorizationContext, vectorGroupByDesc);
    FakeCaptureVectorToRowOutputOperator capture =
        FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(opContext, vectorGroupBy);
    vectorGroupBy.initialize(hconf, null);

    for (VectorizedRowBatch batch : data) {
        vectorGroupBy.process(batch, 0);
    }
    vectorGroupBy.close(false);

    // Exactly one row is expected from this key-less aggregation.
    List<Object> capturedRows = capture.getCapturedRows();
    assertNotNull(capturedRows);
    assertEquals(1, capturedRows.size());
    Object onlyRow = capturedRows.get(0);
    getValidator("count").validate("_total", expected, onlyRow);
}
Also used: ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 4 with FakeCaptureVectorToRowOutputOperator

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator in project hive by apache.

Found in the class TestVectorGroupByOperator, method testAggregateStringKeyIterable.

/**
 * Drives a vectorized group-by keyed on the string column "Key", aggregating the
 * column "Value", and validates every emitted row against {@code expected}.
 *
 * <p>Each output row is inspected as it is emitted: it must be a two-element
 * {@code Object[]} of (key, aggregate). The key, if non-null, must be a {@link Text};
 * its string form (or {@code null}) must be present in {@code expected}, and the
 * aggregate is checked by the validator registered for {@code aggregateName}.
 * After the operator is closed, the number of captured rows and the number of
 * distinct keys seen must both equal {@code expected.size()}.
 *
 * @param aggregateName name of the aggregate passed to the descriptor builder and
 *                      used to look up the validator
 * @param data          batches handed to the operator one at a time, in iteration order
 * @param dataTypeInfo  type of the "Value" column
 * @param expected      map from key string (or {@code null}) to the expected aggregate value
 * @throws HiveException propagated from operator construction, processing, or validation
 */
public void testAggregateStringKeyIterable(String aggregateName, Iterable<VectorizedRowBatch> data, TypeInfo dataTypeInfo, HashMap<Object, Object> expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    // Collects the distinct key values observed by the inspector below.
    Set<Object> keys = new HashSet<Object>();
    Pair<GroupByDesc, VectorGroupByDesc> pair = buildKeyGroupByDesc(ctx, aggregateName, "Value", dataTypeInfo, "Key", TypeInfoFactory.stringTypeInfo);
    GroupByDesc desc = pair.fst;
    VectorGroupByDesc vectorDesc = pair.snd;
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

        // State handed in through init(), since an anonymous class cannot declare a constructor.
        private String aggregateName;

        private HashMap<Object, Object> expected;

        private Set<Object> keys;

        @SuppressWarnings("deprecation")
        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(2, fields.length);
            Object key = fields[0];
            // A null key is looked up in the expected map as null.
            String keyValue = null;
            if (null != key) {
                assertTrue(key instanceof Text);
                Text bwKey = (Text) key;
                keyValue = bwKey.toString();
            }
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            Object value = fields[1];
            Validator validator = getValidator(aggregateName);
            // String.valueOf yields "null" for a null key, same as the "%s" format did.
            String keyAsString = String.valueOf(key);
            validator.validate(keyAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, HashMap<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    // One output row per expected key, and every expected key seen exactly once.
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used: Set (java.util.Set), HashSet (java.util.HashSet), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Text (org.apache.hadoop.io.Text), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 5 with FakeCaptureVectorToRowOutputOperator

use of org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator in project hive by apache.

Found in the class TestVectorGroupByOperator, method testAggregateDoubleIterable.

/**
 * Drives a vectorized group-by over the single column "A", typed as double, with the
 * aggregate evaluated in PARTIAL1 mode, then checks the one captured output row.
 *
 * @param aggregateName name of the aggregate passed to the descriptor builder and
 *                      used to look up the validator
 * @param data          batches handed to the operator one at a time, in iteration order
 * @param expected      expected value forwarded to the validator
 * @throws HiveException propagated from operator construction, processing, or validation
 */
public void testAggregateDoubleIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> columnNames = new ArrayList<>();
    columnNames.add("A");
    VectorizationContext vectorizationContext = new VectorizationContext("name", columnNames);

    Pair<GroupByDesc, VectorGroupByDesc> descPair = buildGroupByDescType(
        vectorizationContext, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.doubleTypeInfo);
    VectorGroupByDesc vectorGroupByDesc = descPair.snd;

    // The same compilation context is shared by the group-by operator and the capture child.
    CompilationOpContext opContext = new CompilationOpContext();
    Operator<? extends OperatorDesc> baseGroupBy = OperatorFactory.get(opContext, descPair.fst);
    VectorGroupByOperator vectorGroupBy =
        (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(baseGroupBy, vectorizationContext, vectorGroupByDesc);
    FakeCaptureVectorToRowOutputOperator capture =
        FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(opContext, vectorGroupBy);
    vectorGroupBy.initialize(hconf, null);

    for (VectorizedRowBatch batch : data) {
        vectorGroupBy.process(batch, 0);
    }
    vectorGroupBy.close(false);

    // Exactly one row is expected from this key-less aggregation.
    List<Object> capturedRows = capture.getCapturedRows();
    assertNotNull(capturedRows);
    assertEquals(1, capturedRows.size());
    Object onlyRow = capturedRows.get(0);
    getValidator(aggregateName).validate("_total", expected, onlyRow);
}
Also used: ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)

Aggregations

ArrayList (java.util.ArrayList): 11; CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 11; FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator): 11; GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 11; VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 11; HashMap (java.util.HashMap): 4; HashSet (java.util.HashSet): 4; Set (java.util.Set): 4; LongWritable (org.apache.hadoop.io.LongWritable): 3; Map (java.util.Map): 2; AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc): 2; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 2; ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2; DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 2; ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 2; TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable): 2; BooleanWritable (org.apache.hadoop.io.BooleanWritable): 2; FloatWritable (org.apache.hadoop.io.FloatWritable): 2; IntWritable (org.apache.hadoop.io.IntWritable): 2; Text (org.apache.hadoop.io.Text): 2