Search in sources :

Example 11 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorizationContext method testVectorizeAndOrProjectionExpression.

@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    greaterExprDesc.setGenericUDF(udf);
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(constDesc);
    greaterExprDesc.setChildren(children1);
    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
    GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
    ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
    andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    andExprDesc.setGenericUDF(andUdf);
    List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
    children3.add(greaterExprDesc);
    children3.add(col2Expr);
    andExprDesc.setChildren(children3);
    List<String> columns = new ArrayList<String>();
    columns.add("col1");
    columns.add("col2");
    VectorizationContext vc = new VectorizationContext("name", columns);
    VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(veAnd.getClass(), FilterExprAndExpr.class);
    assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
    assertEquals(veAnd.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
    veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    assertEquals(veAnd.getClass(), ColAndCol.class);
    assertEquals(1, veAnd.getChildExpressions().length);
    assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
    assertEquals(2, ((ColAndCol) veAnd).getColNum1());
    assertEquals(1, ((ColAndCol) veAnd).getColNum2());
    assertEquals(3, ((ColAndCol) veAnd).getOutputColumn());
    //OR
    GenericUDFOPOr orUdf = new GenericUDFOPOr();
    ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
    orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    orExprDesc.setGenericUDF(orUdf);
    List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
    children4.add(greaterExprDesc);
    children4.add(col2Expr);
    orExprDesc.setChildren(children4);
    //Allocate new Vectorization context to reset the intermediate columns.
    vc = new VectorizationContext("name", columns);
    VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(veOr.getClass(), FilterExprOrExpr.class);
    assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
    assertEquals(veOr.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
    veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    assertEquals(veOr.getClass(), ColOrCol.class);
    assertEquals(1, veAnd.getChildExpressions().length);
    assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
    assertEquals(2, ((ColOrCol) veOr).getColNum1());
    assertEquals(1, ((ColOrCol) veOr).getColNum2());
    assertEquals(3, ((ColOrCol) veOr).getOutputColumn());
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 12 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorizationContext method testMathFunctions.

@Test
public void testMathFunctions() throws HiveException {
    ExprNodeGenericFuncDesc mathFuncExpr = new ExprNodeGenericFuncDesc();
    mathFuncExpr.setTypeInfo(TypeInfoFactory.doubleTypeInfo);
    ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Integer.class, "a", "table", false);
    ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Double.class, "b", "table", false);
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
    List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
    children1.add(colDesc1);
    children2.add(colDesc2);
    List<String> columns = new ArrayList<String>();
    columns.add("b");
    columns.add("a");
    VectorizationContext vc = new VectorizationContext("name", columns);
    // Sin(double)
    GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName());
    mathFuncExpr.setGenericUDF(gudfBridge);
    mathFuncExpr.setChildren(children2);
    VectorExpression ve = vc.getVectorExpression(mathFuncExpr, VectorExpressionDescriptor.Mode.PROJECTION);
    Assert.assertEquals(FuncSinDoubleToDouble.class, ve.getClass());
    // Round without digits
    GenericUDFRound udfRound = new GenericUDFRound();
    mathFuncExpr.setGenericUDF(udfRound);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());
    // BRound without digits
    GenericUDFBRound udfBRound = new GenericUDFBRound();
    mathFuncExpr.setGenericUDF(udfBRound);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncBRoundDoubleToDouble.class, ve.getClass());
    // Round with digits
    mathFuncExpr.setGenericUDF(udfRound);
    children2.add(new ExprNodeConstantDesc(4));
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(RoundWithNumDigitsDoubleToDouble.class, ve.getClass());
    Assert.assertEquals(4, ((RoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());
    // BRound with digits
    mathFuncExpr.setGenericUDF(udfBRound);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(BRoundWithNumDigitsDoubleToDouble.class, ve.getClass());
    Assert.assertEquals(4, ((BRoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());
    // Logger with int base
    gudfBridge = new GenericUDFBridge("log", false, UDFLog.class.getName());
    mathFuncExpr.setGenericUDF(gudfBridge);
    children2.clear();
    children2.add(new ExprNodeConstantDesc(4.0));
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
    Assert.assertTrue(4 == ((FuncLogWithBaseDoubleToDouble) ve).getBase());
    // Logger with default base
    children2.clear();
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLnDoubleToDouble.class, ve.getClass());
    //Log with double base
    children2.clear();
    children2.add(new ExprNodeConstantDesc(4.5));
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
    Assert.assertTrue(4.5 == ((FuncLogWithBaseDoubleToDouble) ve).getBase());
    //Log with int input and double base
    children2.clear();
    children2.add(new ExprNodeConstantDesc(4.5));
    children2.add(colDesc1);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLogWithBaseLongToDouble.class, ve.getClass());
    Assert.assertTrue(4.5 == ((FuncLogWithBaseLongToDouble) ve).getBase());
    //Power with double power
    children2.clear();
    children2.add(colDesc2);
    children2.add(new ExprNodeConstantDesc(4.5));
    mathFuncExpr.setGenericUDF(new GenericUDFPower());
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncPowerDoubleToDouble.class, ve.getClass());
    Assert.assertTrue(4.5 == ((FuncPowerDoubleToDouble) ve).getPower());
    //Round with default decimal places
    mathFuncExpr.setGenericUDF(udfRound);
    children2.clear();
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFPower(org.apache.hadoop.hive.ql.udf.generic.GenericUDFPower) FuncLogWithBaseLongToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble) FuncPowerDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) UDFSin(org.apache.hadoop.hive.ql.udf.UDFSin) GenericUDFBRound(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBRound) GenericUDFRound(org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) BRoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble) RoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble) BRoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) FuncLogWithBaseDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble) Test(org.junit.Test)

Example 13 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorGroupByOperator method testMultiKey.

private void testMultiKey(String aggregateName, FakeVectorRowBatchFromObjectIterables data, HashMap<Object, Object> expected) throws HiveException {
    Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
    Set<Object> keys = new HashSet<Object>();
    // The types array tells us the number of columns in the data
    final String[] columnTypes = data.getTypes();
    // Columns 0..N-1 are keys. Column N is the aggregate value input
    int i = 0;
    for (; i < columnTypes.length - 1; ++i) {
        String columnName = String.format("_col%d", i);
        mapColumnNames.put(columnName, i);
        outputColumnNames.add(columnName);
    }
    mapColumnNames.put("value", i);
    outputColumnNames.add("value");
    VectorizationContext ctx = new VectorizationContext("name", outputColumnNames);
    ArrayList<AggregationDesc> aggs = new ArrayList(1);
    aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
    for (i = 0; i < columnTypes.length - 1; ++i) {
        String columnName = String.format("_col%d", i);
        keysDesc.add(buildColumnDesc(ctx, columnName, TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
    }
    GroupByDesc desc = new GroupByDesc();
    desc.setVectorDesc(new VectorGroupByDesc());
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    desc.setKeys(keysDesc);
    ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH);
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

        private int rowIndex;

        private String aggregateName;

        private Map<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(columnTypes.length, fields.length);
            ArrayList<Object> keyValue = new ArrayList<Object>(columnTypes.length - 1);
            for (int i = 0; i < columnTypes.length - 1; ++i) {
                Object key = fields[i];
                if (null == key) {
                    keyValue.add(null);
                } else if (key instanceof Text) {
                    Text txKey = (Text) key;
                    keyValue.add(txKey.toString());
                } else if (key instanceof ByteWritable) {
                    ByteWritable bwKey = (ByteWritable) key;
                    keyValue.add(bwKey.get());
                } else if (key instanceof ShortWritable) {
                    ShortWritable swKey = (ShortWritable) key;
                    keyValue.add(swKey.get());
                } else if (key instanceof IntWritable) {
                    IntWritable iwKey = (IntWritable) key;
                    keyValue.add(iwKey.get());
                } else if (key instanceof LongWritable) {
                    LongWritable lwKey = (LongWritable) key;
                    keyValue.add(lwKey.get());
                } else if (key instanceof TimestampWritable) {
                    TimestampWritable twKey = (TimestampWritable) key;
                    keyValue.add(twKey.getTimestamp());
                } else if (key instanceof DoubleWritable) {
                    DoubleWritable dwKey = (DoubleWritable) key;
                    keyValue.add(dwKey.get());
                } else if (key instanceof FloatWritable) {
                    FloatWritable fwKey = (FloatWritable) key;
                    keyValue.add(fwKey.get());
                } else if (key instanceof BooleanWritable) {
                    BooleanWritable bwKey = (BooleanWritable) key;
                    keyValue.add(bwKey.get());
                } else {
                    Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
                }
            }
            String keyAsString = Arrays.deepToString(keyValue.toArray());
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            Object value = fields[columnTypes.length - 1];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        private FakeCaptureOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HashSet(java.util.HashSet) FakeCaptureOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator) Text(org.apache.hadoop.io.Text) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) HashMap(java.util.HashMap)

Example 14 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorGroupByOperator method buildAggregationDescCountStar.

private static AggregationDesc buildAggregationDescCountStar(VectorizationContext ctx) {
    AggregationDesc agg = new AggregationDesc();
    agg.setGenericUDAFName("COUNT");
    agg.setMode(GenericUDAFEvaluator.Mode.PARTIAL1);
    agg.setParameters(new ArrayList<ExprNodeDesc>());
    return agg;
}
Also used : AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 15 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorGroupByOperator method buildAggregationDesc.

private static AggregationDesc buildAggregationDesc(VectorizationContext ctx, String aggregate, GenericUDAFEvaluator.Mode mode, String column, TypeInfo typeInfo) {
    ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo);
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(inputColumn);
    AggregationDesc agg = new AggregationDesc();
    agg.setGenericUDAFName(aggregate);
    agg.setMode(mode);
    agg.setParameters(params);
    return agg;
}
Also used : ArrayList(java.util.ArrayList) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)321 ArrayList (java.util.ArrayList)179 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)146 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)110 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)101 Test (org.junit.Test)74 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)69 HashMap (java.util.HashMap)67 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)57 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)47 LinkedHashMap (java.util.LinkedHashMap)43 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)42 List (java.util.List)40 Operator (org.apache.hadoop.hive.ql.exec.Operator)39 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)35 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)34 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)34 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)34 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)33 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)32