Search in sources :

Example 11 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestVectorizationContext method testVectorizeAndOrProjectionExpression.

/**
 * Checks the vector expressions produced for {@code (col1 > 10) AND col2} and
 * {@code (col1 > 10) OR col2}, in both FILTER and PROJECTION mode.
 *
 * <p>For each operator the test asserts the concrete expression class, the classes of its
 * child expressions and, in PROJECTION mode, the input and output column numbers.
 *
 * @throws HiveException if the vectorization context fails to build an expression
 */
@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
    // col1 > 10
    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
    // Integer.valueOf replaces the deprecated Integer(int) constructor.
    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(Integer.valueOf(10));
    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
    ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
    greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    greaterExprDesc.setGenericUDF(udf);
    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
    children1.add(col1Expr);
    children1.add(constDesc);
    greaterExprDesc.setChildren(children1);

    // (col1 > 10) AND col2
    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
    GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
    ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
    andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    andExprDesc.setGenericUDF(andUdf);
    List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
    children3.add(greaterExprDesc);
    children3.add(col2Expr);
    andExprDesc.setChildren(children3);

    List<String> columns = new ArrayList<String>();
    columns.add("col1");
    columns.add("col2");
    VectorizationContext vc = new VectorizationContext("name", columns);

    // AND, filter mode
    VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(FilterExprAndExpr.class, veAnd.getClass());
    assertEquals(FilterLongColGreaterLongScalar.class, veAnd.getChildExpressions()[0].getClass());
    assertEquals(SelectColumnIsTrue.class, veAnd.getChildExpressions()[1].getClass());

    // AND, projection mode
    veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    assertEquals(ColAndCol.class, veAnd.getClass());
    assertEquals(1, veAnd.getChildExpressions().length);
    assertEquals(LongColGreaterLongScalar.class, veAnd.getChildExpressions()[0].getClass());
    assertEquals(2, ((ColAndCol) veAnd).getColNum1());
    assertEquals(1, ((ColAndCol) veAnd).getColNum2());
    assertEquals(3, ((ColAndCol) veAnd).getOutputColumn());

    // (col1 > 10) OR col2
    GenericUDFOPOr orUdf = new GenericUDFOPOr();
    ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
    orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
    orExprDesc.setGenericUDF(orUdf);
    List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
    children4.add(greaterExprDesc);
    children4.add(col2Expr);
    orExprDesc.setChildren(children4);

    // Allocate a new vectorization context to reset the intermediate columns.
    vc = new VectorizationContext("name", columns);

    // OR, filter mode
    VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
    assertEquals(FilterExprOrExpr.class, veOr.getClass());
    assertEquals(FilterLongColGreaterLongScalar.class, veOr.getChildExpressions()[0].getClass());
    assertEquals(SelectColumnIsTrue.class, veOr.getChildExpressions()[1].getClass());

    // OR, projection mode. These child checks must inspect veOr; they previously
    // re-checked veAnd, leaving the OR projection's children unverified.
    veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
    assertEquals(ColOrCol.class, veOr.getClass());
    assertEquals(1, veOr.getChildExpressions().length);
    assertEquals(LongColGreaterLongScalar.class, veOr.getChildExpressions()[0].getClass());
    assertEquals(2, ((ColOrCol) veOr).getColNum1());
    assertEquals(1, ((ColOrCol) veOr).getColNum2());
    assertEquals(3, ((ColOrCol) veOr).getOutputColumn());
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 12 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestVectorizationContext method testMathFunctions.

/**
 * Checks which vector expression class is chosen for several math functions
 * (sin, round, bround, log, power) applied to a double column {@code b} or an
 * integer column {@code a}, with and without a constant second argument.
 *
 * <p>A single {@link ExprNodeGenericFuncDesc} and a single argument list are reused and
 * mutated between cases, so the order of the statements below matters.
 *
 * @throws HiveException if the vectorization context fails to build an expression
 */
@Test
public void testMathFunctions() throws HiveException {
    ExprNodeGenericFuncDesc mathFuncExpr = new ExprNodeGenericFuncDesc();
    mathFuncExpr.setTypeInfo(TypeInfoFactory.doubleTypeInfo);
    ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Integer.class, "a", "table", false);
    ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Double.class, "b", "table", false);
    // Argument list shared by every case; it is rebuilt in place as the cases progress.
    List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
    children2.add(colDesc2);
    List<String> columns = new ArrayList<String>();
    columns.add("b");
    columns.add("a");
    VectorizationContext vc = new VectorizationContext("name", columns);

    // Sin(double)
    GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName());
    mathFuncExpr.setGenericUDF(gudfBridge);
    mathFuncExpr.setChildren(children2);
    VectorExpression ve = vc.getVectorExpression(mathFuncExpr, VectorExpressionDescriptor.Mode.PROJECTION);
    Assert.assertEquals(FuncSinDoubleToDouble.class, ve.getClass());

    // Round without digits
    GenericUDFRound udfRound = new GenericUDFRound();
    mathFuncExpr.setGenericUDF(udfRound);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());

    // BRound without digits
    GenericUDFBRound udfBRound = new GenericUDFBRound();
    mathFuncExpr.setGenericUDF(udfBRound);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncBRoundDoubleToDouble.class, ve.getClass());

    // Round with digits
    mathFuncExpr.setGenericUDF(udfRound);
    children2.add(new ExprNodeConstantDesc(4));
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(RoundWithNumDigitsDoubleToDouble.class, ve.getClass());
    Assert.assertEquals(4, ((RoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());

    // BRound with digits (argument list still holds the column and the constant 4)
    mathFuncExpr.setGenericUDF(udfBRound);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(BRoundWithNumDigitsDoubleToDouble.class, ve.getClass());
    Assert.assertEquals(4, ((BRoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());

    // Log with an explicit base of 4
    gudfBridge = new GenericUDFBridge("log", false, UDFLog.class.getName());
    mathFuncExpr.setGenericUDF(gudfBridge);
    children2.clear();
    children2.add(new ExprNodeConstantDesc(4.0));
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
    // Exact comparison (delta 0.0): the base is expected to be stored unchanged.
    Assert.assertEquals(4.0, ((FuncLogWithBaseDoubleToDouble) ve).getBase(), 0.0);

    // Log with default base
    children2.clear();
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLnDoubleToDouble.class, ve.getClass());

    // Log with double base
    children2.clear();
    children2.add(new ExprNodeConstantDesc(4.5));
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
    Assert.assertEquals(4.5, ((FuncLogWithBaseDoubleToDouble) ve).getBase(), 0.0);

    // Log with int input and double base
    children2.clear();
    children2.add(new ExprNodeConstantDesc(4.5));
    children2.add(colDesc1);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncLogWithBaseLongToDouble.class, ve.getClass());
    Assert.assertEquals(4.5, ((FuncLogWithBaseLongToDouble) ve).getBase(), 0.0);

    // Power with double power
    children2.clear();
    children2.add(colDesc2);
    children2.add(new ExprNodeConstantDesc(4.5));
    mathFuncExpr.setGenericUDF(new GenericUDFPower());
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncPowerDoubleToDouble.class, ve.getClass());
    Assert.assertEquals(4.5, ((FuncPowerDoubleToDouble) ve).getPower(), 0.0);

    // Round with default decimal places
    mathFuncExpr.setGenericUDF(udfRound);
    children2.clear();
    children2.add(colDesc2);
    mathFuncExpr.setChildren(children2);
    ve = vc.getVectorExpression(mathFuncExpr);
    Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFPower(org.apache.hadoop.hive.ql.udf.generic.GenericUDFPower) FuncLogWithBaseLongToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble) FuncPowerDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) UDFSin(org.apache.hadoop.hive.ql.udf.UDFSin) GenericUDFBRound(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBRound) GenericUDFRound(org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) BRoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble) RoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble) BRoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) FuncLogWithBaseDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble) Test(org.junit.Test)

Example 13 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestVectorFilterOperator method getAVectorFilterOperator.

/**
 * Builds a {@link VectorFilterOperator} whose predicate is the single {@code Long}
 * column {@code col1}, vectorized against a context that knows only that column.
 *
 * @return the vectorized filter operator
 * @throws HiveException if creating the context or vectorizing the operator fails
 */
private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
    // Row-mode filter: the predicate is just the column reference itself.
    FilterDesc filterDesc = new FilterDesc();
    filterDesc.setPredicate(new ExprNodeColumnDesc(Long.class, "col1", "table", false));
    Operator<? extends OperatorDesc> rowModeFilter =
        OperatorFactory.get(new CompilationOpContext(), filterDesc);

    // Vectorization context exposing the one input column.
    List<String> columnNames = new ArrayList<String>();
    columnNames.add("col1");
    VectorizationContext context = new VectorizationContext("name", columnNames);

    return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(rowModeFilter, context);
}
Also used : FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList)

Example 14 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestVectorUDFAdaptor method testMultiArgumentUDF.

/**
 * Runs a three-argument legacy UDF ({@code testudf(col0, col1, col2)}) through
 * {@link VectorUDFAdaptor} and checks the string output column for three batches:
 * one without nulls, one with a nullable input column, and one with all inputs repeating.
 */
@Test
public void testMultiArgumentUDF() {
    // create a syntax tree for a function call "testudf(col0, col1, col2)"
    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
    TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
    TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false, ConcatTextLongDoubleUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));
    // Argument i of the UDF is bound to batch column i.
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
    for (int i = 0; i < 3; i++) {
        argDescs[i] = new VectorUDFArgDesc();
        argDescs[i].setVariable(i);
    }
    ExprNodeGenericFuncDesc funcDesc =
        new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge, genericUDFBridge.getUdfName(), children);

    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
    } catch (HiveException e) {
        // Construction is not expected to fail; keep the cause so the report is actionable.
        throw new AssertionError("Unexpected failure constructing VectorUDFAdaptor", e);
    }

    // with no nulls
    VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
    vudf.evaluate(b);
    // The Charset overload cannot throw, so no exception needs to be caught (or swallowed).
    byte[] result = "red:1:1.0".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    byte[] result2 = "blue:0:0.0".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    BytesColumnVector out = (BytesColumnVector) b.cols[3];
    int cmp = StringExpr.compare(result, 0, result.length, out.vector[1], out.start[1], out.length[1]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);

    // with nulls
    b = getBatchStrDblLongWithStrOut();
    b.cols[1].noNulls = false;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertFalse(out.noNulls);
    assertTrue(out.isNull[1]);

    // with all input columns repeating
    b = getBatchStrDblLongWithStrOut();
    b.cols[0].isRepeating = true;
    b.cols[1].isRepeating = true;
    b.cols[2].isRepeating = true;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertTrue(out.isRepeating);
    cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0], out.start[0], out.length[0]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ConcatTextLongDoubleUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF) Test(org.junit.Test)

Example 15 with ExprNodeColumnDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.

the class TestVectorUDFAdaptor method testLongUDF.

/**
 * Runs a one-argument legacy UDF ({@code longudf(col0)}) through {@link VectorUDFAdaptor}
 * and checks the long output column for three batches: one without nulls, one with a
 * nullable input column, and one with a repeating input column.
 */
@Test
public void testLongUDF() {
    // create a syntax tree for a simple function call "longudf(col0)"
    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false, LongUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
    children.add(colDesc);
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
    argDescs[0] = new VectorUDFArgDesc();
    argDescs[0].setVariable(0);
    ExprNodeGenericFuncDesc funcDesc =
        new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge, genericUDFBridge.getUdfName(), children);

    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
    } catch (HiveException e) {
        // Construction is not expected to fail; keep the cause so the report is actionable.
        throw new AssertionError("Unexpected failure constructing VectorUDFAdaptor", e);
    }

    VectorizedRowBatch b = getBatchLongInLongOut();
    vudf.evaluate(b);

    // verify output
    LongColumnVector out = (LongColumnVector) b.cols[1];
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertEquals(1002, out.vector[2]);
    assertTrue(out.noNulls);
    assertFalse(out.isRepeating);

    // with nulls
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].noNulls = false;
    vudf.evaluate(b);
    assertFalse(out.noNulls);
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertTrue(out.isNull[2]);
    assertFalse(out.isRepeating);

    // with repeating
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].isRepeating = true;
    vudf.evaluate(b);
    // The implementation may or may not mark the output as isRepeating;
    // that is implementation-defined, so accept either representation.
    assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000 || !b.cols[1].isRepeating && out.vector[2] == 1000);
    assertEquals(3, b.size);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LongUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Aggregations

ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)161 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)145 ArrayList (java.util.ArrayList)93 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)88 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)78 Test (org.junit.Test)65 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)43 HashMap (java.util.HashMap)40 LinkedHashMap (java.util.LinkedHashMap)30 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)28 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)24 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)22 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)21 Operator (org.apache.hadoop.hive.ql.exec.Operator)19 GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)19 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)19 List (java.util.List)17 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)17 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)17