Search in sources :

Example 26 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class TestVectorizationContext method testFilterStringColCompareStringColumnExpressions.

/**
 * Verifies that a FILTER-mode "col1 &gt; col2" comparison between two columns vectorizes to
 * {@code FilterStringGroupColGreaterStringGroupColumn} for every pairing of string-family
 * column types exercised here: STRING, CHAR(10) and VARCHAR(10), homogeneous and mixed.
 *
 * @throws HiveException if building the vectorization context or the expression fails
 */
@Test
public void testFilterStringColCompareStringColumnExpressions() throws HiveException {
    List<String> columns = new ArrayList<String>();
    columns.add("col0");
    columns.add("col1");
    columns.add("col2");
    // Strings test
    assertGreaterThanFilterUsesStringGroupColumns("(STRING, STRING)", columns,
        new ExprNodeColumnDesc(String.class, "col1", "table", false),
        new ExprNodeColumnDesc(String.class, "col2", "table", false));
    // 2 CHAR test
    CharTypeInfo charTypeInfo = new CharTypeInfo(10);
    assertGreaterThanFilterUsesStringGroupColumns("(CHAR, CHAR)", columns,
        new ExprNodeColumnDesc(charTypeInfo, "col1", "table", false),
        new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false));
    // 2 VARCHAR test
    VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10);
    assertGreaterThanFilterUsesStringGroupColumns("(VARCHAR, VARCHAR)", columns,
        new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false),
        new ExprNodeColumnDesc(varcharTypeInfo, "col2", "table", false));
    // Some mix tests (STRING, CHAR), (VARCHAR, CHAR), (VARCHAR, STRING)...
    assertGreaterThanFilterUsesStringGroupColumns("(STRING, CHAR)", columns,
        new ExprNodeColumnDesc(String.class, "col1", "table", false),
        new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false));
    assertGreaterThanFilterUsesStringGroupColumns("(VARCHAR, CHAR)", columns,
        new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false),
        new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false));
    assertGreaterThanFilterUsesStringGroupColumns("(VARCHAR, STRING)", columns,
        new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false),
        new ExprNodeColumnDesc(String.class, "col2", "table", false));
}

/**
 * Builds a "left &gt; right" expression over the two column descriptors, vectorizes it in
 * FILTER mode against a fresh {@code VectorizationContext} and asserts that the result is a
 * {@code FilterStringGroupColGreaterStringGroupColumn}.
 *
 * @param label   human-readable type pairing, used only in the assertion failure message
 * @param columns column names handed to the vectorization context
 * @param left    left operand of the comparison
 * @param right   right operand of the comparison
 * @throws HiveException if the context or the vector expression cannot be created
 */
private void assertGreaterThanFilterUsesStringGroupColumns(String label, List<String> columns,
        ExprNodeColumnDesc left, ExprNodeColumnDesc right) throws HiveException {
    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc();
    exprDesc.setGenericUDF(new GenericUDFOPGreaterThan());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(2);
    children.add(left);
    children.add(right);
    exprDesc.setChildren(children);
    VectorizationContext vc = new VectorizationContext("name", columns);
    VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
    String actual = (ve == null) ? "null" : ve.getClass().getName();
    assertTrue(label + " comparison vectorized to " + actual,
        ve instanceof FilterStringGroupColGreaterStringGroupColumn);
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) ArrayList(java.util.ArrayList) FilterStringGroupColGreaterStringGroupColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterStringGroupColumn) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Test(org.junit.Test)

Example 27 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class TestVectorizationContext method testStringFilterExpressions.

/**
 * Checks that the FILTER-mode comparison of a string column with a string constant
 * ("col1 &gt; 'Alpha'") vectorizes to {@code FilterStringGroupColGreaterStringScalar}.
 *
 * @throws HiveException if building the vectorization context or the expression fails
 */
@Test
public void testStringFilterExpressions() throws HiveException {
    // Column names handed to the vectorization context.
    List<String> columnNames = new ArrayList<String>();
    columnNames.add("col0");
    columnNames.add("col1");
    columnNames.add("col2");

    // Operands of the comparison: a string column and a string literal.
    List<ExprNodeDesc> operands = new ArrayList<ExprNodeDesc>(2);
    operands.add(new ExprNodeColumnDesc(String.class, "col1", "table", false));
    operands.add(new ExprNodeConstantDesc("Alpha"));

    ExprNodeGenericFuncDesc greaterThan = new ExprNodeGenericFuncDesc();
    greaterThan.setGenericUDF(new GenericUDFOPGreaterThan());
    greaterThan.setChildren(operands);

    VectorizationContext context = new VectorizationContext("name", columnNames);
    VectorExpression filter =
        context.getVectorExpression(greaterThan, VectorExpressionDescriptor.Mode.FILTER);
    assertTrue(filter instanceof FilterStringGroupColGreaterStringScalar);
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) FilterStringGroupColGreaterStringScalar(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterStringScalar) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Test(org.junit.Test)

Example 28 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class TestVectorizationContext method testInFiltersAndExprs.

/**
 * Test translation of both IN filters and boolean-valued IN expressions (non-filters).
 *
 * One IN descriptor and one vectorization context are used throughout; the string, long and
 * double cases are produced by overwriting the entries of the argument list in place.
 * NOTE(review): this presumes the descriptor keeps a reference to that list rather than a
 * copy - confirm against ExprNodeGenericFuncDesc.
 *
 * @throws HiveException if building the vectorization context or an expression fails
 */
@Test
public void testInFiltersAndExprs() throws HiveException {
    final VectorExpressionDescriptor.Mode filterMode = VectorExpressionDescriptor.Mode.FILTER;
    final VectorExpressionDescriptor.Mode projectionMode = VectorExpressionDescriptor.Mode.PROJECTION;

    // string IN: col1 IN ("Alpha", "Bravo")
    List<ExprNodeDesc> inArgs = new ArrayList<ExprNodeDesc>();
    inArgs.add(new ExprNodeColumnDesc(String.class, "col1", "table", false));
    inArgs.add(new ExprNodeConstantDesc("Alpha"));
    inArgs.add(new ExprNodeConstantDesc("Bravo"));
    ExprNodeGenericFuncDesc inExpr =
        new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFIn(), inArgs);

    List<String> columnNames = new ArrayList<String>();
    columnNames.add("col0");
    columnNames.add("col1");
    columnNames.add("col2");
    VectorizationContext context = new VectorizationContext("name", columnNames);

    VectorExpression translated = context.getVectorExpression(inExpr, filterMode);
    assertTrue(translated instanceof FilterStringColumnInList);
    translated = context.getVectorExpression(inExpr, projectionMode);
    assertTrue(translated instanceof StringColumnInList);

    // long IN: col1 IN (10, 20)
    inArgs.set(0, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
    inArgs.set(1, new ExprNodeConstantDesc(10));
    inArgs.set(2, new ExprNodeConstantDesc(20));
    translated = context.getVectorExpression(inExpr, filterMode);
    assertTrue(translated instanceof FilterLongColumnInList);
    translated = context.getVectorExpression(inExpr, projectionMode);
    assertTrue(translated instanceof LongColumnInList);

    // double IN: col1 IN (10d, 20d)
    inArgs.set(0, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
    inArgs.set(1, new ExprNodeConstantDesc(10d));
    inArgs.set(2, new ExprNodeConstantDesc(20d));
    translated = context.getVectorExpression(inExpr, filterMode);
    assertTrue(translated instanceof FilterDoubleColumnInList);
    translated = context.getVectorExpression(inExpr, projectionMode);
    assertTrue(translated instanceof DoubleColumnInList);
}
Also used : FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) FilterLongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList) LongColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) FilterDoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList) DoubleColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) BRoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble) FuncRoundDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncRoundDoubleToDouble) FuncBRoundDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncBRoundDoubleToDouble) FuncLogWithBaseDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble) FuncLogWithBaseLongToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble) FuncPowerDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble) FuncLnDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncLnDoubleToDouble) FuncSinDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncSinDoubleToDouble) RoundWithNumDigitsDoubleToDouble(org.apache.hadoop.hive.ql.exec.vector.expressions.RoundWithNumDigitsDoubleToDouble) FilterStringColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList) FilterStringColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList) 
StringColumnInList(org.apache.hadoop.hive.ql.exec.vector.expressions.StringColumnInList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) GenericUDFIn(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) DynamicValueVectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Test(org.junit.Test)

Example 29 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class TestVectorUDFAdaptor method testMultiArgumentUDF.

/**
 * Runs a three-argument function call "testudf(col0, col1, col2)", bridged to
 * {@code ConcatTextLongDoubleUDF}, through {@code VectorUDFAdaptor} and checks the string
 * output column (index 3) for three batch set-ups: the batch as built by
 * {@code getBatchStrDblLongWithStrOut()}, the same batch with {@code noNulls} cleared on
 * input column 1, and the same batch with all three input columns flagged as repeating.
 */
@Test
public void testMultiArgumentUDF() {
    // create a syntax tree for a function call "testudf(col0, col1, col2)"
    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
    TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
    TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false, ConcatTextLongDoubleUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));
    // Argument i of the UDF is bound to batch column i.
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
    for (int i = 0; i < 3; i++) {
        argDescs[i] = new VectorUDFArgDesc();
        argDescs[i].setVariable(i);
    }
    ExprNodeGenericFuncDesc funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge, genericUDFBridge.getUdfName(), children);
    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
    } catch (HiveException e) {
        // We should never get here. Fail the test, but keep the cause in the report.
        throw new AssertionError("Unexpected HiveException while creating VectorUDFAdaptor", e);
    }
    // Expected outputs. The Charset overload of getBytes declares no checked exception,
    // so there is nothing to catch (and nothing to silently swallow).
    byte[] result = "red:1:1.0".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    byte[] result2 = "blue:0:0.0".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    // with no nulls
    VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
    vudf.evaluate(b);
    BytesColumnVector out = (BytesColumnVector) b.cols[3];
    int cmp = StringExpr.compare(result, 0, result.length, out.vector[1], out.start[1], out.length[1]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);
    // with nulls
    // NOTE(review): presumably the helper batch already marks row 1 of column 1 as null and
    // only noNulls needs clearing here - confirm against getBatchStrDblLongWithStrOut().
    b = getBatchStrDblLongWithStrOut();
    b.cols[1].noNulls = false;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertFalse(out.noNulls);
    assertTrue(out.isNull[1]);
    // with all input columns repeating
    b = getBatchStrDblLongWithStrOut();
    b.cols[0].isRepeating = true;
    b.cols[1].isRepeating = true;
    b.cols[2].isRepeating = true;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertTrue(out.isRepeating);
    cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0], out.start[0], out.length[0]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ConcatTextLongDoubleUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF) Test(org.junit.Test)

Example 30 with ExprNodeGenericFuncDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc in project hive by apache.

the class TestVectorUDFAdaptor method testLongUDF.

/**
 * Runs a single-argument function call "longudf(col0)", bridged to {@code LongUDF}, through
 * {@code VectorUDFAdaptor} and checks the long output column (index 1) for three batch
 * set-ups: the batch as built by {@code getBatchLongInLongOut()}, the same batch with
 * {@code noNulls} cleared on the input column, and the same batch with the input column
 * flagged as repeating.
 */
@Test
public void testLongUDF() {
    // create a syntax tree for a simple function call "longudf(col0)"
    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false, LongUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
    children.add(colDesc);
    // The single UDF argument is bound to batch column 0.
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
    argDescs[0] = new VectorUDFArgDesc();
    argDescs[0].setVariable(0);
    ExprNodeGenericFuncDesc funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge, genericUDFBridge.getUdfName(), children);
    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
    } catch (HiveException e) {
        // We should never get here. Fail the test, but keep the cause in the report.
        throw new AssertionError("Unexpected HiveException while creating VectorUDFAdaptor", e);
    }
    VectorizedRowBatch b = getBatchLongInLongOut();
    vudf.evaluate(b);
    // verify output
    LongColumnVector out = (LongColumnVector) b.cols[1];
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertEquals(1002, out.vector[2]);
    assertTrue(out.noNulls);
    assertFalse(out.isRepeating);
    // with nulls
    // NOTE(review): presumably the helper batch already marks row 2 of the input column as
    // null and only noNulls needs clearing here - confirm against getBatchLongInLongOut().
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].noNulls = false;
    vudf.evaluate(b);
    assertFalse(out.noNulls);
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertTrue(out.isNull[2]);
    assertFalse(out.isRepeating);
    // with repeating
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].isRepeating = true;
    vudf.evaluate(b);
    // The implementation may or may not set isRepeating on the output column;
    // that is implementation-defined, so accept either form of the same result.
    assertTrue((b.cols[1].isRepeating && out.vector[0] == 1000) || (!b.cols[1].isRepeating && out.vector[2] == 1000));
    assertEquals(3, b.size);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LongUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Aggregations

ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)150 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)123 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)98 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)89 ArrayList (java.util.ArrayList)76 Test (org.junit.Test)68 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)26 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)26 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)24 GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd)24 List (java.util.List)20 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)19 Range (org.apache.accumulo.core.data.Range)18 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)18 GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan)18 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)18 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)15 GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)15 HashMap (java.util.HashMap)14 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)14