Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in the Apache Hive project.
From the class TestVectorizationContext, method testVectorizeAndOrProjectionExpression.
/**
 * Checks how the predicates {@code (col1 > 10) AND col2} and {@code (col1 > 10) OR col2}
 * are vectorized in both FILTER and PROJECTION mode.
 *
 * <p>For each mode the test asserts the class of the top-level vector expression and of its
 * child expressions. In PROJECTION mode it additionally asserts the two input column numbers
 * and the output column number of the resulting {@code ColAndCol} / {@code ColOrCol}.
 *
 * @throws HiveException if the vectorization context fails to build an expression
 */
@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
  // Build the shared left operand: col1 > 10
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
  ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(Integer.valueOf(10));
  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(new GenericUDFOPGreaterThan());
  List<ExprNodeDesc> greaterChildren = new ArrayList<ExprNodeDesc>(2);
  greaterChildren.add(col1Expr);
  greaterChildren.add(constDesc);
  greaterExprDesc.setChildren(greaterChildren);

  // Shared right operand: the boolean column col2
  ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);

  // AND: (col1 > 10) AND col2
  ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
  andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  andExprDesc.setGenericUDF(new GenericUDFOPAnd());
  List<ExprNodeDesc> andChildren = new ArrayList<ExprNodeDesc>(2);
  andChildren.add(greaterExprDesc);
  andChildren.add(col2Expr);
  andExprDesc.setChildren(andChildren);

  List<String> columns = new ArrayList<String>();
  columns.add("col1");
  columns.add("col2");
  VectorizationContext vc = new VectorizationContext("name", columns);

  VectorExpression veAnd =
      vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(FilterExprAndExpr.class, veAnd.getClass());
  assertEquals(FilterLongColGreaterLongScalar.class, veAnd.getChildExpressions()[0].getClass());
  assertEquals(SelectColumnIsTrue.class, veAnd.getChildExpressions()[1].getClass());

  veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(ColAndCol.class, veAnd.getClass());
  assertEquals(1, veAnd.getChildExpressions().length);
  assertEquals(LongColGreaterLongScalar.class, veAnd.getChildExpressions()[0].getClass());
  // Column 1 is col2 (second entry of `columns`); 2 and 3 are presumably scratch columns
  // allocated by the context for the comparison result and the AND output -- TODO confirm.
  assertEquals(2, ((ColAndCol) veAnd).getColNum1());
  assertEquals(1, ((ColAndCol) veAnd).getColNum2());
  assertEquals(3, ((ColAndCol) veAnd).getOutputColumn());

  // OR: (col1 > 10) OR col2
  ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
  orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  orExprDesc.setGenericUDF(new GenericUDFOPOr());
  List<ExprNodeDesc> orChildren = new ArrayList<ExprNodeDesc>(2);
  orChildren.add(greaterExprDesc);
  orChildren.add(col2Expr);
  orExprDesc.setChildren(orChildren);

  // Allocate a new vectorization context to reset the intermediate columns.
  vc = new VectorizationContext("name", columns);

  VectorExpression veOr =
      vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(FilterExprOrExpr.class, veOr.getClass());
  assertEquals(FilterLongColGreaterLongScalar.class, veOr.getChildExpressions()[0].getClass());
  assertEquals(SelectColumnIsTrue.class, veOr.getChildExpressions()[1].getClass());

  veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(ColOrCol.class, veOr.getClass());
  // Inspect the OR expression's own children (previously this re-checked veAnd by mistake).
  assertEquals(1, veOr.getChildExpressions().length);
  assertEquals(LongColGreaterLongScalar.class, veOr.getChildExpressions()[0].getClass());
  assertEquals(2, ((ColOrCol) veOr).getColNum1());
  assertEquals(1, ((ColOrCol) veOr).getColNum2());
  assertEquals(3, ((ColOrCol) veOr).getOutputColumn());
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in the Apache Hive project.
From the class TestVectorizationContext, method testMathFunctions.
/**
 * Verifies which vector expression class is chosen for several math functions (sin, round,
 * bround, log, power) and that constant arguments (decimal places, log base, power) are
 * carried into the resulting expression.
 *
 * <p>A single {@code ExprNodeGenericFuncDesc} and a single argument list are reused and
 * mutated between cases, so the order of the statements below matters.
 *
 * @throws HiveException if the vectorization context fails to build an expression
 */
@Test
public void testMathFunctions() throws HiveException {
  ExprNodeGenericFuncDesc mathFuncExpr = new ExprNodeGenericFuncDesc();
  mathFuncExpr.setTypeInfo(TypeInfoFactory.doubleTypeInfo);
  ExprNodeColumnDesc intColDesc = new ExprNodeColumnDesc(Integer.class, "a", "table", false);
  ExprNodeColumnDesc doubleColDesc = new ExprNodeColumnDesc(Double.class, "b", "table", false);
  // Argument list shared by every case; cleared and refilled as the cases change.
  List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
  args.add(doubleColDesc);
  List<String> columns = new ArrayList<String>();
  columns.add("b");
  columns.add("a");
  VectorizationContext vc = new VectorizationContext("name", columns);

  // Sin(double)
  GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName());
  mathFuncExpr.setGenericUDF(gudfBridge);
  mathFuncExpr.setChildren(args);
  VectorExpression ve =
      vc.getVectorExpression(mathFuncExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  Assert.assertEquals(FuncSinDoubleToDouble.class, ve.getClass());

  // Round without digits
  GenericUDFRound udfRound = new GenericUDFRound();
  mathFuncExpr.setGenericUDF(udfRound);
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());

  // BRound without digits
  GenericUDFBRound udfBRound = new GenericUDFBRound();
  mathFuncExpr.setGenericUDF(udfBRound);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncBRoundDoubleToDouble.class, ve.getClass());

  // Round with digits
  mathFuncExpr.setGenericUDF(udfRound);
  args.add(new ExprNodeConstantDesc(4));
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(RoundWithNumDigitsDoubleToDouble.class, ve.getClass());
  Assert.assertEquals(4, ((RoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());

  // BRound with digits
  mathFuncExpr.setGenericUDF(udfBRound);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(BRoundWithNumDigitsDoubleToDouble.class, ve.getClass());
  Assert.assertEquals(4, ((BRoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());

  // Log with a whole-number base (4.0)
  gudfBridge = new GenericUDFBridge("log", false, UDFLog.class.getName());
  mathFuncExpr.setGenericUDF(gudfBridge);
  args.clear();
  args.add(new ExprNodeConstantDesc(4.0));
  args.add(doubleColDesc);
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
  // Delta 0.0: the base is a constant passed through, so it must match exactly.
  Assert.assertEquals(4.0, ((FuncLogWithBaseDoubleToDouble) ve).getBase(), 0.0);

  // Log with default base
  args.clear();
  args.add(doubleColDesc);
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLnDoubleToDouble.class, ve.getClass());

  // Log with double base
  args.clear();
  args.add(new ExprNodeConstantDesc(4.5));
  args.add(doubleColDesc);
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
  Assert.assertEquals(4.5, ((FuncLogWithBaseDoubleToDouble) ve).getBase(), 0.0);

  // Log with int input and double base
  args.clear();
  args.add(new ExprNodeConstantDesc(4.5));
  args.add(intColDesc);
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLogWithBaseLongToDouble.class, ve.getClass());
  Assert.assertEquals(4.5, ((FuncLogWithBaseLongToDouble) ve).getBase(), 0.0);

  // Power with double power
  args.clear();
  args.add(doubleColDesc);
  args.add(new ExprNodeConstantDesc(4.5));
  mathFuncExpr.setGenericUDF(new GenericUDFPower());
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncPowerDoubleToDouble.class, ve.getClass());
  Assert.assertEquals(4.5, ((FuncPowerDoubleToDouble) ve).getPower(), 0.0);

  // Round with default decimal places
  mathFuncExpr.setGenericUDF(udfRound);
  args.clear();
  args.add(doubleColDesc);
  mathFuncExpr.setChildren(args);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in the Apache Hive project.
From the class TestVectorFilterOperator, method getAVectorFilterOperator.
/**
 * Builds a vectorized filter operator whose predicate is the single column {@code col1}.
 *
 * @return the result of vectorizing a plain filter operator over a one-column context
 * @throws HiveException if creating the context or vectorizing the operator fails
 */
private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
  // Row-mode filter description: the predicate is just the column reference itself.
  final ExprNodeColumnDesc predicate =
      new ExprNodeColumnDesc(Long.class, "col1", "table", false);
  final List<String> columnNames = new ArrayList<String>();
  columnNames.add("col1");
  final FilterDesc filterDesc = new FilterDesc();
  filterDesc.setPredicate(predicate);

  final Operator<? extends OperatorDesc> rowModeFilter =
      OperatorFactory.get(new CompilationOpContext(), filterDesc);
  final VectorizationContext vContext = new VectorizationContext("name", columnNames);

  return (VectorFilterOperator) Vectorizer.vectorizeFilterOperator(rowModeFilter, vContext);
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in the Apache Hive project.
From the class TestVectorUDFAdaptor, method testMultiArgumentUDF.
/**
 * Runs the three-argument call {@code testudf(col0, col1, col2)} through a
 * {@code VectorUDFAdaptor} and checks the string output column (index 3 of the batch) in
 * three situations: no nulls, a null-bearing input column, and all input columns repeating.
 */
@Test
public void testMultiArgumentUDF() {
  // Create a syntax tree for the function call "testudf(col0, col1, col2)".
  TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
  TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
  TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
  GenericUDFBridge genericUDFBridge =
      new GenericUDFBridge("testudf", false, ConcatTextLongDoubleUDF.class.getName());
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
  children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
  children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));

  // Argument i of the UDF is bound to variable (column) i.
  VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
  for (int i = 0; i < argDescs.length; i++) {
    argDescs[i] = new VectorUDFArgDesc();
    argDescs[i].setVariable(i);
  }
  ExprNodeGenericFuncDesc funcDesc = new ExprNodeGenericFuncDesc(
      typeInfoStr, genericUDFBridge, genericUDFBridge.getUdfName(), children);

  // Create the adaptor for this function call to work in vector mode.
  VectorUDFAdaptor vudf;
  try {
    vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
  } catch (HiveException e) {
    // Fail the test but keep the underlying cause visible in the report.
    throw new AssertionError("VectorUDFAdaptor construction should not fail", e);
  }

  // StandardCharsets avoids the checked UnsupportedEncodingException, so nothing has to be
  // swallowed and the expected values can never be left null.
  final byte[] expectedRow1 = "red:1:1.0".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  final byte[] expectedRepeating =
      "blue:0:0.0".getBytes(java.nio.charset.StandardCharsets.UTF_8);

  // with no nulls
  VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
  vudf.evaluate(b);
  BytesColumnVector out = (BytesColumnVector) b.cols[3];
  int cmp = StringExpr.compare(
      expectedRow1, 0, expectedRow1.length, out.vector[1], out.start[1], out.length[1]);
  assertEquals(0, cmp);
  assertTrue(out.noNulls);

  // with nulls
  // NOTE(review): row 1 is presumably already flagged null in col 1 by the batch helper;
  // only noNulls is flipped here -- confirm against getBatchStrDblLongWithStrOut().
  b = getBatchStrDblLongWithStrOut();
  b.cols[1].noNulls = false;
  vudf.evaluate(b);
  out = (BytesColumnVector) b.cols[3];
  assertFalse(out.noNulls);
  assertTrue(out.isNull[1]);

  // with all input columns repeating
  b = getBatchStrDblLongWithStrOut();
  b.cols[0].isRepeating = true;
  b.cols[1].isRepeating = true;
  b.cols[2].isRepeating = true;
  vudf.evaluate(b);
  out = (BytesColumnVector) b.cols[3];
  assertTrue(out.isRepeating);
  cmp = StringExpr.compare(
      expectedRepeating, 0, expectedRepeating.length, out.vector[0], out.start[0], out.length[0]);
  assertEquals(0, cmp);
  assertTrue(out.noNulls);
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in the Apache Hive project.
From the class TestVectorUDFAdaptor, method testLongUDF.
/**
 * Runs the single-argument call {@code longudf(col0)} through a {@code VectorUDFAdaptor} and
 * checks the long output column (index 1 of the batch) in three situations: no nulls, a
 * null-bearing input column, and a repeating input column.
 */
@Test
public void testLongUDF() {
  // Create a syntax tree for the simple function call "longudf(col0)".
  TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
  GenericUDFBridge genericUDFBridge =
      new GenericUDFBridge("longudf", false, LongUDF.class.getName());
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false));
  VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
  argDescs[0] = new VectorUDFArgDesc();
  argDescs[0].setVariable(0);
  ExprNodeGenericFuncDesc funcDesc = new ExprNodeGenericFuncDesc(
      typeInfo, genericUDFBridge, genericUDFBridge.getUdfName(), children);

  // Create the adaptor for this function call to work in vector mode.
  VectorUDFAdaptor vudf;
  try {
    vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
  } catch (HiveException e) {
    // Fail the test but keep the underlying cause visible in the report.
    throw new AssertionError("VectorUDFAdaptor construction should not fail", e);
  }

  VectorizedRowBatch b = getBatchLongInLongOut();
  vudf.evaluate(b);

  // verify output
  LongColumnVector out = (LongColumnVector) b.cols[1];
  assertEquals(1000, out.vector[0]);
  assertEquals(1001, out.vector[1]);
  assertEquals(1002, out.vector[2]);
  assertTrue(out.noNulls);
  assertFalse(out.isRepeating);

  // with nulls
  // NOTE(review): row 2 is presumably already flagged null in col 0 by the batch helper;
  // only noNulls is flipped here -- confirm against getBatchLongInLongOut().
  b = getBatchLongInLongOut();
  out = (LongColumnVector) b.cols[1];
  b.cols[0].noNulls = false;
  vudf.evaluate(b);
  assertFalse(out.noNulls);
  assertEquals(1000, out.vector[0]);
  assertEquals(1001, out.vector[1]);
  assertTrue(out.isNull[2]);
  assertFalse(out.isRepeating);

  // with repeating
  b = getBatchLongInLongOut();
  out = (LongColumnVector) b.cols[1];
  b.cols[0].isRepeating = true;
  vudf.evaluate(b);
  // The implementation may or may not set isRepeating on the output; that is
  // implementation-defined, so accept either representation of "every row is 1000".
  assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000
      || !b.cols[1].isRepeating && out.vector[2] == 1000);
  assertEquals(3, b.size);
}
Aggregations