Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorizationContext, method testVectorizeAndOrProjectionExpression:
@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
  ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
  GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(udf);
  List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
  children1.add(col1Expr);
  children1.add(constDesc);
  greaterExprDesc.setChildren(children1);
  ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
  GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
  ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
  andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  andExprDesc.setGenericUDF(andUdf);
  List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
  children3.add(greaterExprDesc);
  children3.add(col2Expr);
  andExprDesc.setChildren(children3);
  List<String> columns = new ArrayList<String>();
  columns.add("col1");
  columns.add("col2");
  VectorizationContext vc = new VectorizationContext("name", columns);
  VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(veAnd.getClass(), FilterExprAndExpr.class);
  assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
  assertEquals(veAnd.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
  veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(veAnd.getClass(), ColAndCol.class);
  assertEquals(1, veAnd.getChildExpressions().length);
  assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
  assertEquals(2, ((ColAndCol) veAnd).getColNum1());
  assertEquals(1, ((ColAndCol) veAnd).getColNum2());
  assertEquals(3, ((ColAndCol) veAnd).getOutputColumn());
  // OR
  GenericUDFOPOr orUdf = new GenericUDFOPOr();
  ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
  orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  orExprDesc.setGenericUDF(orUdf);
  List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
  children4.add(greaterExprDesc);
  children4.add(col2Expr);
  orExprDesc.setChildren(children4);
  // Allocate a new VectorizationContext to reset the intermediate columns.
  vc = new VectorizationContext("name", columns);
  VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(veOr.getClass(), FilterExprOrExpr.class);
  assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
  assertEquals(veOr.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
  veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(veOr.getClass(), ColOrCol.class);
  assertEquals(1, veOr.getChildExpressions().length);
  assertEquals(veOr.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
  assertEquals(2, ((ColOrCol) veOr).getColNum1());
  assertEquals(1, ((ColOrCol) veOr).getColNum2());
  assertEquals(3, ((ColOrCol) veOr).getOutputColumn());
}
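The setTypeInfo/setGenericUDF/setChildren triple used above can usually be collapsed into a single call. Below is a minimal sketch, assuming the ExprNodeGenericFuncDesc.newInstance factory (which initializes the UDF against its children and derives the boolean return type itself); the helper name buildGreaterThanExpr is ours, not part of the test:

// A minimal sketch, not the test's own code: the same "col1 > 10"
// predicate built through the newInstance factory, which infers the
// return type instead of requiring an explicit setTypeInfo call.
private static ExprNodeGenericFuncDesc buildGreaterThanExpr() throws UDFArgumentException {
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
  ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(10);
  return ExprNodeGenericFuncDesc.newInstance(
      new GenericUDFOPGreaterThan(),
      Arrays.<ExprNodeDesc>asList(col1Expr, constDesc));
}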
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorizationContext, method testMathFunctions:
@Test
public void testMathFunctions() throws HiveException {
  ExprNodeGenericFuncDesc mathFuncExpr = new ExprNodeGenericFuncDesc();
  mathFuncExpr.setTypeInfo(TypeInfoFactory.doubleTypeInfo);
  ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Integer.class, "a", "table", false);
  ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Double.class, "b", "table", false);
  List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
  List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
  children1.add(colDesc1);
  children2.add(colDesc2);
  List<String> columns = new ArrayList<String>();
  columns.add("b");
  columns.add("a");
  VectorizationContext vc = new VectorizationContext("name", columns);
  // Sin(double)
  GenericUDFBridge gudfBridge = new GenericUDFBridge("sin", false, UDFSin.class.getName());
  mathFuncExpr.setGenericUDF(gudfBridge);
  mathFuncExpr.setChildren(children2);
  VectorExpression ve = vc.getVectorExpression(mathFuncExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  Assert.assertEquals(FuncSinDoubleToDouble.class, ve.getClass());
  // Round without digits
  GenericUDFRound udfRound = new GenericUDFRound();
  mathFuncExpr.setGenericUDF(udfRound);
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());
  // BRound without digits
  GenericUDFBRound udfBRound = new GenericUDFBRound();
  mathFuncExpr.setGenericUDF(udfBRound);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncBRoundDoubleToDouble.class, ve.getClass());
  // Round with digits
  mathFuncExpr.setGenericUDF(udfRound);
  children2.add(new ExprNodeConstantDesc(4));
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(RoundWithNumDigitsDoubleToDouble.class, ve.getClass());
  Assert.assertEquals(4, ((RoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());
  // BRound with digits
  mathFuncExpr.setGenericUDF(udfBRound);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(BRoundWithNumDigitsDoubleToDouble.class, ve.getClass());
  Assert.assertEquals(4, ((BRoundWithNumDigitsDoubleToDouble) ve).getDecimalPlaces().get());
  // Log with int base
  gudfBridge = new GenericUDFBridge("log", false, UDFLog.class.getName());
  mathFuncExpr.setGenericUDF(gudfBridge);
  children2.clear();
  children2.add(new ExprNodeConstantDesc(4.0));
  children2.add(colDesc2);
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
  Assert.assertTrue(4 == ((FuncLogWithBaseDoubleToDouble) ve).getBase());
  // Log with default base
  children2.clear();
  children2.add(colDesc2);
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLnDoubleToDouble.class, ve.getClass());
  // Log with double base
  children2.clear();
  children2.add(new ExprNodeConstantDesc(4.5));
  children2.add(colDesc2);
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLogWithBaseDoubleToDouble.class, ve.getClass());
  Assert.assertTrue(4.5 == ((FuncLogWithBaseDoubleToDouble) ve).getBase());
  // Log with int input and double base
  children2.clear();
  children2.add(new ExprNodeConstantDesc(4.5));
  children2.add(colDesc1);
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncLogWithBaseLongToDouble.class, ve.getClass());
  Assert.assertTrue(4.5 == ((FuncLogWithBaseLongToDouble) ve).getBase());
  // Power with double power
  children2.clear();
  children2.add(colDesc2);
  children2.add(new ExprNodeConstantDesc(4.5));
  mathFuncExpr.setGenericUDF(new GenericUDFPower());
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncPowerDoubleToDouble.class, ve.getClass());
  Assert.assertTrue(4.5 == ((FuncPowerDoubleToDouble) ve).getPower());
  // Round with default decimal places
  mathFuncExpr.setGenericUDF(udfRound);
  children2.clear();
  children2.add(colDesc2);
  mathFuncExpr.setChildren(children2);
  ve = vc.getVectorExpression(mathFuncExpr);
  Assert.assertEquals(FuncRoundDoubleToDouble.class, ve.getClass());
}
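All of the assertions above follow one naming convention: a scalar UDF over a double column vectorizes to Func&lt;Name&gt;DoubleToDouble, and over an integer-family column to Func&lt;Name&gt;LongToDouble. Below is a minimal sketch extending the pattern to sqrt, assuming UDFSqrt bridges the same way UDFSin does and that FuncSqrtDoubleToDouble is the class the convention predicts; the test name is ours:

// A minimal sketch under the Func<Name><Input>To<Output> naming
// convention; assumes UDFSqrt is bridged exactly like UDFSin above.
@Test
public void testSqrtVectorizes() throws HiveException {
  List<String> columns = new ArrayList<String>();
  columns.add("b");
  VectorizationContext vc = new VectorizationContext("name", columns);
  ExprNodeGenericFuncDesc sqrtExpr = new ExprNodeGenericFuncDesc();
  sqrtExpr.setTypeInfo(TypeInfoFactory.doubleTypeInfo);
  sqrtExpr.setGenericUDF(new GenericUDFBridge("sqrt", false, UDFSqrt.class.getName()));
  sqrtExpr.setChildren(Arrays.<ExprNodeDesc>asList(
      new ExprNodeColumnDesc(Double.class, "b", "table", false)));
  VectorExpression ve = vc.getVectorExpression(sqrtExpr, VectorExpressionDescriptor.Mode.PROJECTION);
  Assert.assertEquals(FuncSqrtDoubleToDouble.class, ve.getClass());
}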
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorGroupByOperator, method testMultiKey:
private void testMultiKey(String aggregateName, FakeVectorRowBatchFromObjectIterables data, HashMap<Object, Object> expected) throws HiveException {
  Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
  Set<Object> keys = new HashSet<Object>();
  // The types array tells us the number of columns in the data.
  final String[] columnTypes = data.getTypes();
  // Columns 0..N-1 are keys. Column N is the aggregate value input.
  int i = 0;
  for (; i < columnTypes.length - 1; ++i) {
    String columnName = String.format("_col%d", i);
    mapColumnNames.put(columnName, i);
    outputColumnNames.add(columnName);
  }
  mapColumnNames.put("value", i);
  outputColumnNames.add("value");
  VectorizationContext ctx = new VectorizationContext("name", outputColumnNames);
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(1);
  aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
  for (i = 0; i < columnTypes.length - 1; ++i) {
    String columnName = String.format("_col%d", i);
    keysDesc.add(buildColumnDesc(ctx, columnName, TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
  }
  GroupByDesc desc = new GroupByDesc();
  desc.setVectorDesc(new VectorGroupByDesc());
  desc.setOutputColumnNames(outputColumnNames);
  desc.setAggregators(aggs);
  desc.setKeys(keysDesc);
  ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH);
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
  FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

    private int rowIndex;
    private String aggregateName;
    private Map<Object, Object> expected;
    private Set<Object> keys;

    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      assertTrue(row instanceof Object[]);
      Object[] fields = (Object[]) row;
      assertEquals(columnTypes.length, fields.length);
      ArrayList<Object> keyValue = new ArrayList<Object>(columnTypes.length - 1);
      for (int i = 0; i < columnTypes.length - 1; ++i) {
        Object key = fields[i];
        if (null == key) {
          keyValue.add(null);
        } else if (key instanceof Text) {
          Text txKey = (Text) key;
          keyValue.add(txKey.toString());
        } else if (key instanceof ByteWritable) {
          ByteWritable bwKey = (ByteWritable) key;
          keyValue.add(bwKey.get());
        } else if (key instanceof ShortWritable) {
          ShortWritable swKey = (ShortWritable) key;
          keyValue.add(swKey.get());
        } else if (key instanceof IntWritable) {
          IntWritable iwKey = (IntWritable) key;
          keyValue.add(iwKey.get());
        } else if (key instanceof LongWritable) {
          LongWritable lwKey = (LongWritable) key;
          keyValue.add(lwKey.get());
        } else if (key instanceof TimestampWritable) {
          TimestampWritable twKey = (TimestampWritable) key;
          keyValue.add(twKey.getTimestamp());
        } else if (key instanceof DoubleWritable) {
          DoubleWritable dwKey = (DoubleWritable) key;
          keyValue.add(dwKey.get());
        } else if (key instanceof FloatWritable) {
          FloatWritable fwKey = (FloatWritable) key;
          keyValue.add(fwKey.get());
        } else if (key instanceof BooleanWritable) {
          BooleanWritable bwKey = (BooleanWritable) key;
          keyValue.add(bwKey.get());
        } else {
          Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
        }
      }
      String keyAsString = Arrays.deepToString(keyValue.toArray());
      assertTrue(expected.containsKey(keyValue));
      Object expectedValue = expected.get(keyValue);
      Object value = fields[columnTypes.length - 1];
      Validator validator = getValidator(aggregateName);
      validator.validate(keyAsString, expectedValue, new Object[] { value });
      keys.add(keyValue);
    }

    private FakeCaptureOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
      this.aggregateName = aggregateName;
      this.expected = expected;
      this.keys = keys;
      return this;
    }
  }.init(aggregateName, expected, keys));
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(expected.size(), outBatchList.size());
  assertEquals(expected.size(), keys.size());
}
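A hypothetical invocation sketch follows: two smallint key columns and a bigint value column, summed per key. The data values are ours, and the FakeVectorRowBatchFromObjectIterables constructor shape (batch size, a per-column type array, then one Iterable per column) is assumed from how the helper consumes it above:

// Hypothetical invocation sketch; constructor shape and data are assumed.
FakeVectorRowBatchFromObjectIterables data = new FakeVectorRowBatchFromObjectIterables(
    2,
    new String[] { "smallint", "smallint", "bigint" },
    Arrays.<Object>asList((short) 1, (short) 1, (short) 2),
    Arrays.<Object>asList((short) 1, (short) 2, (short) 2),
    Arrays.<Object>asList(1L, 2L, 4L));
HashMap<Object, Object> expected = new HashMap<Object, Object>();
expected.put(Arrays.asList((short) 1, (short) 1), 1L);
expected.put(Arrays.asList((short) 1, (short) 2), 2L);
expected.put(Arrays.asList((short) 2, (short) 2), 4L);
testMultiKey("sum", data, expected);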
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorGroupByOperator, method buildAggregationDescCountStar:
private static AggregationDesc buildAggregationDescCountStar(VectorizationContext ctx) {
  AggregationDesc agg = new AggregationDesc();
  agg.setGenericUDAFName("COUNT");
  agg.setMode(GenericUDAFEvaluator.Mode.PARTIAL1);
  // COUNT(*) takes no arguments, so the parameter list stays empty.
  agg.setParameters(new ArrayList<ExprNodeDesc>());
  return agg;
}
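The empty parameter list is the point: COUNT(*) evaluates no column expression, so the vectorized aggregate only counts rows per batch. A hypothetical usage sketch (variable names are ours):

// Hypothetical usage sketch: a COUNT(*) aggregate wired into a GroupByDesc.
VectorizationContext ctx = new VectorizationContext("name", Arrays.asList("value"));
ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(1);
aggs.add(buildAggregationDescCountStar(ctx));
GroupByDesc desc = new GroupByDesc();
desc.setAggregators(aggs);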
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorGroupByOperator, method buildAggregationDesc:
private static AggregationDesc buildAggregationDesc(VectorizationContext ctx, String aggregate, GenericUDAFEvaluator.Mode mode, String column, TypeInfo typeInfo) {
  ExprNodeDesc inputColumn = buildColumnDesc(ctx, column, typeInfo);
  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(inputColumn);
  AggregationDesc agg = new AggregationDesc();
  agg.setGenericUDAFName(aggregate);
  agg.setMode(mode);
  agg.setParameters(params);
  return agg;
}
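A hypothetical call site, pairing the helper with a bigint input column for a partial-phase SUM; the ctx variable and the "value" column name are assumed to come from a setup like testMultiKey above:

// Hypothetical usage sketch: partial-phase SUM over a long-typed "value" column.
AggregationDesc sumAgg = buildAggregationDesc(
    ctx, "sum", GenericUDAFEvaluator.Mode.PARTIAL1, "value",
    TypeInfoFactory.longTypeInfo);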