Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply in project hive by apache.

From the class TestVectorizationContext, the method testArithmeticExpressionVectorization:
@Test
public void testArithmeticExpressionVectorization() throws HiveException {
  /**
   * Create the original expression tree for the following:
   * (plus (minus (plus col1 col2) col3) (multiply col4 (mod col5 col6)))
   */
  GenericUDFOPPlus udf1 = new GenericUDFOPPlus();
  GenericUDFOPMinus udf2 = new GenericUDFOPMinus();
  GenericUDFOPMultiply udf3 = new GenericUDFOPMultiply();
  GenericUDFOPPlus udf4 = new GenericUDFOPPlus();
  GenericUDFOPMod udf5 = new GenericUDFOPMod();

  ExprNodeGenericFuncDesc sumExpr = new ExprNodeGenericFuncDesc();
  sumExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
  sumExpr.setGenericUDF(udf1);
  ExprNodeGenericFuncDesc minusExpr = new ExprNodeGenericFuncDesc();
  minusExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
  minusExpr.setGenericUDF(udf2);
  ExprNodeGenericFuncDesc multiplyExpr = new ExprNodeGenericFuncDesc();
  multiplyExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
  multiplyExpr.setGenericUDF(udf3);
  ExprNodeGenericFuncDesc sum2Expr = new ExprNodeGenericFuncDesc();
  sum2Expr.setTypeInfo(TypeInfoFactory.intTypeInfo);
  sum2Expr.setGenericUDF(udf4);
  ExprNodeGenericFuncDesc modExpr = new ExprNodeGenericFuncDesc();
  modExpr.setTypeInfo(TypeInfoFactory.intTypeInfo);
  modExpr.setGenericUDF(udf5);

  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
  ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Long.class, "col2", "table", false);
  ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Long.class, "col3", "table", false);
  ExprNodeColumnDesc col4Expr = new ExprNodeColumnDesc(Long.class, "col4", "table", false);
  ExprNodeColumnDesc col5Expr = new ExprNodeColumnDesc(Long.class, "col5", "table", false);
  ExprNodeColumnDesc col6Expr = new ExprNodeColumnDesc(Long.class, "col6", "table", false);

  List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
  List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>(2);
  List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
  List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
  List<ExprNodeDesc> children5 = new ArrayList<ExprNodeDesc>(2);

  // Wire the tree together: the root plus holds the minus and multiply subtrees,
  // the minus holds the inner plus, and the multiply holds the mod.
  children1.add(minusExpr);
  children1.add(multiplyExpr);
  sumExpr.setChildren(children1);
  children2.add(sum2Expr);
  children2.add(col3Expr);
  minusExpr.setChildren(children2);
  children3.add(col1Expr);
  children3.add(col2Expr);
  sum2Expr.setChildren(children3);
  children4.add(col4Expr);
  children4.add(modExpr);
  multiplyExpr.setChildren(children4);
  children5.add(col5Expr);
  children5.add(col6Expr);
  modExpr.setChildren(children5);

  VectorizationContext vc = new VectorizationContext("name");
  vc.addInitialColumn("col1");
  vc.addInitialColumn("col2");
  vc.addInitialColumn("col3");
  vc.addInitialColumn("col4");
  vc.addInitialColumn("col5");
  vc.addInitialColumn("col6");
  vc.finishedAddingInitialColumns();

  // Generate vectorized expression
  VectorExpression ve = vc.getVectorExpression(sumExpr, VectorExpressionDescriptor.Mode.PROJECTION);

  // Verify vectorized expression
  assertTrue(ve instanceof LongColAddLongColumn);
  assertEquals(2, ve.getChildExpressions().length);
  VectorExpression childExpr1 = ve.getChildExpressions()[0];
  VectorExpression childExpr2 = ve.getChildExpressions()[1];
  System.out.println(ve.toString());

  // The six initial columns occupy indices 0..5, so scratch (output) columns start
  // at index 6. Since HIVE-20985 disabled scratch-column reuse, each intermediate
  // result gets its own column: inner plus -> 6, minus -> 7, mod -> 8,
  // multiply -> 9, and the root plus -> 10.
  // TODO: HIVE-20985 disabled output column reuse
  // assertEquals(6, ve.getOutputColumnNum());
  assertEquals(10, ve.getOutputColumnNum());

  assertTrue(childExpr1 instanceof LongColSubtractLongColumn);
  assertEquals(1, childExpr1.getChildExpressions().length);
  assertTrue(childExpr1.getChildExpressions()[0] instanceof LongColAddLongColumn);
  assertEquals(7, childExpr1.getOutputColumnNum());
  assertEquals(6, childExpr1.getChildExpressions()[0].getOutputColumnNum());

  assertTrue(childExpr2 instanceof LongColMultiplyLongColumn);
  assertEquals(1, childExpr2.getChildExpressions().length);
  assertTrue(childExpr2.getChildExpressions()[0] instanceof LongColModuloLongColumn);
  // TODO: HIVE-20985 disabled output column reuse
  // assertEquals(8, childExpr2.getOutputColumnNum());
  // assertEquals(6, childExpr2.getChildExpressions()[0].getOutputColumnNum());
  assertEquals(9, childExpr2.getOutputColumnNum());
  assertEquals(8, childExpr2.getChildExpressions()[0].getOutputColumnNum());
}
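For orientation, here is a minimal evaluation sketch, not part of the Hive test above, showing how the compiled expression could be run against a batch. It assumes the ve produced above and would sit inside the test method (which already declares throws HiveException); the batch construction and sample values are assumptions for illustration, while the column layout mirrors the assertions above (inputs at indices 0..5, scratch columns 6..10).

// Evaluation sketch (assumed setup, not the actual Hive test code).
// The batch needs 11 columns: the 6 inputs (indices 0..5) plus the 5 scratch
// columns (indices 6..10) that the VectorizationContext allocated.
VectorizedRowBatch batch = new VectorizedRowBatch(11, 2);
for (int i = 0; i < 11; i++) {
  batch.cols[i] = new LongColumnVector(2);
}
batch.size = 2;
// Fill col1..col6 (indices 0..5) with sample values for two rows.
for (int c = 0; c < 6; c++) {
  LongColumnVector in = (LongColumnVector) batch.cols[c];
  in.vector[0] = c + 1;        // row 0: col1=1, col2=2, ..., col6=6
  in.vector[1] = 10 * (c + 1); // row 1: col1=10, ..., col6=60
}
// Evaluate the whole tree; child expressions run first, and the root writes
// its result to scratch column 10 (ve.getOutputColumnNum()).
ve.evaluate(batch);
LongColumnVector out = (LongColumnVector) batch.cols[ve.getOutputColumnNum()];
// Row 0: ((1 + 2) - 3) + (4 * (5 % 6)) = 0 + 20 = 20
assertEquals(20, out.vector[0]);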
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPMultiply in project hive by apache.

From the class TestVectorArithmetic, the method doTestsWithDiffColumnScalar:
private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, TypeInfo typeInfo2,
    ColumnScalarMode columnScalarMode, Arithmetic arithmetic, boolean tryDecimal64)
    throws Exception {
  String typeName1 = typeInfo1.getTypeName();
  PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) typeInfo1).getPrimitiveCategory();
  String typeName2 = typeInfo2.getTypeName();
  PrimitiveCategory primitiveCategory2 = ((PrimitiveTypeInfo) typeInfo2).getPrimitiveCategory();

  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
      new ArrayList<DataTypePhysicalVariation>();

  List<String> columns = new ArrayList<String>();
  int columnNum = 1;

  ExprNodeDesc col1Expr;
  Object scalar1Object = null;
  final boolean decimal64Enable1 = checkDecimal64(tryDecimal64, typeInfo1);
  if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
      columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
    generationSpecList.add(GenerationSpec.createSameType(typeInfo1));
    explicitDataTypePhysicalVariationList.add(
        decimal64Enable1 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col1Expr = new ExprNodeColumnDesc(typeInfo1, columnName, "table", false);
    columns.add(columnName);
  } else {
    scalar1Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo1);
    // Adjust the decimal type to the scalar's type...
    if (typeInfo1 instanceof DecimalTypeInfo) {
      typeInfo1 = getDecimalScalarTypeInfo(scalar1Object);
    }
    col1Expr = new ExprNodeConstantDesc(typeInfo1, scalar1Object);
  }

  ExprNodeDesc col2Expr;
  Object scalar2Object = null;
  final boolean decimal64Enable2 = checkDecimal64(tryDecimal64, typeInfo2);
  if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
      columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
    generationSpecList.add(GenerationSpec.createSameType(typeInfo2));
    explicitDataTypePhysicalVariationList.add(
        decimal64Enable2 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col2Expr = new ExprNodeColumnDesc(typeInfo2, columnName, "table", false);
    columns.add(columnName);
  } else {
    scalar2Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo2);
    // Adjust the decimal type to the scalar's type...
    if (typeInfo2 instanceof DecimalTypeInfo) {
      typeInfo2 = getDecimalScalarTypeInfo(scalar2Object);
    }
    col2Expr = new ExprNodeConstantDesc(typeInfo2, scalar2Object);
  }
  List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>();
  objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo1));
  objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo2));

  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(col1Expr);
  children.add(col2Expr);

  // ----------------------------------------------------------------------------------------------

  String[] columnNames = columns.toArray(new String[0]);

  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(
      random, generationSpecList,
      /* maxComplexDepth */ 0, /* allowNull */ true, /* isUnicodeOk */ true,
      explicitDataTypePhysicalVariationList);

  Object[][] randomRows = rowSource.randomRows(100000);

  VectorRandomBatchSource batchSource =
      VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);
  GenericUDF genericUdf;
  switch (arithmetic) {
  case ADD:
    genericUdf = new GenericUDFOPPlus();
    break;
  case SUBTRACT:
    genericUdf = new GenericUDFOPMinus();
    break;
  case MULTIPLY:
    genericUdf = new GenericUDFOPMultiply();
    break;
  case DIVIDE:
    genericUdf = new GenericUDFOPDivide();
    break;
  case MODULUS:
    genericUdf = new GenericUDFOPMod();
    break;
  default:
    throw new RuntimeException("Unexpected arithmetic " + arithmetic);
  }

  // Initializing the UDF against the two input inspectors determines the common
  // (possibly widened) output type of the arithmetic expression.
  ObjectInspector[] objectInspectors =
      objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]);
  ObjectInspector outputObjectInspector = null;
  try {
    outputObjectInspector = genericUdf.initialize(objectInspectors);
  } catch (Exception e) {
    Assert.fail(e.toString());
  }
  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);

  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
  final int rowCount = randomRows.length;
  Object[][] resultObjectsArray = new Object[ArithmeticTestMode.count][];
  for (int i = 0; i < ArithmeticTestMode.count; i++) {
    Object[] resultObjects = new Object[rowCount];
    resultObjectsArray[i] = resultObjects;
    ArithmeticTestMode arithmeticTestMode = ArithmeticTestMode.values()[i];
    switch (arithmeticTestMode) {
    case ROW_MODE:
      doRowArithmeticTest(typeInfo1, typeInfo2, columns, children, exprDesc, arithmetic,
          randomRows, columnScalarMode, rowSource.rowStructObjectInspector(), outputTypeInfo,
          resultObjects);
      break;
    case ADAPTOR:
    case VECTOR_EXPRESSION:
      doVectorArithmeticTest(typeInfo1, typeInfo2, columns, columnNames, rowSource.typeInfos(),
          rowSource.dataTypePhysicalVariations(), children, exprDesc, arithmetic,
          arithmeticTestMode, columnScalarMode, batchSource,
          exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects);
      break;
    default:
      throw new RuntimeException("Unexpected arithmetic test mode " + arithmeticTestMode);
    }
  }
  for (int i = 0; i < rowCount; i++) {
    // Row-mode is the expected value.
    Object expectedResult = resultObjectsArray[0][i];
    for (int v = 1; v < ArithmeticTestMode.count; v++) {
      Object vectorResult = resultObjectsArray[v][i];
      if (expectedResult == null || vectorResult == null) {
        if (expectedResult != null || vectorResult != null) {
          Assert.fail(
              "Row " + i +
              " typeName1 " + typeName1 + " typeName2 " + typeName2 +
              " outputTypeName " + outputTypeInfo.getTypeName() +
              " " + arithmetic + " " + ArithmeticTestMode.values()[v] + " " + columnScalarMode +
              " result is NULL " + (vectorResult == null) +
              " does not match row-mode expected result is NULL " + (expectedResult == null) +
              (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ?
                  " scalar1 " + scalar1Object.toString() : "") +
              " row values " + Arrays.toString(randomRows[i]) +
              (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ?
                  " scalar2 " + scalar2Object.toString() : ""));
        }
      } else {
        if (!expectedResult.equals(vectorResult)) {
          Assert.fail(
              "Row " + i +
              " typeName1 " + typeName1 + " typeName2 " + typeName2 +
              " outputTypeName " + outputTypeInfo.getTypeName() +
              " " + arithmetic + " " + ArithmeticTestMode.values()[v] + " " + columnScalarMode +
              " result " + vectorResult.toString() +
              " (" + vectorResult.getClass().getSimpleName() + ")" +
              " does not match row-mode expected result " + expectedResult.toString() +
              " (" + expectedResult.getClass().getSimpleName() + ")" +
              (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ?
                  " scalar1 " + scalar1Object.toString() : "") +
              " row values " + Arrays.toString(randomRows[i]) +
              (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ?
                  " scalar2 " + scalar2Object.toString() : ""));
        }
      }
    }
  }
}
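For reference, a hypothetical direct invocation of the helper above. The enum constants and TypeInfoFactory fields are the ones used in the code above, but the seed and the standalone call are assumptions for illustration; the real test class reaches this method through its own @Test entry points.

// Hypothetical driver sketch (illustrative only, not the actual Hive test wiring).
// Exercise COLUMN_COLUMN multiplication of an int column by a bigint column,
// comparing row mode against the adaptor and vectorized modes, Decimal64 off.
Random random = new Random(12345);
doTestsWithDiffColumnScalar(
    random,
    TypeInfoFactory.intTypeInfo,
    TypeInfoFactory.longTypeInfo,
    ColumnScalarMode.COLUMN_COLUMN,
    Arithmetic.MULTIPLY,
    /* tryDecimal64 */ false);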