Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual in the Apache Hive project.
Class TestVectorFilterCompare, method doTestsWithDiffColumnScalar.
/**
 * Runs one randomized comparison test for a pair of operand types and checks that every
 * vectorized test mode agrees with row-mode evaluation.
 *
 * <p>Each operand becomes either a generated column or a random constant, depending on
 * {@code columnScalarMode}. The comparison is then evaluated once per
 * {@link FilterCompareTestMode} over 100000 random rows, and the results at index 0 of the
 * result array (row mode) are treated as the expected values for the remaining modes.
 *
 * @param random source of randomness for scalars, rows and batches
 * @param typeInfo1 type of the left operand
 * @param typeInfo2 type of the right operand
 * @param columnScalarMode which operands are columns and which are scalars
 * @param comparison the comparison operator under test
 * @param tryDecimal64 passed to {@code checkDecimal64} to decide whether a column is generated
 *        with the {@code DECIMAL_64} physical variation
 * @throws Exception if the UDF cannot be initialized or one of the evaluation helpers fails
 */
private void doTestsWithDiffColumnScalar(Random random, TypeInfo typeInfo1, TypeInfo typeInfo2, ColumnScalarMode columnScalarMode, Comparison comparison, boolean tryDecimal64) throws Exception {
  String typeName1 = typeInfo1.getTypeName();
  PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) typeInfo1).getPrimitiveCategory();
  String typeName2 = typeInfo2.getTypeName();
  PrimitiveCategory primitiveCategory2 = ((PrimitiveTypeInfo) typeInfo2).getPrimitiveCategory();

  List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
  List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
  List<String> columns = new ArrayList<String>();
  int columnNum = 1;

  // Left operand: a generated column, or a random constant of typeInfo1.
  ExprNodeDesc col1Expr;
  Object scalar1Object = null;
  final boolean decimal64Enable1 = checkDecimal64(tryDecimal64, typeInfo1);
  if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
    generationSpecList.add(GenerationSpec.createSameType(typeInfo1));
    explicitDataTypePhysicalVariationList.add(decimal64Enable1 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col1Expr = new ExprNodeColumnDesc(typeInfo1, columnName, "table", false);
    columns.add(columnName);
  } else {
    scalar1Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo1);
    // Adjust the decimal type to the scalar's type...
    if (typeInfo1 instanceof DecimalTypeInfo) {
      typeInfo1 = getDecimalScalarTypeInfo(scalar1Object);
    }
    col1Expr = new ExprNodeConstantDesc(typeInfo1, scalar1Object);
  }

  // Right operand: a generated column, or a random constant of typeInfo2.
  ExprNodeDesc col2Expr;
  Object scalar2Object = null;
  final boolean decimal64Enable2 = checkDecimal64(tryDecimal64, typeInfo2);
  if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
    generationSpecList.add(GenerationSpec.createSameType(typeInfo2));
    explicitDataTypePhysicalVariationList.add(decimal64Enable2 ? DataTypePhysicalVariation.DECIMAL_64 : DataTypePhysicalVariation.NONE);
    String columnName = "col" + (columnNum++);
    col2Expr = new ExprNodeColumnDesc(typeInfo2, columnName, "table", false);
    columns.add(columnName);
  } else {
    scalar2Object = VectorRandomRowSource.randomPrimitiveObject(random, (PrimitiveTypeInfo) typeInfo2);
    // Adjust the decimal type to the scalar's type...
    if (typeInfo2 instanceof DecimalTypeInfo) {
      typeInfo2 = getDecimalScalarTypeInfo(scalar2Object);
    }
    col2Expr = new ExprNodeConstantDesc(typeInfo2, scalar2Object);
  }

  List<ObjectInspector> objectInspectorList = new ArrayList<ObjectInspector>();
  objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo1));
  objectInspectorList.add(VectorRandomRowSource.getObjectInspector(typeInfo2));

  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(col1Expr);
  children.add(col2Expr);

  // ----------------------------------------------------------------------------------------------

  String[] columnNames = columns.toArray(new String[0]);

  VectorRandomRowSource rowSource = new VectorRandomRowSource();
  rowSource.initGenerationSpecSchema(
      random, generationSpecList, /* maxComplexDepth */ 0,
      /* allowNull */ true, /* isUnicodeOk */ true,
      explicitDataTypePhysicalVariationList);

  Object[][] randomRows = rowSource.randomRows(100000);

  VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches(random, rowSource, randomRows, null);

  GenericUDF genericUdf;
  switch (comparison) {
  case EQUALS:
    genericUdf = new GenericUDFOPEqual();
    break;
  case LESS_THAN:
    genericUdf = new GenericUDFOPLessThan();
    break;
  case LESS_THAN_EQUAL:
    genericUdf = new GenericUDFOPEqualOrLessThan();
    break;
  case GREATER_THAN:
    genericUdf = new GenericUDFOPGreaterThan();
    break;
  case GREATER_THAN_EQUAL:
    genericUdf = new GenericUDFOPEqualOrGreaterThan();
    break;
  case NOT_EQUALS:
    genericUdf = new GenericUDFOPNotEqual();
    break;
  default:
    throw new RuntimeException("Unexpected comparison " + comparison);
  }

  ObjectInspector[] objectInspectors = objectInspectorList.toArray(new ObjectInspector[objectInspectorList.size()]);
  // Let an initialization failure propagate (the method declares "throws Exception") so the
  // test report keeps the original exception and its stack trace.
  ObjectInspector outputObjectInspector = genericUdf.initialize(objectInspectors);

  TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(outputObjectInspector);

  ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);

  final int rowCount = randomRows.length;
  // values() allocates a fresh array on every call; fetch it once for both loops below.
  final FilterCompareTestMode[] testModes = FilterCompareTestMode.values();
  Object[][] resultObjectsArray = new Object[FilterCompareTestMode.count][];
  for (int i = 0; i < FilterCompareTestMode.count; i++) {

    Object[] resultObjects = new Object[rowCount];
    resultObjectsArray[i] = resultObjects;

    FilterCompareTestMode filterCompareTestMode = testModes[i];
    switch (filterCompareTestMode) {
    case ROW_MODE:
      doRowFilterCompareTest(typeInfo1, typeInfo2, columns, children, exprDesc, comparison, randomRows, columnScalarMode, rowSource.rowStructObjectInspector(), outputTypeInfo, resultObjects);
      break;
    case ADAPTOR:
    case FILTER_VECTOR_EXPRESSION:
    case COMPARE_VECTOR_EXPRESSION:
      doVectorFilterCompareTest(typeInfo1, typeInfo2, columns, columnNames, rowSource.typeInfos(), rowSource.dataTypePhysicalVariations(), children, exprDesc, comparison, filterCompareTestMode, columnScalarMode, batchSource, exprDesc.getWritableObjectInspector(), outputTypeInfo, resultObjects);
      break;
    default:
      throw new RuntimeException("Unexpected filter compare test mode " + filterCompareTestMode);
    }
  }

  for (int i = 0; i < rowCount; i++) {
    // Row-mode is the expected value.
    Object expectedResult = resultObjectsArray[0][i];

    for (int v = 1; v < FilterCompareTestMode.count; v++) {
      FilterCompareTestMode filterCompareTestMode = testModes[v];
      Object vectorResult = resultObjectsArray[v][i];

      // Description of the mismatch, or null when this mode agrees with row mode. The text is
      // only built on the failing path, so passing rows cost no string work.
      final String mismatch;
      if (filterCompareTestMode == FilterCompareTestMode.FILTER_VECTOR_EXPRESSION && expectedResult == null && vectorResult != null) {
        // A non-null filter result against a NULL row-mode result is OK as long as it is false.
        boolean vectorBoolean = ((BooleanWritable) vectorResult).get();
        mismatch = vectorBoolean
            ? " result is NOT NULL and true does not match row-mode expected result is NULL which means false here"
            : null;
      } else if (expectedResult == null || vectorResult == null) {
        mismatch = (expectedResult != null || vectorResult != null)
            ? " result is NULL " + (vectorResult == null) + " does not match row-mode expected result is NULL " + (expectedResult == null)
            : null;
      } else {
        mismatch = expectedResult.equals(vectorResult)
            ? null
            : " result " + vectorResult.toString() + " (" + vectorResult.getClass().getSimpleName() + ")" + " does not match row-mode expected result " + expectedResult.toString() + " (" + expectedResult.getClass().getSimpleName() + ")";
      }

      if (mismatch != null) {
        // Plain concatenation (instead of toString()) keeps the report usable even if the
        // generated scalar happens to be null.
        Assert.fail("Row " + i + " typeName1 " + typeName1 + " typeName2 " + typeName2 + " outputTypeName " + outputTypeInfo.getTypeName() + " " + comparison + " " + filterCompareTestMode + " " + columnScalarMode + mismatch + (columnScalarMode == ColumnScalarMode.SCALAR_COLUMN ? " scalar1 " + scalar1Object : "") + " row values " + Arrays.toString(randomRows[i]) + (columnScalarMode == ColumnScalarMode.COLUMN_SCALAR ? " scalar2 " + scalar2Object : ""));
      }
    }
  }
}
Use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual in the Apache Hive project.
Class ConstantPropagateProcFactory, method shortcutFunction.
/**
 * Tries to replace a call of {@code udf} over the operands {@code newExprs} with a simpler
 * expression.
 *
 * <p>Shortcuts handled here:
 * <ul>
 *   <li>{@code =} where one operand is a CASE/WHEN: the comparison is pushed into every result
 *       branch of the CASE/WHEN;</li>
 *   <li>AND: constant {@code true} operands are pruned, a constant {@code false} operand is
 *       returned as the result, and {@code col IS NOT NULL} is dropped when another operand is a
 *       comparison (other than {@code <>}) on the same column;</li>
 *   <li>OR: constant {@code false} operands are pruned, a constant {@code true} operand is
 *       returned as the result;</li>
 *   <li>WHEN / CASE with one or two branches whose results are constants;</li>
 *   <li>{@code unix_timestamp(args)} with at least one argument becomes
 *       {@code to_unix_timestamp(args)}.</li>
 * </ul>
 *
 * <p>For AND and OR the list {@code newExprs} is modified in place (pruned operands are removed)
 * even when {@code null} is returned.
 *
 * @param udf the function being applied
 * @param newExprs operands of the function; may be mutated as described above
 * @param op operator the expression belongs to; some folds are only applied when it is a
 *        {@code FilterOperator}, where a NULL result is treated like false
 * @return the replacement expression, or {@code null} when no shortcut applies
 * @throws UDFArgumentException if a replacement function expression cannot be created
 */
private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs, Operator<? extends Serializable> op) throws UDFArgumentException {
  if (udf instanceof GenericUDFOPEqual) {
    assert newExprs.size() == 2;
    // Locate the CASE/WHEN operand; the first operand wins when both qualify.
    int caseOrWhenIdx = 0;
    ExprNodeGenericFuncDesc caseOrWhenExpr = asCaseOrWhen(newExprs.get(0));
    if (caseOrWhenExpr == null) {
      caseOrWhenIdx = 1;
      caseOrWhenExpr = asCaseOrWhen(newExprs.get(1));
    }
    if (caseOrWhenExpr == null) {
      // we didn't find case or when udf
      return null;
    }
    final ExprNodeDesc otherOperand = newExprs.get(1 - caseOrWhenIdx);

    final GenericUDF childUDF = caseOrWhenExpr.getGenericUDF();
    final boolean isWhen = childUDF instanceof GenericUDFWhen;
    final List<ExprNodeDesc> children = new ArrayList<>(caseOrWhenExpr.getChildren());

    // WHEN children are (cond, result)* [else]; CASE children are key (value, result)* [else].
    // Wrap every result branch in "<result> = <other operand>".
    final int firstResultIdx = isWhen ? 1 : 2;
    for (int i = firstResultIdx; i < children.size(); i += 2) {
      children.set(i, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(i), otherOperand)));
    }
    // A trailing ELSE is present when the child count is odd for WHEN and even for CASE.
    final boolean hasElse = children.size() % 2 == (isWhen ? 1 : 0);
    if (hasElse) {
      final int elseIdx = children.size() - 1;
      children.set(elseIdx, ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), Lists.newArrayList(children.get(elseIdx), otherOperand)));
    }
    // after constant folding of child expression the return type of the CASE/WHEN might have
    // changed, so recreate the expression
    return ExprNodeGenericFuncDesc.newInstance(childUDF, caseOrWhenExpr.getFuncText(), children);
  }

  if (udf instanceof GenericUDFOPAnd) {
    final BitSet positionsToRemove = new BitSet();
    final List<ExprNodeDesc> notNullExprs = new ArrayList<ExprNodeDesc>();
    final List<Integer> notNullExprsPositions = new ArrayList<Integer>();
    final List<ExprNodeDesc> compareExprs = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < newExprs.size(); i++) {
      ExprNodeDesc childExpr = newExprs.get(i);
      if (childExpr instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
        if (Boolean.TRUE.equals(c.getValue())) {
          // if true, prune it
          positionsToRemove.set(i);
        } else if (Boolean.FALSE.equals(c.getValue())) {
          // if false, return false
          return childExpr;
        }
      } else if (childExpr instanceof ExprNodeGenericFuncDesc) {
        final GenericUDF childUdf = ((ExprNodeGenericFuncDesc) childExpr).getGenericUDF();
        if (childUdf instanceof GenericUDFOPNotNull && childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc) {
          notNullExprs.add(childExpr.getChildren().get(0));
          notNullExprsPositions.add(i);
        } else if (childUdf instanceof GenericUDFBaseCompare && !(childUdf instanceof GenericUDFOPNotEqual) && childExpr.getChildren().size() == 2) {
          // Try to fold (key <op> 86) and (key is not null) to (key <op> 86)
          // where <op> can be "=", ">=", "<=", ">", "<".
          // Note: (key <> 86) and (key is not null) cannot be folded
          ExprNodeColumnDesc colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(0));
          if (null == colDesc) {
            colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(1));
          }
          if (colDesc != null) {
            compareExprs.add(colDesc);
          }
        }
      }
    }
    // Try to fold (key = 86) and (key is not null) to (key = 86)
    for (int i = 0; i < notNullExprs.size(); i++) {
      for (ExprNodeDesc other : compareExprs) {
        if (notNullExprs.get(i).isSame(other)) {
          positionsToRemove.set(notNullExprsPositions.get(i));
          break;
        }
      }
    }
    // Remove unnecessary expressions
    removePositions(newExprs, positionsToRemove);
    if (newExprs.size() == 0) {
      return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE);
    }
    if (newExprs.size() == 1) {
      return newExprs.get(0);
    }
  }

  if (udf instanceof GenericUDFOPOr) {
    final BitSet positionsToRemove = new BitSet();
    for (int i = 0; i < newExprs.size(); i++) {
      ExprNodeDesc childExpr = newExprs.get(i);
      if (childExpr instanceof ExprNodeConstantDesc) {
        ExprNodeConstantDesc c = (ExprNodeConstantDesc) childExpr;
        if (Boolean.FALSE.equals(c.getValue())) {
          // if false, prune it
          positionsToRemove.set(i);
        } else if (Boolean.TRUE.equals(c.getValue())) {
          // if true return true
          return childExpr;
        }
      }
    }
    removePositions(newExprs, positionsToRemove);
    if (newExprs.size() == 0) {
      return new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.FALSE);
    }
    if (newExprs.size() == 1) {
      return newExprs.get(0);
    }
  }

  if (udf instanceof GenericUDFWhen) {
    if (!(newExprs.size() == 2 || newExprs.size() == 3)) {
      // we currently only handle either 1 or 2 branch.
      return null;
    }
    ExprNodeDesc thenExpr = newExprs.get(1);
    // A missing ELSE is modelled as a NULL constant of the THEN type.
    ExprNodeDesc elseExpr = newExprs.size() == 3 ? newExprs.get(2) : new ExprNodeConstantDesc(newExprs.get(1).getTypeInfo(), null);
    ExprNodeDesc whenExpr = newExprs.get(0);
    if (whenExpr instanceof ExprNodeConstantDesc) {
      // Constant condition: a NULL or false condition selects the ELSE branch.
      Boolean whenVal = (Boolean) ((ExprNodeConstantDesc) whenExpr).getValue();
      return (whenVal == null || Boolean.FALSE.equals(whenVal)) ? elseExpr : thenExpr;
    }
    if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
      ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
      ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
      Object thenVal = constThen.getValue();
      Object elseVal = constElse.getValue();
      if (thenVal == null) {
        if (elseVal == null) {
          // both branches are null.
          return thenExpr;
        } else if (op instanceof FilterOperator) {
          // we can still fold, since here null is equivalent to false.
          // NOTE(review): a NULL condition selects the ELSE branch (TRUE) in the original
          // expression, whereas NOT(NULL) is NULL; the Boolean/Boolean branch below uses
          // COALESCE(cond, false) for that reason - confirm this fold is safe for nullable
          // conditions.
          if (Boolean.TRUE.equals(elseVal)) {
            // Copy the operand instead of passing a subList view, so the new node does not
            // alias the caller's mutable operand list.
            return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), new ArrayList<>(newExprs.subList(0, 1)));
          }
          return Boolean.FALSE.equals(elseVal) ? elseExpr : null;
        } else {
          // can't do much, expression is not in context of filter, so we can't treat null as equivalent to false here.
          return null;
        }
      } else if (elseVal == null && op instanceof FilterOperator) {
        return Boolean.TRUE.equals(thenVal) ? whenExpr : Boolean.FALSE.equals(thenVal) ? thenExpr : null;
      } else if (thenVal.equals(elseVal)) {
        // Both branches yield the same constant; the condition is irrelevant.
        return thenExpr;
      } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
        // (true, false) -> COALESCE(cond, false); (false, true) -> NOT COALESCE(cond, false)
        List<ExprNodeDesc> children = new ArrayList<>();
        children.add(whenExpr);
        children.add(new ExprNodeConstantDesc(false));
        ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFCoalesce(), children);
        if (Boolean.TRUE.equals(thenVal)) {
          return func;
        } else {
          List<ExprNodeDesc> exprs = new ArrayList<>();
          exprs.add(func);
          return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
        }
      } else {
        return null;
      }
    }
  }

  if (udf instanceof GenericUDFCase) {
    // e.g. in a filter, (CASE a WHEN b THEN true ELSE null END) folds to (a = b).
    if (!(newExprs.size() == 3 || newExprs.size() == 4)) {
      // we currently only handle either 1 or 2 branch.
      return null;
    }
    ExprNodeDesc thenExpr = newExprs.get(2);
    // A missing ELSE is modelled as a NULL constant of the THEN type.
    ExprNodeDesc elseExpr = newExprs.size() == 4 ? newExprs.get(3) : new ExprNodeConstantDesc(newExprs.get(2).getTypeInfo(), null);
    if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) {
      ExprNodeConstantDesc constThen = (ExprNodeConstantDesc) thenExpr;
      ExprNodeConstantDesc constElse = (ExprNodeConstantDesc) elseExpr;
      Object thenVal = constThen.getValue();
      Object elseVal = constElse.getValue();
      if (thenVal == null) {
        if (null == elseVal) {
          return thenExpr;
        } else if (op instanceof FilterOperator) {
          // NOTE(review): (a <> b) is NULL when either side is NULL, while the Boolean/Boolean
          // branch below wraps (a = b) in COALESCE(..., false) to cover NULLs - confirm this
          // fold is safe for nullable operands.
          if (Boolean.TRUE.equals(elseVal)) {
            // Copied rather than a subList view; see the WHEN branch above.
            return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNotEqual(), new ArrayList<>(newExprs.subList(0, 2)));
          }
          return Boolean.FALSE.equals(elseVal) ? elseExpr : null;
        } else {
          return null;
        }
      } else if (null == elseVal && op instanceof FilterOperator) {
        if (Boolean.TRUE.equals(thenVal)) {
          return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), new ArrayList<>(newExprs.subList(0, 2)));
        }
        return Boolean.FALSE.equals(thenVal) ? thenExpr : null;
      } else if (thenVal.equals(elseVal)) {
        // Both branches yield the same constant; the comparison is irrelevant.
        return thenExpr;
      } else if (thenVal instanceof Boolean && elseVal instanceof Boolean) {
        // (true, false) -> COALESCE(a = b, false); (false, true) -> NOT COALESCE(a = b, false)
        ExprNodeGenericFuncDesc equal = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPEqual(), new ArrayList<>(newExprs.subList(0, 2)));
        List<ExprNodeDesc> children = new ArrayList<>();
        children.add(equal);
        children.add(new ExprNodeConstantDesc(false));
        ExprNodeGenericFuncDesc func = ExprNodeGenericFuncDesc.newInstance(new GenericUDFCoalesce(), children);
        if (Boolean.TRUE.equals(thenVal)) {
          return func;
        } else {
          List<ExprNodeDesc> exprs = new ArrayList<>();
          exprs.add(func);
          return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), exprs);
        }
      } else {
        return null;
      }
    }
  }

  if (udf instanceof GenericUDFUnixTimeStamp) {
    if (newExprs.size() >= 1) {
      // unix_timestamp(args) -> to_unix_timestamp(args)
      return ExprNodeGenericFuncDesc.newInstance(new GenericUDFToUnixTimeStamp(), newExprs);
    }
  }
  return null;
}

/** Returns {@code expr} as a function expression if it is a CASE or WHEN call, else null. */
private static ExprNodeGenericFuncDesc asCaseOrWhen(ExprNodeDesc expr) {
  if (!(expr instanceof ExprNodeGenericFuncDesc)) {
    return null;
  }
  final ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) expr;
  final GenericUDF funcUdf = funcExpr.getGenericUDF();
  return (funcUdf instanceof GenericUDFWhen || funcUdf instanceof GenericUDFCase) ? funcExpr : null;
}

/** Removes from {@code exprs}, in place, the elements whose original indexes are set in {@code positions}. */
private static void removePositions(List<ExprNodeDesc> exprs, BitSet positions) {
  int removed = 0;
  for (int pos = positions.nextSetBit(0); pos >= 0; pos = positions.nextSetBit(pos + 1)) {
    // Earlier removals shifted the remaining elements left by "removed" slots.
    exprs.remove(pos - removed);
    removed++;
  }
}
Aggregations