use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd in project hive by apache.
the class PartitionPruner method compactExpr.
/**
 * Takes a partition pruning expression and removes null operands and
 * non-partition columns. The null operands are introduced by the
 * ExprProcFactory classes, for example PPRColumnExprProcessor.
 * @param expr original partition pruning expression.
 * @return partition pruning expression that contains only partition columns.
 */
@VisibleForTesting
static ExprNodeDesc compactExpr(ExprNodeDesc expr) {
  // If this is a constant boolean expression, return the value.
  if (expr == null) {
    return null;
  }
  if (expr instanceof ExprNodeConstantDesc) {
    if (((ExprNodeConstantDesc) expr).getValue() == null) {
      return null;
    }
    if (!isBooleanExpr(expr)) {
      throw new IllegalStateException("Unexpected non-boolean ExprNodeConstantDesc: "
          + expr.getExprString());
    }
    return expr;
  } else if (expr instanceof ExprNodeGenericFuncDesc) {
    GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF();
    boolean isAnd = udf instanceof GenericUDFOPAnd;
    boolean isOr = udf instanceof GenericUDFOPOr;
    List<ExprNodeDesc> children = expr.getChildren();
    if (isAnd) {
      // Non-partition expressions are converted to nulls.
      List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
      boolean allTrue = true;
      for (ExprNodeDesc child : children) {
        ExprNodeDesc compactChild = compactExpr(child);
        if (compactChild != null) {
          if (!isTrueExpr(compactChild)) {
            newChildren.add(compactChild);
            allTrue = false;
          }
          if (isFalseExpr(compactChild)) {
            return new ExprNodeConstantDesc(Boolean.FALSE);
          }
        } else {
          allTrue = false;
        }
      }
      if (allTrue) {
        return new ExprNodeConstantDesc(Boolean.TRUE);
      }
      if (newChildren.size() == 0) {
        return null;
      }
      if (newChildren.size() == 1) {
        return newChildren.get(0);
      }
      // Nothing to compact, update expr with compacted children.
      ((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
    } else if (isOr) {
      // Non-partition expressions are converted to nulls.
      List<ExprNodeDesc> newChildren = new ArrayList<ExprNodeDesc>();
      boolean allFalse = true;
      boolean isNull = false;
      for (ExprNodeDesc child : children) {
        ExprNodeDesc compactChild = compactExpr(child);
        if (compactChild != null) {
          if (isTrueExpr(compactChild)) {
            return new ExprNodeConstantDesc(Boolean.TRUE);
          }
          if (!isNull && !isFalseExpr(compactChild)) {
            newChildren.add(compactChild);
            allFalse = false;
          }
        } else {
          isNull = true;
        }
      }
      if (isNull) {
        return null;
      }
      if (allFalse) {
        return new ExprNodeConstantDesc(Boolean.FALSE);
      }
      if (newChildren.size() == 1) {
        return newChildren.get(0);
      }
      // Nothing to compact, update expr with compacted children.
      ((ExprNodeGenericFuncDesc) expr).setChildren(newChildren);
    }
    return expr;
  } else {
    throw new IllegalStateException("Unexpected type of ExprNodeDesc: " + expr.getExprString());
  }
}
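To see what the compaction does, here is a minimal, hedged sketch (not part of the Hive sources) that feeds compactExpr the kind of tree it expects: an AND whose first operand is a null-valued boolean constant, standing in for a non-partition predicate that the ExprProcFactory processors nulled out, and whose second operand compares a hypothetical partition column p of table t. Because compactExpr is package-private, the sketch assumes it is compiled into the org.apache.hadoop.hive.ql.optimizer.ppr package.

package org.apache.hadoop.hive.ql.optimizer.ppr;

import java.util.Arrays;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CompactExprSketch {
  public static void main(String[] args) throws Exception {
    // Partition-column predicate: p > 10 ("p" and "t" are hypothetical names).
    ExprNodeDesc partCol = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "p", "t", true);
    ExprNodeDesc gt = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPGreaterThan(),
        Arrays.asList(partCol, new ExprNodeConstantDesc(10)));

    // A boolean constant with a null value stands in for a non-partition
    // predicate that was nulled out before compaction.
    ExprNodeDesc nulled = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null);

    ExprNodeDesc conj = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPAnd(), Arrays.asList(nulled, gt));

    // The null operand is dropped, leaving a single child, so the
    // single-child shortcut should return the bare comparison.
    System.out.println(PartitionPruner.compactExpr(conj).getExprString());
  }
}

The null operand forces allTrue to false and is discarded; the surviving comparison is the only remaining child, so the size-one shortcut returns it directly rather than rebuilding the AND.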
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd in project hive by apache.
the class SharedWorkOptimizer method pushFilterToTopOfTableScan.
private static void pushFilterToTopOfTableScan(
    SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp)
    throws UDFArgumentException {
  ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
  List<Operator<? extends OperatorDesc>> allChildren =
      Lists.newArrayList(tsOp.getChildOperators());
  for (Operator<? extends OperatorDesc> op : allChildren) {
    if (op instanceof FilterOperator) {
      FilterOperator filterOp = (FilterOperator) op;
      ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
      if (tableScanExprNode.isSame(filterExprNode)) {
        // We do not need to do anything
        return;
      }
      if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
        for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
          if (childExprNode.isSame(filterExprNode)) {
            // The filter is already present in the OR expression,
            // so we probably pushed it previously.
            return;
          }
        }
      }
      ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance(
          new GenericUDFOPAnd(),
          Arrays.<ExprNodeDesc>asList(tableScanExprNode.clone(), filterExprNode));
      filterOp.getConf().setPredicate(newPred);
    } else {
      Operator<FilterDesc> newOp = OperatorFactory.get(
          tsOp.getCompilationOpContext(),
          new FilterDesc(tableScanExprNode.clone(), false),
          new RowSchema(tsOp.getSchema().getSignature()));
      tsOp.replaceChild(op, newOp);
      newOp.getParentOperators().add(tsOp);
      op.replaceParent(tsOp, newOp);
      newOp.getChildOperators().add(op);
      // Add to cache (same group as tsOp)
      optimizerCache.putIfWorkExists(newOp, tsOp);
    }
  }
}
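The key step above is the construction of newPred: rather than replacing the FilterOperator's predicate, the table scan's filter is cloned and AND-ed with it so that both conditions survive. A standalone, hedged sketch of just that merge, using two made-up predicates (the column names and types are illustrative, not taken from any real plan):

import java.util.Arrays;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PredicateMergeSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical "table scan" predicate: userid > 100.
    ExprNodeDesc scanPred = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPGreaterThan(),
        Arrays.<ExprNodeDesc>asList(
            new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "userid", "t", false),
            new ExprNodeConstantDesc(100L)));
    // Hypothetical "filter operator" predicate: subtype <= 1000.0.
    ExprNodeDesc filterPred = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPEqualOrLessThan(),
        Arrays.<ExprNodeDesc>asList(
            new ExprNodeColumnDesc(TypeInfoFactory.doubleTypeInfo, "subtype", "t", false),
            new ExprNodeConstantDesc(1000.0)));

    // Same shape as newPred above: clone the scan predicate and AND the two,
    // so the pushed-down filter does not replace the existing one.
    ExprNodeGenericFuncDesc merged = ExprNodeGenericFuncDesc.newInstance(
        new GenericUDFOPAnd(),
        Arrays.<ExprNodeDesc>asList(scanPred.clone(), filterPred));
    System.out.println(merged.getExprString());
  }
}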
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd in project hive by apache.
the class ConstantPropagateProcFactory method foldNegative.
/**
 * Folds the logical not() operator into its child operator where possible.
 * @param desc the expression to be evaluated
 * @return the replacement expression
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldNegative(ExprNodeDesc desc) throws UDFArgumentException {
  if (desc instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
    GenericUDF udf = funcDesc.getGenericUDF();
    if (udf instanceof GenericUDFOPNot) {
      ExprNodeDesc child = funcDesc.getChildren().get(0);
      if (child instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc childDesc = (ExprNodeGenericFuncDesc) child;
        GenericUDF childUDF = childDesc.getGenericUDF();
        List<ExprNodeDesc> grandChildren = child.getChildren();
        if (childUDF instanceof GenericUDFBaseCompare
            || childUDF instanceof GenericUDFOPNull
            || childUDF instanceof GenericUDFOPNotNull) {
          List<ExprNodeDesc> newGrandChildren = new ArrayList<ExprNodeDesc>();
          for (ExprNodeDesc grandChild : grandChildren) {
            newGrandChildren.add(foldNegative(grandChild));
          }
          return ExprNodeGenericFuncDesc.newInstance(childUDF.negative(), newGrandChildren);
        } else if (childUDF instanceof GenericUDFOPAnd
            || childUDF instanceof GenericUDFOPOr) {
          List<ExprNodeDesc> newGrandChildren = new ArrayList<ExprNodeDesc>();
          for (ExprNodeDesc grandChild : grandChildren) {
            newGrandChildren.add(foldNegative(ExprNodeGenericFuncDesc.newInstance(
                new GenericUDFOPNot(), Arrays.asList(grandChild))));
          }
          return ExprNodeGenericFuncDesc.newInstance(childUDF.negative(), newGrandChildren);
        } else if (childUDF instanceof GenericUDFOPNot) {
          return foldNegative(child.getChildren().get(0));
        } else {
          // For operators like if() that cannot be handled, leave the not()
          // as it is and continue processing the children.
          List<ExprNodeDesc> newGrandChildren = new ArrayList<ExprNodeDesc>();
          for (ExprNodeDesc grandChild : grandChildren) {
            newGrandChildren.add(foldNegative(grandChild));
          }
          childDesc.setChildren(newGrandChildren);
          return funcDesc;
        }
      }
    }
  }
  return desc;
}
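The folding relies on GenericUDF.negative(), which maps an operator to its logical complement: comparison operators flip (greater-than becomes less-than-or-equal), and, by De Morgan's laws, AND becomes OR once the not() has been pushed down onto each operand, which is what the recursive newInstance(new GenericUDFOPNot(), ...) calls do. A small hedged sketch of that mapping; the printed class names are what the negative() overrides are expected to return:

import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;

public class NegativeSketch {
  public static void main(String[] args) {
    // not(a > b) should fold to (a <= b): the comparison's complement.
    GenericUDF notGreater = new GenericUDFOPGreaterThan().negative();
    System.out.println(notGreater.getClass().getSimpleName()); // expected: GenericUDFOPEqualOrLessThan

    // not(x and y) should fold to (not x) or (not y): De Morgan, so AND's
    // complement is OR, with the not() recursed onto each child as above.
    GenericUDF notAnd = new GenericUDFOPAnd().negative();
    System.out.println(notAnd.getClass().getSimpleName());     // expected: GenericUDFOPOr
  }
}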
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd in project hive by apache.
the class TestVectorizationContext method testVectorizeAndOrProjectionExpression.
@Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
  ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
  ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(Integer.valueOf(10));
  GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
  ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
  greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  greaterExprDesc.setGenericUDF(udf);
  List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
  children1.add(col1Expr);
  children1.add(constDesc);
  greaterExprDesc.setChildren(children1);
  ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false);
  GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
  ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
  andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  andExprDesc.setGenericUDF(andUdf);
  List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
  children3.add(greaterExprDesc);
  children3.add(col2Expr);
  andExprDesc.setChildren(children3);
  List<String> columns = new ArrayList<String>();
  columns.add("col1");
  columns.add("col2");
  VectorizationContext vc = new VectorizationContext("name", columns);
  VectorExpression veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(veAnd.getClass(), FilterExprAndExpr.class);
  assertEquals(veAnd.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
  assertEquals(veAnd.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
  veAnd = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(veAnd.getClass(), ColAndCol.class);
  assertEquals(1, veAnd.getChildExpressions().length);
  assertEquals(veAnd.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
  assertEquals(3, ((ColAndCol) veAnd).getOutputColumnNum());
  // OR
  GenericUDFOPOr orUdf = new GenericUDFOPOr();
  ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
  orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
  orExprDesc.setGenericUDF(orUdf);
  List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
  children4.add(greaterExprDesc);
  children4.add(col2Expr);
  orExprDesc.setChildren(children4);
  // Allocate a new VectorizationContext to reset the intermediate columns.
  vc = new VectorizationContext("name", columns);
  VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
  assertEquals(veOr.getClass(), FilterExprOrExpr.class);
  assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
  assertEquals(veOr.getChildExpressions()[1].getClass(), SelectColumnIsTrue.class);
  veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.PROJECTION);
  assertEquals(veOr.getClass(), ColOrCol.class);
  assertEquals(1, veOr.getChildExpressions().length);
  assertEquals(veOr.getChildExpressions()[0].getClass(), LongColGreaterLongScalar.class);
  assertEquals(3, ((ColOrCol) veOr).getOutputColumnNum());
}
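As a rough illustration of what the projected AND computes, the following hedged sketch, appended at the end of the test body, evaluates veAnd on a one-row batch. The column layout (inputs in columns 0 and 1, scratch column 2 for the greater-than result, AND output in column 3) follows the test's own assertions; the batch setup and the imports of VectorizedRowBatch and LongColumnVector from org.apache.hadoop.hive.ql.exec.vector are assumptions of the sketch, not part of the original test.

  // Build a one-row batch: col1 in column 0, col2 in column 1; columns 2 and 3
  // serve as scratch columns for the greater-than and the AND results.
  VectorizedRowBatch batch = new VectorizedRowBatch(4);
  for (int i = 0; i < 4; i++) {
    batch.cols[i] = new LongColumnVector();
  }
  ((LongColumnVector) batch.cols[0]).vector[0] = 42;  // col1 = 42, so col1 > 10 holds
  ((LongColumnVector) batch.cols[1]).vector[0] = 1;   // col2 = true
  batch.size = 1;

  // evaluate() runs the child expression first (the greater-than into its
  // scratch column), then writes the AND result into output column 3.
  veAnd.evaluate(batch);
  System.out.println(((LongColumnVector) batch.cols[3]).vector[0]);  // expected: 1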
use of org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd in project hive by apache.
the class TestOrcSplitElimination method testSplitEliminationComplexExpr.
@Test
public void testSplitEliminationComplexExpr() throws Exception {
  ObjectInspector inspector = createIO();
  Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
      100000, CompressionKind.NONE, 10000, 10000);
  writeData(writer);
  writer.close();
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1000);
  HiveConf.setLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, 150000);
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  // predicate expression: userid <= 100 and subtype <= 1000.0
  GenericUDF udf = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr = Lists.newArrayList();
  ExprNodeColumnDesc col = new ExprNodeColumnDesc(Long.class, "userid", "T", false);
  ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
  childExpr.add(col);
  childExpr.add(con);
  ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  GenericUDF udf1 = new GenericUDFOPEqualOrLessThan();
  List<ExprNodeDesc> childExpr1 = Lists.newArrayList();
  ExprNodeColumnDesc col1 = new ExprNodeColumnDesc(Double.class, "subtype", "T", false);
  ExprNodeConstantDesc con1 = new ExprNodeConstantDesc(1000.0);
  childExpr1.add(col1);
  childExpr1.add(con1);
  ExprNodeGenericFuncDesc en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  GenericUDF udf2 = new GenericUDFOPAnd();
  List<ExprNodeDesc> childExpr2 = Lists.newArrayList();
  childExpr2.add(en);
  childExpr2.add(en1);
  ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  String sargStr = SerializationUtilities.serializeExpression(en2);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(2, splits.length);
  con = new ExprNodeConstantDesc(2);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  con1 = new ExprNodeConstantDesc(0.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  sargStr = SerializationUtilities.serializeExpression(en2);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // no stripe will satisfy the predicate
  assertEquals(0, splits.length);
  con = new ExprNodeConstantDesc(2);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  con1 = new ExprNodeConstantDesc(1.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  sargStr = SerializationUtilities.serializeExpression(en2);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // only the first stripe will satisfy the condition, hence a single split
  assertEquals(1, splits.length);
  udf = new GenericUDFOPEqual();
  con = new ExprNodeConstantDesc(13);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  con1 = new ExprNodeConstantDesc(80.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  sargStr = SerializationUtilities.serializeExpression(en2);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // the first two stripes will satisfy the condition, hence two splits
  assertEquals(2, splits.length);
  udf = new GenericUDFOPEqual();
  con = new ExprNodeConstantDesc(13);
  childExpr.set(1, con);
  en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
  udf1 = new GenericUDFOPEqual();
  con1 = new ExprNodeConstantDesc(80.0);
  childExpr1.set(1, con1);
  en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
  childExpr2.set(0, en);
  childExpr2.set(1, en1);
  en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
  sargStr = SerializationUtilities.serializeExpression(en2);
  conf.set("hive.io.filter.expr.serialized", sargStr);
  splits = in.getSplits(conf, 1);
  // only the second stripe will satisfy the condition, hence a single split
  assertEquals(1, splits.length);
}
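What connects the expression built here to the ORC reader is the hive.io.filter.expr.serialized property: OrcInputFormat deserializes the string back into an expression tree and converts it into a SearchArgument that is checked against each stripe's statistics. A minimal, hedged round-trip sketch, assuming en2 and the SerializationUtilities import from the test are in scope:

  // Serialize the AND predicate the same way the test hands it to the reader...
  String serialized = SerializationUtilities.serializeExpression(en2);
  conf.set("hive.io.filter.expr.serialized", serialized);

  // ...and deserialize it the way the reader side recovers it.
  ExprNodeGenericFuncDesc restored = SerializationUtilities.deserializeExpression(serialized);
  System.out.println(restored.getExprString());
  System.out.println(en2.isSame(restored));  // expected: true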