Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache: class TestExecDriver, method populateMapRedPlan6.
@SuppressWarnings("unchecked")
private void populateMapRedPlan6(Table src) throws Exception {
  // map-side work: select -> script ('cat') -> reduce sink
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
      PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("tkey")),
          Utilities.makeList(getStringColumn("tkey"), getStringColumn("tvalue")),
          outputColumns, false, -1, 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
  Operator<ScriptDesc> op0 = OperatorFactory.get(
      new ScriptDesc("\'cat\'",
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordWriter.class,
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "tkey,tvalue"), TextRecordReader.class,
          TextRecordReader.class,
          PlanUtils.getDefaultTableDesc("" + Utilities.tabCode, "key")),
      op1);
  Operator<SelectDesc> op4 = OperatorFactory.get(
      new SelectDesc(Utilities.makeList(getStringColumn("key"), getStringColumn("value")), outputColumns),
      op0);
  addMapWork(mr, src, "a", op4);

  ReduceWork rWork = new ReduceWork();
  mr.setReduceWork(rWork);
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());

  // reduce-side work: select -> filter -> file sink
  // (operators are created sink-first; OperatorFactory.get(desc, child) makes the new
  //  operator the parent of the operator passed as its child)
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
      new FileSinkDesc(new Path(TMPDIR + File.separator + "mapredplan6.out"), Utilities.defaultTd, false));
  Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op2);
  rWork.setReducer(op5);
}
Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache: class TestExecDriver, method getTestFilterDesc.
private FilterDesc getTestFilterDesc(String column) throws Exception {
  // lhs: cast the given column (a string) to double
  ArrayList<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
  children1.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, column, "", false));
  ExprNodeDesc lhs = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo,
      FunctionRegistry.getFunctionInfo(serdeConstants.DOUBLE_TYPE_NAME).getGenericUDF(), children1);

  // rhs: cast the constant 100L to double
  ArrayList<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>();
  children2.add(new ExprNodeConstantDesc(TypeInfoFactory.longTypeInfo, Long.valueOf(100)));
  ExprNodeDesc rhs = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo,
      FunctionRegistry.getFunctionInfo(serdeConstants.DOUBLE_TYPE_NAME).getGenericUDF(), children2);

  // predicate: double(column) < double(100), wrapped in a non-sampling FilterDesc
  ArrayList<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>();
  children3.add(lhs);
  children3.add(rhs);
  ExprNodeDesc desc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
      FunctionRegistry.getFunctionInfo("<").getGenericUDF(), children3);
  return new FilterDesc(desc, false);
}
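Taken together, the helper returns a non-sampling FilterDesc whose predicate is double(column) < double(100). A quick way to see what was built (a small sketch, assuming the surrounding TestExecDriver context so the helper is callable) is to print the predicate's expression string:

  // Minimal sketch: inspect the predicate carried by the FilterDesc built above.
  // The exact text returned by getExprString() is illustrative, not guaranteed.
  FilterDesc fd = getTestFilterDesc("key");
  System.out.println(fd.getPredicate().getExprString());
  // prints something like: (UDFToDouble(key) < UDFToDouble(100))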
Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache: class TestExecDriver, method populateMapPlan1.
@SuppressWarnings("unchecked")
private void populateMapPlan1(Table src) throws Exception {
  Operator<FileSinkDesc> op2 = OperatorFactory.get(ctx,
      new FileSinkDesc(new Path(TMPDIR + File.separator + "mapplan1.out"), Utilities.defaultTd, true));
  Operator<FilterDesc> op1 = OperatorFactory.get(getTestFilterDesc("key"), op2);
  addMapWork(mr, src, "a", op1);
}
Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache: class SharedWorkOptimizer, method extractSharedOptimizationInfoForRoot.
private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pctx,
    SharedWorkOptimizerCache optimizerCache, TableScanOperator retainableTsOp,
    TableScanOperator discardableTsOp, boolean mayRemoveDownStreamOperators,
    boolean mayRemoveInputOps) throws SemanticException {
  LinkedHashSet<Operator<?>> retainableOps = new LinkedHashSet<>();
  LinkedHashSet<Operator<?>> discardableOps = new LinkedHashSet<>();
  Set<Operator<?>> discardableInputOps = new HashSet<>();
  long dataSize = 0L;
  long maxDataSize = 0L;

  retainableOps.add(retainableTsOp);
  discardableOps.add(discardableTsOp);
  Operator<?> equalOp1 = retainableTsOp;
  Operator<?> equalOp2 = discardableTsOp;
  if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
    // TODO: Support checking multiple child operators to merge further.
    discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
    return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
  }
  if (retainableTsOp.getChildOperators().size() == 0 || discardableTsOp.getChildOperators().size() == 0) {
    return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
  }

  Operator<?> currentOp1 = retainableTsOp.getChildOperators().get(0);
  Operator<?> currentOp2 = discardableTsOp.getChildOperators().get(0);

  // Special treatment for Filter operator that ignores the DPP predicates
  if (mayRemoveDownStreamOperators && currentOp1 instanceof FilterOperator
      && currentOp2 instanceof FilterOperator) {
    boolean equalFilters = false;
    FilterDesc op1Conf = ((FilterOperator) currentOp1).getConf();
    FilterDesc op2Conf = ((FilterOperator) currentOp2).getConf();
    if (op1Conf.getIsSamplingPred() == op2Conf.getIsSamplingPred()
        && StringUtils.equals(op1Conf.getSampleDescExpr(), op2Conf.getSampleDescExpr())) {
      Multiset<String> conjsOp1String = extractConjsIgnoringDPPPreds(op1Conf.getPredicate());
      Multiset<String> conjsOp2String = extractConjsIgnoringDPPPreds(op2Conf.getPredicate());
      if (conjsOp1String.equals(conjsOp2String)) {
        equalFilters = true;
      }
    }
    if (equalFilters) {
      equalOp1 = currentOp1;
      equalOp2 = currentOp2;
      retainableOps.add(equalOp1);
      discardableOps.add(equalOp2);
      if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
        // TODO: Support checking multiple child operators to merge further.
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
        discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
        return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
      }
      currentOp1 = currentOp1.getChildOperators().get(0);
      currentOp2 = currentOp2.getChildOperators().get(0);
    } else {
      // Bail out
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
      discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps, discardableInputOps));
      return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
    }
  }
  return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2, currentOp1, currentOp2,
      retainableOps, discardableOps, discardableInputOps, mayRemoveDownStreamOperators, mayRemoveInputOps);
}
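The filter-merging check above only treats two FilterOperators as equal when their FilterDesc configurations agree on the sampling flag and sampling expression, and their predicates contain the same multiset of conjuncts once dynamic-partition-pruning (DPP) predicates are stripped (extractConjsIgnoringDPPPreds). A simplified sketch of that comparison, without the DPP-aware conjunct extraction, could look like this (the helper name and structure are illustrative, not Hive's actual code):

  // Simplified, hypothetical helper: compare two FilterDesc instances directly instead of
  // comparing their DPP-stripped conjunct multisets as SharedWorkOptimizer does.
  static boolean roughlyEqualFilters(FilterDesc a, FilterDesc b) {
    return a.getIsSamplingPred() == b.getIsSamplingPred()
        && StringUtils.equals(a.getSampleDescExpr(), b.getSampleDescExpr())
        && a.getPredicate().isSame(b.getPredicate());
  }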
Use of org.apache.hadoop.hive.ql.plan.FilterDesc in project hive by apache: class SharedWorkOptimizer, method pushFilterToTopOfTableScan.
private static void pushFilterToTopOfTableScan(SharedWorkOptimizerCache optimizerCache, DecomposedTs tsModel)
    throws UDFArgumentException {
  TableScanOperator tsOp = tsModel.ts;
  ExprNodeGenericFuncDesc tableScanExprNode = (ExprNodeGenericFuncDesc) tsModel.getFullFilterExpr();
  if (tableScanExprNode == null) {
    return;
  }
  List<Operator<? extends OperatorDesc>> allChildren = Lists.newArrayList(tsOp.getChildOperators());
  childOperators:
  for (Operator<? extends OperatorDesc> op : allChildren) {
    if (optimizerCache.isKnownFilteringOperator(op)) {
      continue;
    }
    if (op instanceof FilterOperator) {
      FilterOperator filterOp = (FilterOperator) op;
      ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate();
      if (tableScanExprNode.isSame(filterExprNode)) {
        // We do not need to do anything
        optimizerCache.setKnownFilteringOperator(filterOp);
        continue;
      }
      if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) {
        for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) {
          if (childExprNode.isSame(filterExprNode)) {
            // We do not need to do anything, it is in the OR expression
            // so probably we pushed previously
            optimizerCache.setKnownFilteringOperator(filterOp);
            continue childOperators;
          }
        }
      }
      ExprNodeDesc newFilterExpr = conjunction(filterExprNode, tableScanExprNode);
      if (!isSame(filterOp.getConf().getPredicate(), newFilterExpr)) {
        filterOp.getConf().setPredicate(newFilterExpr);
      }
      optimizerCache.setKnownFilteringOperator(filterOp);
    } else {
      Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(),
          new FilterDesc(tableScanExprNode.clone(), false), new RowSchema(tsOp.getSchema().getSignature()));
      tsOp.replaceChild(op, newOp);
      newOp.getParentOperators().add(tsOp);
      op.replaceParent(tsOp, newOp);
      newOp.getChildOperators().add(op);
      // Add to cache (same group as tsOp)
      optimizerCache.putIfWorkExists(newOp, tsOp);
      optimizerCache.setKnownFilteringOperator(newOp);
    }
  }
}
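Two details above are easy to miss. First, conjunction(filterExprNode, tableScanExprNode) is a SharedWorkOptimizer helper that ANDs the existing filter predicate with the pulled-up table-scan predicate; a rough stand-alone approximation is sketched below (an assumption: the real helper may do more, such as flattening existing AND trees, while this sketch only wraps the two predicates in a single AND). Second, in the else branch a brand-new FilterOperator is spliced between the TableScanOperator and its non-filter child by rewiring both the parent and child links explicitly.

  // Hypothetical stand-in for the conjunction(...) helper used above: simply AND the two
  // predicates. It is not SharedWorkOptimizer's actual implementation.
  static ExprNodeDesc andOf(ExprNodeDesc left, ExprNodeDesc right) throws UDFArgumentException {
    return ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPAnd(),
        Lists.newArrayList(left, right));
  }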