Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class OpProcFactory, method createFilter.
protected static Object createFilter(Operator op,
    Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) {
  RowSchema inputRS = op.getSchema();
  // combine all predicates into a single expression
  List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
  Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
  while (iterator.hasNext()) {
    for (ExprNodeDesc pred : iterator.next()) {
      preds = ExprNodeDescUtils.split(pred, preds);
    }
  }
  if (preds.isEmpty()) {
    return null;
  }
  ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
  if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
    boolean pushFilterToStorage;
    HiveConf hiveConf = owi.getParseContext().getConf();
    pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
    if (pushFilterToStorage) {
      condn = pushFilterToStorageHandler((TableScanOperator) op,
          (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
      if (condn == null) {
        // we pushed the whole thing down
        return null;
      }
    }
  }
  // add new filter op
  List<Operator<? extends OperatorDesc>> originalChilren = op.getChildOperators();
  op.setChildOperators(null);
  Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(
      new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
  output.setChildOperators(originalChilren);
  for (Operator<? extends OperatorDesc> ch : originalChilren) {
    List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
    int pos = parentOperators.indexOf(op);
    assert pos != -1;
    parentOperators.remove(pos);
    // add the new filter op at the old op's position
    parentOperators.add(pos, output);
  }
  if (HiveConf.getBoolVar(owi.getParseContext().getConf(),
      HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    // remove the candidate filter ops
    removeCandidates(op, owi);
  }
  // push down current ppd context to newly added filter
  ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
  if (walkerInfo != null) {
    walkerInfo.getNonFinalCandidates().clear();
    owi.putPrunedPreds(output, walkerInfo);
  }
  return output;
}
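For context, a hedged sketch of how a predicate-pushdown node processor might hand its pruned predicates to createFilter; the surrounding variable names and the call site are assumptions, not the actual OpProcFactory code:

// Hypothetical caller (assumption): insert a FIL operator below 'op' carrying
// all predicates that the PPD walk marked as final candidates for this operator.
ExprWalkerInfo pruned = owi.getPrunedPreds(op);
if (pruned != null && !pruned.getFinalCandidates().isEmpty()) {
  // createFilter splices the new FilterOperator between 'op' and its children
  // and returns it, or returns null if everything was pushed to storage.
  createFilter(op, pruned.getFinalCandidates(), owi);
}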
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class SparkCompiler, method getComponents.
// Tarjan's algorithm: find strongly connected components in the operator graph
private Set<Set<Operator<?>>> getComponents(OptimizeSparkProcContext procCtx) {
  AtomicInteger index = new AtomicInteger();
  Map<Operator<?>, Integer> indexes = new HashMap<Operator<?>, Integer>();
  Map<Operator<?>, Integer> lowLinks = new HashMap<Operator<?>, Integer>();
  Stack<Operator<?>> nodes = new Stack<Operator<?>>();
  Set<Set<Operator<?>>> components = new HashSet<Set<Operator<?>>>();
  for (Operator<?> o : procCtx.getParseContext().getTopOps().values()) {
    if (!indexes.containsKey(o)) {
      connect(o, index, nodes, indexes, lowLinks, components);
    }
  }
  return components;
}
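The connect(...) helper is not shown above. Below is a minimal sketch of the Tarjan visit step it implies, assuming getChildOperators() supplies the outgoing edges of the operator graph; this follows the textbook algorithm and is not necessarily the exact Hive implementation:

// Sketch (assumption): one depth-first visit of Tarjan's SCC algorithm.
private void connect(Operator<?> o, AtomicInteger index, Stack<Operator<?>> nodes,
    Map<Operator<?>, Integer> indexes, Map<Operator<?>, Integer> lowLinks,
    Set<Set<Operator<?>>> components) {
  // assign the discovery index and initial low-link, then push onto the stack
  indexes.put(o, index.get());
  lowLinks.put(o, index.get());
  index.incrementAndGet();
  nodes.push(o);

  List<Operator<? extends OperatorDesc>> children = o.getChildOperators();
  if (children != null) {
    for (Operator<?> child : children) {
      if (!indexes.containsKey(child)) {
        // unvisited child: recurse, then fold its low-link into ours
        connect(child, index, nodes, indexes, lowLinks, components);
        lowLinks.put(o, Math.min(lowLinks.get(o), lowLinks.get(child)));
      } else if (nodes.contains(child)) {
        // child is on the stack, so it belongs to the current component
        lowLinks.put(o, Math.min(lowLinks.get(o), indexes.get(child)));
      }
    }
  }

  // o is the root of a strongly connected component: pop the stack down to o
  if (lowLinks.get(o).equals(indexes.get(o))) {
    Set<Operator<?>> component = new HashSet<Operator<?>>();
    components.add(component);
    Operator<?> popped;
    do {
      popped = nodes.pop();
      component.add(popped);
    } while (popped != o);
  }
}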
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class SparkCompiler, method setInputFormat.
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof SparkTask) {
    SparkWork work = ((SparkTask) task).getWork();
    List<BaseWork> all = work.getAllWork();
    for (BaseWork w : all) {
      if (w instanceof MapWork) {
        MapWork mapWork = (MapWork) w;
        HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
        if (!opMap.isEmpty()) {
          for (Operator<? extends OperatorDesc> op : opMap.values()) {
            setInputFormat(mapWork, op);
          }
        }
      }
    }
  } else if (task instanceof ConditionalTask) {
    List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
    for (Task<? extends Serializable> tsk : listTasks) {
      setInputFormat(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
      setInputFormat(childTask);
    }
  }
}
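A brief hypothetical driver sketch (an assumption, not the actual TaskCompiler code): a hook like this is typically applied once per root task, and the method above then recurses into conditional branches and child tasks on its own:

// Hypothetical: apply the input-format hook to every root task of the plan.
for (Task<? extends Serializable> rootTask : rootTasks) {
  setInputFormat(rootTask);
}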
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class SplitOpTreeForDPP, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  SparkPartitionPruningSinkOperator pruningSinkOp = (SparkPartitionPruningSinkOperator) nd;
  GenSparkProcContext context = (GenSparkProcContext) procCtx;
  // Locate the op where the branch starts.
  // This is guaranteed to succeed since the branch always follows the pattern
  // shown in the first picture above.
  Operator<?> filterOp = pruningSinkOp;
  Operator<?> selOp = null;
  while (filterOp != null) {
    if (filterOp.getNumChild() > 1) {
      break;
    } else {
      selOp = filterOp;
      filterOp = filterOp.getParentOperators().get(0);
    }
  }
  // Check if this is a MapJoin. If so, do not split.
  for (Operator<?> childOp : filterOp.getChildOperators()) {
    if (childOp instanceof ReduceSinkOperator
        && childOp.getChildOperators().get(0) instanceof MapJoinOperator) {
      context.pruningSinkSet.add(pruningSinkOp);
      return null;
    }
  }
  List<Operator<?>> roots = new LinkedList<Operator<?>>();
  collectRoots(roots, pruningSinkOp);
  List<Operator<?>> savedChildOps = filterOp.getChildOperators();
  filterOp.setChildOperators(Utilities.makeList(selOp));
  // Now clone the tree above selOp
  List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
  for (int i = 0; i < roots.size(); i++) {
    TableScanOperator newTs = (TableScanOperator) newRoots.get(i);
    TableScanOperator oldTs = (TableScanOperator) roots.get(i);
    newTs.getConf().setTableMetadata(oldTs.getConf().getTableMetadata());
  }
  context.clonedPruningTableScanSet.addAll(newRoots);
  // Restore broken links between operators, and remove the branch from the original tree
  filterOp.setChildOperators(savedChildOps);
  filterOp.removeChild(selOp);
  // Find the cloned PruningSink and add it to pruningSinkSet
  Set<Operator<?>> sinkSet = new HashSet<Operator<?>>();
  for (Operator<?> root : newRoots) {
    SparkUtilities.collectOp(sinkSet, root, SparkPartitionPruningSinkOperator.class);
  }
  Preconditions.checkArgument(sinkSet.size() == 1,
      "AssertionError: expected to only contain one SparkPartitionPruningSinkOperator,"
          + " but found " + sinkSet.size());
  SparkPartitionPruningSinkOperator clonedPruningSinkOp =
      (SparkPartitionPruningSinkOperator) sinkSet.iterator().next();
  clonedPruningSinkOp.getConf().setTableScan(pruningSinkOp.getConf().getTableScan());
  context.pruningSinkSet.add(clonedPruningSinkOp);
  return null;
}
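collectRoots(...) is not shown above. A plausible sketch, assuming it simply walks parent links upward from the pruning sink and gathers every operator without parents (the table scans feeding the DPP branch):

// Assumed helper: climb from 'op' to the operators with no parents and
// collect them as the roots of the subtree to be cloned.
private void collectRoots(List<Operator<?>> roots, Operator<?> op) {
  if (op.getNumParent() == 0) {
    roots.add(op);
  } else {
    for (Operator<?> parent : op.getParentOperators()) {
      collectRoots(roots, parent);
    }
  }
}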
Use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
The class GenSparkUtils, method getEnclosingWork.
/**
 * getEnclosingWork finds the BaseWork a given operator belongs to.
 */
public BaseWork getEnclosingWork(Operator<?> op, GenSparkProcContext procCtx) {
  List<Operator<?>> ops = new ArrayList<Operator<?>>();
  OperatorUtils.findRoots(op, ops);
  for (Operator<?> r : ops) {
    BaseWork work = procCtx.rootToWorkMap.get(r);
    if (work != null) {
      return work;
    }
  }
  return null;
}
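A small hypothetical usage sketch (the variable names are assumptions): before wiring an edge between two Spark works, look up the work that already contains an operator's root:

// Hypothetical: find the BaseWork that produced the parent of a reduce sink;
// a null result means the parent has not been assigned to any work yet.
Operator<?> parent = reduceSink.getParentOperators().get(0);
BaseWork producerWork = getEnclosingWork(parent, procCtx);
if (producerWork != null) {
  // connect producerWork to the consuming work here
}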