Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class GenSparkUtils, method createMapWork.
public MapWork createMapWork(GenSparkProcContext context, Operator<?> root, SparkWork sparkWork, PrunedPartitionList partitions, boolean deferSetup) throws SemanticException {
  Preconditions.checkArgument(root.getParentOperators().isEmpty(), "AssertionError: expected root.getParentOperators() to be empty");
  MapWork mapWork = new MapWork("Map " + (++sequenceNumber));
  LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);
  // map work starts with table scan operators
  Preconditions.checkArgument(root instanceof TableScanOperator, "AssertionError: expected root to be an instance of TableScanOperator, but was " + root.getClass().getName());
  String alias_id = null;
  if (context.parseContext != null && context.parseContext.getTopOps() != null) {
    for (String currentAliasID : context.parseContext.getTopOps().keySet()) {
      Operator<? extends OperatorDesc> currOp = context.parseContext.getTopOps().get(currentAliasID);
      if (currOp == root) {
        alias_id = currentAliasID;
        break;
      }
    }
  }
  if (alias_id == null) {
    alias_id = ((TableScanOperator) root).getConf().getAlias();
  }
  if (!deferSetup) {
    setupMapWork(mapWork, context, partitions, (TableScanOperator) root, alias_id);
  }
  // add new item to the Spark work
  sparkWork.add(mapWork);
  return mapWork;
}
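The pattern shown here: a Spark MapWork is always rooted at a TableScanOperator, and its alias is resolved first from the parse context's top operators, falling back to the alias stored in the scan's TableScanDesc. A minimal sketch of that fallback as a hypothetical helper (resolveAlias and its parameters are illustrative, not part of the Hive API):

// Sketch only (hypothetical helper): resolve the alias for a table-scan root the same
// way createMapWork does, falling back to the alias recorded in TableScanDesc.
private static String resolveAlias(Map<String, Operator<? extends OperatorDesc>> topOps,
    TableScanOperator root) {
  for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : topOps.entrySet()) {
    if (entry.getValue() == root) {
      // identity match: the alias maps to exactly this operator instance
      return entry.getKey();
    }
  }
  // no alias found in the parse context; fall back to the scan's descriptor
  return root.getConf().getAlias();
}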
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class SplitOpTreeForDPP, method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  SparkPartitionPruningSinkOperator pruningSinkOp = (SparkPartitionPruningSinkOperator) nd;
  GenSparkProcContext context = (GenSparkProcContext) procCtx;
  // Skip pruning sinks that have already been processed
  for (Operator<?> op : context.pruningSinkSet) {
    if (pruningSinkOp.getOperatorId().equals(op.getOperatorId())) {
      return null;
    }
  }
  // If the pruning sink works with a map join, it does not need to be split into a
  // separate tree. Add the pruning sink operator to context and return.
  if (pruningSinkOp.isWithMapjoin()) {
    context.pruningSinkSet.add(pruningSinkOp);
    return null;
  }
  // Collect the root operators (table scans) of the tree feeding this pruning sink
  List<Operator<?>> roots = new LinkedList<Operator<?>>();
  collectRoots(roots, pruningSinkOp);
  Operator<?> branchingOp = pruningSinkOp.getBranchingOp();
  List<Operator<?>> savedChildOps = branchingOp.getChildOperators();
  List<Operator<?>> firstNodesOfPruningBranch = findFirstNodesOfPruningBranch(branchingOp);
  branchingOp.setChildOperators(null);
  // Now clone the tree above selOp
  List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
  for (int i = 0; i < roots.size(); i++) {
    TableScanOperator newTs = (TableScanOperator) newRoots.get(i);
    TableScanOperator oldTs = (TableScanOperator) roots.get(i);
    newTs.getConf().setTableMetadata(oldTs.getConf().getTableMetadata());
  }
  context.clonedPruningTableScanSet.addAll(newRoots);
  // Locate the branching operator in the cloned tree and attach the pruning branch to it
  Operator newBranchingOp = null;
  for (int i = 0; i < newRoots.size() && newBranchingOp == null; i++) {
    newBranchingOp = OperatorUtils.findOperatorById(newRoots.get(i), branchingOp.getOperatorId());
  }
  Preconditions.checkNotNull(newBranchingOp, "Cannot find the branching operator in cloned tree.");
  newBranchingOp.setChildOperators(firstNodesOfPruningBranch);
  // Restore broken links between operators, and remove the branch from the original tree
  branchingOp.setChildOperators(savedChildOps);
  for (Operator selOp : firstNodesOfPruningBranch) {
    branchingOp.removeChild(selOp);
  }
  Set<Operator<?>> sinkSet = new LinkedHashSet<>();
  for (Operator<?> sel : firstNodesOfPruningBranch) {
    SparkUtilities.collectOp(sinkSet, sel, SparkPartitionPruningSinkOperator.class);
    sel.setParentOperators(Utilities.makeList(newBranchingOp));
  }
  context.pruningSinkSet.addAll(sinkSet);
  return null;
}
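The TableScanOperator-specific detail in this example is the metadata copy after cloning: once SerializationUtilities.cloneOperatorTree returns, the table metadata of each original scan is copied onto its clone by hand. A reduced sketch of that step as a hypothetical helper (restoreTableMetadata is illustrative, not a Hive method):

// Sketch only (hypothetical helper): copy each original TableScanOperator's table
// metadata onto the clone at the same position, as the loop in process() does.
private static void restoreTableMetadata(List<Operator<?>> originals, List<Operator<?>> clones) {
  for (int i = 0; i < originals.size(); i++) {
    TableScanOperator originalTs = (TableScanOperator) originals.get(i);
    TableScanOperator clonedTs = (TableScanOperator) clones.get(i);
    clonedTs.getConf().setTableMetadata(originalTs.getConf().getTableMetadata());
  }
}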
Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.
The class OpProcFactory, method createFilter.
protected static Object createFilter(Operator op, Map<String, List<ExprNodeDesc>> predicates, OpWalkerInfo owi) throws SemanticException {
  RowSchema inputRS = op.getSchema();
  // combine all predicates into a single expression
  List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
  Iterator<List<ExprNodeDesc>> iterator = predicates.values().iterator();
  while (iterator.hasNext()) {
    for (ExprNodeDesc pred : iterator.next()) {
      preds = ExprNodeDescUtils.split(pred, preds);
    }
  }
  if (preds.isEmpty()) {
    return null;
  }
  ExprNodeDesc condn = ExprNodeDescUtils.mergePredicates(preds);
  if (op instanceof TableScanOperator && condn instanceof ExprNodeGenericFuncDesc) {
    boolean pushFilterToStorage;
    HiveConf hiveConf = owi.getParseContext().getConf();
    pushFilterToStorage = hiveConf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_STORAGE);
    if (pushFilterToStorage) {
      condn = pushFilterToStorageHandler((TableScanOperator) op, (ExprNodeGenericFuncDesc) condn, owi, hiveConf);
      if (condn == null) {
        // we pushed the whole thing down
        return null;
      }
    }
  }
  // add new filter op between op and its original children
  List<Operator<? extends OperatorDesc>> originalChilren = op.getChildOperators();
  op.setChildOperators(null);
  Operator<FilterDesc> output = OperatorFactory.getAndMakeChild(new FilterDesc(condn, false), new RowSchema(inputRS.getSignature()), op);
  output.setChildOperators(originalChilren);
  for (Operator<? extends OperatorDesc> ch : originalChilren) {
    List<Operator<? extends OperatorDesc>> parentOperators = ch.getParentOperators();
    int pos = parentOperators.indexOf(op);
    assert pos != -1;
    parentOperators.remove(pos);
    // add the new filter op in the old op's position
    parentOperators.add(pos, output);
  }
  if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    // remove the candidate filter ops
    removeCandidates(op, owi);
  }
  // push down current ppd context to newly added filter
  ExprWalkerInfo walkerInfo = owi.getPrunedPreds(op);
  if (walkerInfo != null) {
    walkerInfo.getNonFinalCandidates().clear();
    owi.putPrunedPreds(output, walkerInfo);
  }
  return output;
}
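The core move above is the splice: a new FilterOperator is inserted between the operator and its original children, while the storage-handler pushdown only applies when op is a TableScanOperator and HiveConf.ConfVars.HIVEOPTPPD_STORAGE is enabled. A minimal sketch of the splice in isolation, assuming a hypothetical parent operator and an already-merged condition (the names parent and condition are illustrative):

// Sketch only: splice a new FilterOperator between "parent" and its existing children,
// mirroring the wiring in createFilter. "parent" and "condition" are assumed inputs.
List<Operator<? extends OperatorDesc>> children = parent.getChildOperators();
parent.setChildOperators(null);  // getAndMakeChild re-links parent -> filter
Operator<FilterDesc> filter = OperatorFactory.getAndMakeChild(
    new FilterDesc(condition, false), new RowSchema(parent.getSchema().getSignature()), parent);
filter.setChildOperators(children);
for (Operator<? extends OperatorDesc> child : children) {
  // replace "parent" with the new filter in each child's parent list, keeping its position
  int pos = child.getParentOperators().indexOf(parent);
  child.getParentOperators().set(pos, filter);
}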