Use of org.apache.hadoop.hive.ql.exec.RowSchema in the Apache Hive project.
Class SyntheticJoinPredicate, method createFilter.
/**
 * Inserts a new filter operator between {@code target} (the child) and {@code parent}
 * (the input). The filter's descriptor is flagged as a synthetic join predicate.
 *
 * @param target operator that currently consumes {@code parent}; becomes the filter's child
 * @param parent operator that currently feeds {@code target}; becomes the filter's parent
 * @param parentRS row schema whose signature is used to build the filter's own schema
 * @param filterExpr predicate carried by the new filter
 * @return the newly created filter operator, wired in between parent and target
 */
private static Operator<FilterDesc> createFilter(Operator<?> target, Operator<?> parent, RowSchema parentRS, ExprNodeDesc filterExpr) {
  final FilterDesc syntheticDesc = new FilterDesc(filterExpr, false);
  syntheticDesc.setSyntheticJoinPredicate(true);

  // The filter gets a fresh RowSchema built from the parent's signature.
  final Operator<FilterDesc> filterOp = OperatorFactory.get(
      parent.getCompilationOpContext(),
      syntheticDesc,
      new RowSchema(parentRS.getSignature()));

  // Upstream edge: parent -> filter (the filter takes target's slot under parent).
  filterOp.getParentOperators().add(parent);
  parent.replaceChild(target, filterOp);

  // Downstream edge: filter -> target (the filter takes parent's slot above target).
  filterOp.getChildOperators().add(target);
  target.replaceParent(parent, filterOp);

  return filterOp;
}
Use of org.apache.hadoop.hive.ql.exec.RowSchema in the Apache Hive project.
Class PredicateTransitivePropagate, method createFilter.
/**
 * Inserts a new filter operator between {@code target} (the child) and {@code parent}
 * (the input).
 *
 * @param target operator that currently consumes {@code parent}; becomes the filter's child
 * @param parent operator that currently feeds {@code target}; becomes the filter's parent
 * @param parentRS row schema whose signature is used to build the filter's own schema
 * @param filterExpr predicate carried by the new filter
 * @return the newly created filter operator, wired in between parent and target
 */
private Operator<FilterDesc> createFilter(Operator<?> target, Operator<?> parent, RowSchema parentRS, ExprNodeDesc filterExpr) {
  final Operator<FilterDesc> inserted = OperatorFactory.get(
      parent.getCompilationOpContext(),
      new FilterDesc(filterExpr, false),
      new RowSchema(parentRS.getSignature()));

  // Hook the new operator up to its neighbours ...
  inserted.getParentOperators().add(parent);
  inserted.getChildOperators().add(target);

  // ... and make the neighbours point at it instead of at each other.
  parent.replaceChild(target, inserted);
  target.replaceParent(parent, inserted);

  return inserted;
}
Use of org.apache.hadoop.hive.ql.exec.RowSchema in the Apache Hive project.
Class PredicateTransitivePropagate, method transform.
/**
 * Walks the operator graph with a single Filter-ReduceSink-Join rule, then applies the
 * filters collected in the {@code TransitiveContext}: for each ReduceSink, the new
 * predicates are either merged into its parent filter or placed in a freshly inserted
 * filter between the ReduceSink and its parent.
 *
 * @param pctx parse context to transform; also stored in {@code pGraphContext}
 * @return the same parse context that was passed in
 * @throws SemanticException declared by the signature; this method contains no explicit throw
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  pGraphContext = pctx;

  Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
  final String filterRsJoinPattern = "(" + FilterOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + JoinOperator.getOperatorName() + "%)";
  rules.put(new RuleRegExp("R1", filterRsJoinPattern), new JoinTransitive());

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  TransitiveContext transitiveCtx = new TransitiveContext();
  Dispatcher dispatcher = new DefaultRuleDispatcher(null, rules, transitiveCtx);
  GraphWalker walker = new LevelOrderWalker(dispatcher, 2);

  // Start the walk from every top operator.
  List<Node> roots = new ArrayList<Node>(pGraphContext.getTopOps().values());
  walker.startWalking(roots, null);

  // Place each collected predicate list between its ReduceSink and that ReduceSink's parent.
  for (Map.Entry<ReduceSinkOperator, List<ExprNodeDesc>> pending : transitiveCtx.getNewfilters().entrySet()) {
    ReduceSinkOperator rs = pending.getKey();
    Operator<?> rsParent = rs.getParentOperators().get(0);
    List<ExprNodeDesc> predicates = pending.getValue();

    if (rsParent instanceof FilterOperator) {
      // A filter already sits above the ReduceSink: fold the new predicates into it.
      FilterOperator existing = (FilterOperator) rsParent;
      predicates = ExprNodeDescUtils.split(existing.getConf().getPredicate(), predicates);
      existing.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(predicates));
      continue;
    }

    // No filter above the ReduceSink yet: insert a new one carrying the merged predicate.
    ExprNodeDesc merged = ExprNodeDescUtils.mergePredicates(predicates);
    createFilter(rs, rsParent, rsParent.getSchema(), merged);
  }
  return pGraphContext;
}
Use of org.apache.hadoop.hive.ql.exec.RowSchema in the Apache Hive project.
Class ProjectionPusher, method pushProjectionsAndFilters.
/**
 * Pushes column projections and a combined filter expression into {@code jobConf} for
 * the aliases whose path matches the given split.
 *
 * <p>Aliases are taken from every {@code mapWork} path entry whose URI path equals
 * {@code splitPath} or {@code splitPathWithNoSchema}. For each alias that maps to a
 * {@link TableScanOperator}, the needed column IDs, nested column paths and filter
 * expression are collected. The filters are ORed together, but only when <em>every</em>
 * table scan has a filter; if any scan has none, no filter is pushed, because pushing
 * the remaining filters would drop rows that the unfiltered scan still needs.
 *
 * @param jobConf job configuration that receives the projections and filter
 * @param splitPath path of the split, compared against each entry's URI path
 * @param splitPathWithNoSchema alternative form of the split path, compared the same way
 */
private void pushProjectionsAndFilters(final JobConf jobConf, final String splitPath, final String splitPathWithNoSchema) {
  if (mapWork == null || mapWork.getPathToAliases() == null) {
    return;
  }

  // Gather every alias registered under a path that matches this split.
  final Set<String> aliases = new HashSet<String>();
  for (final Entry<Path, ArrayList<String>> entry : mapWork.getPathToAliases().entrySet()) {
    final String key = entry.getKey().toUri().getPath();
    if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) {
      aliases.addAll(entry.getValue());
    }
  }

  // Collect the needed columns from all the aliases and create ORed filter
  // expression for the table.
  boolean allColumnsNeeded = false;
  boolean noFilters = false;
  final Set<Integer> neededColumnIDs = new HashSet<Integer>();
  // To support nested column pruning, we need to track the path from the top to the nested
  // fields
  final Set<String> neededNestedColumnPaths = new HashSet<String>();
  final List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
  RowSchema rowSchema = null;

  for (String alias : aliases) {
    final Operator<? extends Serializable> op = mapWork.getAliasToWork().get(alias);
    // instanceof is false for null, so no separate null check is needed.
    if (!(op instanceof TableScanOperator)) {
      continue;
    }
    final TableScanOperator ts = (TableScanOperator) op;

    if (ts.getNeededColumnIDs() == null) {
      allColumnsNeeded = true;
    } else {
      neededColumnIDs.addAll(ts.getNeededColumnIDs());
      // addAll(null) throws; skip the nested paths when the scan does not provide any.
      if (ts.getNeededNestedColumnPaths() != null) {
        neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
      }
    }

    // The schema of the last table scan visited is the one handed to pushFilters.
    rowSchema = ts.getSchema();

    final ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
    if (filterExpr == null) {
      // No filter if ANY table scan has no filter expression. The flag is sticky:
      // a later scan that does have a filter must not switch filtering back on.
      noFilters = true;
    } else {
      filterExprs.add(filterExpr);
    }
  }

  ExprNodeGenericFuncDesc tableFilterExpr = null;
  if (!noFilters) {
    try {
      for (ExprNodeGenericFuncDesc filterExpr : filterExprs) {
        if (tableFilterExpr == null) {
          tableFilterExpr = filterExpr;
        } else {
          tableFilterExpr = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), Arrays.<ExprNodeDesc>asList(tableFilterExpr, filterExpr));
        }
      }
    } catch (UDFArgumentException ex) {
      // Best effort: if the OR expression cannot be built, push no filter at all.
      LOG.debug("Turn off filtering due to " + ex);
      tableFilterExpr = null;
    }
  }

  // push down projections
  if (allColumnsNeeded) {
    ColumnProjectionUtils.setReadAllColumns(jobConf);
  } else if (!neededColumnIDs.isEmpty()) {
    ColumnProjectionUtils.appendReadColumns(jobConf, new ArrayList<Integer>(neededColumnIDs));
    ColumnProjectionUtils.appendNestedColumnPaths(jobConf, new ArrayList<String>(neededNestedColumnPaths));
  }

  pushFilters(jobConf, rowSchema, tableFilterExpr);
}
Use of org.apache.hadoop.hive.ql.exec.RowSchema in the Apache Hive project.
Class MapJoinProcessor, method genSelectPlan.
/**
 * Inserts a pass-through select operator directly below the given map-join operator.
 * The select re-emits each of the join's output columns under the same internal name,
 * and the join's previous children are re-parented onto the select.
 *
 * @param pctx parse context (not used by this method body)
 * @param input map-join operator that receives the new select as its only child
 * @throws SemanticException declared by the signature; this method contains no explicit throw
 */
protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
  // Detach the existing children first; they are re-attached under the select below.
  List<Operator<? extends OperatorDesc>> formerChildren = input.getChildOperators();
  input.setChildOperators(null);

  // create a dummy select - This select is needed by the walker to split the
  // mapJoin later on
  RowSchema joinSchema = input.getSchema();
  ArrayList<ExprNodeDesc> selectExprs = new ArrayList<ExprNodeDesc>();
  ArrayList<String> selectNames = new ArrayList<String>();
  ArrayList<ColumnInfo> selectSignature = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> exprByName = new HashMap<String, ExprNodeDesc>();

  for (String internalName : input.getConf().getOutputColumnNames()) {
    ColumnInfo source = joinSchema.getColumnInfo(internalName);

    // Column reference that reads the join's output column.
    ExprNodeDesc columnRef = new ExprNodeColumnDesc(source.getType(), source.getInternalName(), source.getTabAlias(), source.getIsVirtualCol());
    selectExprs.add(columnRef);
    selectNames.add(internalName);

    // Matching entry in the select's row schema, carrying over the alias.
    ColumnInfo copied = new ColumnInfo(internalName, source.getType(), source.getTabAlias(), source.getIsVirtualCol(), source.isHiddenVirtualCol());
    copied.setAlias(source.getAlias());
    selectSignature.add(copied);

    exprByName.put(internalName, columnRef);
  }

  SelectDesc selectDesc = new SelectDesc(selectExprs, selectNames, false);
  SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, new RowSchema(selectSignature), input);
  selectOp.setColumnExprMap(exprByName);

  // Insert the select operator in between.
  selectOp.setChildOperators(formerChildren);
  for (Operator<? extends OperatorDesc> child : formerChildren) {
    child.replaceParent(input, selectOp);
  }
}
Aggregations