Use of org.apache.hadoop.hive.ql.exec.LimitOperator in the Apache Hive project.
Class SemanticAnalyzer, method genLimitPlan.
@SuppressWarnings("nls")
private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit) throws SemanticException {
  // Appends a limit operator as a child of `input`, reusing the input's row
  // resolver and column infos unchanged, and records the new LimitDesc on
  // globalLimitCtx as the last reduce limit descriptor.
  //
  // Design note carried over from the original author: a map-only job can be
  // optimized - instead of converting it to a map-reduce job, another map job
  // could do the same and avoid the cost of sorting in the map-reduce phase.
  // A better approach would be to write into a local file and then have a
  // map-only job.
  RowResolver rowResolver = opParseCtx.get(input).getRowResolver();

  LimitDesc descriptor = new LimitDesc(offset, limit);
  globalLimitCtx.setLastReduceLimitDesc(descriptor);

  // The limit operator exposes exactly the columns of its input.
  RowSchema limitSchema = new RowSchema(rowResolver.getColumnInfos());
  Operator limitOp = putOpInsertMap(
      OperatorFactory.getAndMakeChild(descriptor, limitSchema, input), rowResolver);

  if (LOG.isDebugEnabled()) {
    LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + rowResolver.toString());
  }
  return limitOp;
}
Use of org.apache.hadoop.hive.ql.exec.LimitOperator in the Apache Hive project.
Class GlobalLimitOptimizer, method transform.
/**
 * Decides whether the global-limit optimization can be switched on for this
 * query and, if so, enables it on the parse context's GlobalLimitCtx.
 *
 * The ParseContext itself is always returned unchanged; the only effect is the
 * possible call to {@code globalLimitCtx.enableOpt(limit, offset)}.
 *
 * @param pctx parse context of the query being optimized
 * @return the same {@code pctx} instance
 * @throws SemanticException propagated from the helpers called here
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  Context ctx = pctx.getContext();
  Map<String, TableScanOperator> topOps = pctx.getTopOps();
  GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
  Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();

  // Only a query with exactly one top operator, no transform/UDTF and no
  // split sampling is considered at all.
  if (topOps.size() != 1 || globalLimitCtx.ifHasTransformOrUDTF() || !nameToSplitSample.isEmpty()) {
    return pctx;
  }

  // From here on we check recursively:
  // 1. whether there is exactly one LIMIT in the query
  // 2. whether there is no aggregation, group-by, distinct, sort by,
  //    distributed by, or table sampling in any of the sub-queries.
  // The query only qualifies if both conditions are satisfied.
  //
  // Example qualified queries:
  //   CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
  //   INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
  //     FROM ... LIMIT...
  //   SELECT * FROM (SELECT col1 as col2 (SELECT * FROM ...) t1 LIMIT ...) t2);
  TableScanOperator ts = topOps.values().iterator().next();
  Table tab = ts.getConf().getTableMetadata();
  if (tab.isNonNative()) {
    LOG.info("Not enabling limit optimization on non native table: " + tab.getTableName());
    return pctx;
  }

  // InputFormat.getSplits wont be called if no input path & TS Vertex will have 0 task parallelism
  if (tab.getStorageHandler() != null) {
    return pctx;
  }

  LimitOperator soleLimit = checkQbpForGlobalLimit(ts);
  if (soleLimit == null) {
    // The query does not qualify for the optimization.
    return pctx;
  }

  LimitDesc soleLimitDesc = soleLimit.getConf();
  Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);

  boolean eligible;
  if (tab.isPartitioned()) {
    eligible = false;
    // Filters are tolerated only when the pruner contains partition columns
    // exclusively and pruning leaves no unknown partitions.
    if (onlyContainsPartnCols(tab, filterOps)) {
      String alias = topOps.keySet().iterator().next();
      PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
      eligible = !partsList.hasUnknownPartitions();
    }
  } else {
    // Unpartitioned table: any filter at all disqualifies the query.
    eligible = filterOps.isEmpty();
  }

  if (eligible) {
    Integer tempOffset = soleLimitDesc.getOffset();
    globalLimitCtx.enableOpt(soleLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
  }

  if (globalLimitCtx.isEnable()) {
    LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
    LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
  }
  return pctx;
}
Use of org.apache.hadoop.hive.ql.exec.LimitOperator in the Apache Hive project.
Class SetSparkReducerParallelism, method needSetParallelism.
// Tests whether the RS needs its parallelism set automatically.
//
// Returns true when the reduce sink has no positive reducer count yet. When it
// has exactly one reducer, an order-by, HIVESAMPLINGFORORDERBY switched on and
// is not deduplicated, returns true only if no LimitOperator is found among
// its descendants. Returns false in every other case.
private boolean needSetParallelism(ReduceSinkOperator reduceSink, HiveConf hiveConf) {
  ReduceSinkDesc desc = reduceSink.getConf();
  if (desc.getNumReducers() <= 0) {
    return true;
  }

  boolean singleReducerSampledOrderBy = desc.getNumReducers() == 1
      && desc.hasOrderBy()
      && hiveConf.getBoolVar(HiveConf.ConfVars.HIVESAMPLINGFORORDERBY)
      && !desc.isDeduplicated();
  if (!singleReducerSampledOrderBy) {
    return false;
  }

  // Depth-first walk over everything below the reduce sink.
  Stack<Operator<? extends OperatorDesc>> pending = new Stack<>();
  List<Operator<? extends OperatorDesc>> directChildren = reduceSink.getChildOperators();
  if (directChildren != null) {
    pending.addAll(directChildren);
  }

  while (!pending.isEmpty()) {
    Operator<? extends OperatorDesc> current = pending.pop();
    // A LimitOperator anywhere below means parallelism must not be set.
    if (current instanceof LimitOperator) {
      return false;
    }
    // Do not descend below a terminal operator.
    if (current instanceof TerminalOperator) {
      continue;
    }
    List<Operator<? extends OperatorDesc>> next = current.getChildOperators();
    if (next != null) {
      pending.addAll(next);
    }
  }
  return true;
}
Use of org.apache.hadoop.hive.ql.exec.LimitOperator in the Apache Hive project.
Class ConstantPropagateProcCtx, method getPropagatedConstants.
/**
 * Get propagated constant map from parents.
 *
 * Traverse all parents of current operator; if a parent has propagated constants (recorded in
 * opToConstantExprs), map them onto the columns of the current operator's schema and add them
 * to the current constant map. Columns are matched by position for LimitOperator,
 * FilterOperator and UnionOperator, and through the operator's column expression map for
 * JoinOperator and every other single-parent operator.
 *
 * @param op
 *        operator getting the propagated constants.
 * @return map of ColumnInfo to ExprNodeDesc. The values of that map must be either
 *         ExprNodeConstantDesc or ExprNodeNullDesc. The map is empty (never null) when the
 *         operator has no schema, no parents, or none of its parents carries constants.
 */
public Map<ColumnInfo, ExprNodeDesc> getPropagatedConstants(Operator<? extends Serializable> op) {
  // this map should map columnInfo to ExprConstantNodeDesc
  Map<ColumnInfo, ExprNodeDesc> constants = new HashMap<ColumnInfo, ExprNodeDesc>();
  if (op.getSchema() == null) {
    return constants;
  }
  RowSchema rs = op.getSchema();
  LOG.debug("Getting constants of op:" + op + " with rs:" + rs);
  if (op.getParentOperators() == null) {
    return constants;
  }
  // A previous solution is based on tableAlias and colAlias, which is
  // unsafe, esp. when CBO generates derived table names. see HIVE-13602.
  // For correctness purpose, we only trust colExpMap.
  // We assume that CBO can do the constantPropagation before this function is
  // called to help improve the performance.
  // UnionOperator, LimitOperator and FilterOperator are special, they should already be
  // column-position aligned.

  // parentsToConstant.get(i) holds, for the i-th parent, the constants keyed by their
  // position in that parent's schema. The list is kept index-aligned with
  // op.getParentOperators(): a parent without known constants contributes an empty map.
  List<Map<Integer, ExprNodeDesc>> parentsToConstant = new ArrayList<>();
  boolean areAllParentsContainConstant = true;
  boolean noParentsContainConstant = true;
  for (Operator<?> parent : op.getParentOperators()) {
    Map<ColumnInfo, ExprNodeDesc> constMap = opToConstantExprs.get(parent);
    if (constMap == null) {
      LOG.debug("Constant of Op " + parent.getOperatorId() + " is not found");
      areAllParentsContainConstant = false;
      // Placeholder so that later lookups by parent index/tag (see the join branch)
      // address the right parent instead of a shifted one or running off the list.
      parentsToConstant.add(new HashMap<Integer, ExprNodeDesc>());
    } else {
      noParentsContainConstant = false;
      Map<Integer, ExprNodeDesc> map = new HashMap<>();
      for (Entry<ColumnInfo, ExprNodeDesc> entry : constMap.entrySet()) {
        map.put(parent.getSchema().getPosition(entry.getKey().getInternalName()), entry.getValue());
      }
      parentsToConstant.add(map);
      LOG.debug("Constant of Op " + parent.getOperatorId() + " " + constMap);
    }
  }
  if (noParentsContainConstant) {
    return constants;
  }
  List<ColumnInfo> signature = op.getSchema().getSignature();
  if (op instanceof LimitOperator || op instanceof FilterOperator) {
    // there should be only one parent.
    if (op.getParentOperators().size() == 1) {
      // Same column layout as the parent: copy constants position by position.
      Map<Integer, ExprNodeDesc> parentToConstant = parentsToConstant.get(0);
      for (int index = 0; index < signature.size(); index++) {
        if (parentToConstant.containsKey(index)) {
          constants.put(signature.get(index), parentToConstant.get(index));
        }
      }
    }
  } else if (op instanceof UnionOperator && areAllParentsContainConstant) {
    // A union column is constant only if every parent has the same constant
    // at that position.
    for (int index = 0; index < signature.size(); index++) {
      ExprNodeDesc constant = null;
      for (Map<Integer, ExprNodeDesc> parentToConstant : parentsToConstant) {
        if (!parentToConstant.containsKey(index)) {
          // if this parent does not contain a constant at this position, we
          // continue to look at other positions.
          constant = null;
          break;
        } else {
          if (constant == null) {
            constant = parentToConstant.get(index);
          } else {
            // compare if they are the same constant.
            ExprNodeDesc nextConstant = parentToConstant.get(index);
            if (!nextConstant.isSame(constant)) {
              // they are not the same constant. for example, union all of 1
              // and 2.
              constant = null;
              break;
            }
          }
        }
      }
      // we have checked all the parents for the "index" position.
      if (constant != null) {
        constants.put(signature.get(index), constant);
      }
    }
  } else if (op instanceof JoinOperator) {
    JoinOperator joinOp = (JoinOperator) op;
    Iterator<Entry<Byte, List<ExprNodeDesc>>> itr = joinOp.getConf().getExprs().entrySet().iterator();
    while (itr.hasNext()) {
      Entry<Byte, List<ExprNodeDesc>> e = itr.next();
      // The tag is used as the index of the parent the expressions belong to.
      int tag = e.getKey();
      Operator<?> parent = op.getParentOperators().get(tag);
      List<ExprNodeDesc> exprs = e.getValue();
      if (exprs == null) {
        continue;
      }
      Map<Integer, ExprNodeDesc> parentToConstant = parentsToConstant.get(tag);
      for (ExprNodeDesc expr : exprs) {
        // we are only interested in ExprNodeColumnDesc
        if (expr instanceof ExprNodeColumnDesc) {
          String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
          // find this parentColName in its parent's rs
          int parentPos = parent.getSchema().getPosition(parentColName);
          if (parentToConstant.containsKey(parentPos)) {
            // reverse look up colExprMap to find the childColName
            if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
              for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
                if (entry.getValue().isSame(expr)) {
                  int childPos = op.getSchema().getPosition(entry.getKey());
                  if (childPos == -1) {
                    // Not present in the output schema; same treatment as the
                    // generic single-parent branch below.
                    continue;
                  }
                  // now propagate the constant from the parent to the child
                  constants.put(signature.get(childPos), parentToConstant.get(parentPos));
                }
              }
            }
          }
        }
      }
    }
  } else {
    // there should be only one parent.
    if (op.getParentOperators().size() == 1) {
      Operator<?> parent = op.getParentOperators().get(0);
      if (op.getColumnExprMap() != null && op.getColumnExprMap().entrySet() != null) {
        for (Entry<String, ExprNodeDesc> entry : op.getColumnExprMap().entrySet()) {
          if (op.getSchema().getPosition(entry.getKey()) == -1) {
            // Not present
            continue;
          }
          ExprNodeDesc expr = entry.getValue();
          if (expr instanceof ExprNodeColumnDesc) {
            String parentColName = ((ExprNodeColumnDesc) expr).getColumn();
            // find this parentColName in its parent's rs
            int parentPos = parent.getSchema().getPosition(parentColName);
            if (parentsToConstant.get(0).containsKey(parentPos)) {
              // this position in parent is a constant
              // now propagate the constant from the parent to the child
              constants.put(signature.get(op.getSchema().getPosition(entry.getKey())), parentsToConstant.get(0).get(parentPos));
            }
          }
        }
      }
    }
  }
  LOG.debug("Offering constants " + constants.keySet() + " to operator " + op.toString());
  return constants;
}
Use of org.apache.hadoop.hive.ql.exec.LimitOperator in the Apache Hive project.
Class GlobalLimitOptimizer, method checkQbpForGlobalLimit.
/**
 * Checks whether the operators that {@code OperatorUtils.classifyOperators} collects starting
 * from the given table scan allow the global-limit optimization, and if so returns the single
 * LIMIT operator among them.
 *
 * @param ts table scan operator the classification starts from
 * @return the one and only LimitOperator found, or {@code null} when the query does not
 *         qualify: a reduce sink orders or partitions, a group-by aggregates or is distinct,
 *         a filter is a sampling predicate, or the number of limit operators is not exactly
 *         one (none at all, or more than one)
 */
private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
  Set<Class<? extends Operator<?>>> searchedClasses = new ImmutableSet.Builder<Class<? extends Operator<?>>>().add(ReduceSinkOperator.class).add(GroupByOperator.class).add(FilterOperator.class).add(LimitOperator.class).build();
  Multimap<Class<? extends Operator<?>>, Operator<?>> ops = OperatorUtils.classifyOperators(ts, searchedClasses);
  // - For every reduce sink, existsOrdering AND existsPartitioning should be false.
  for (Operator<?> op : ops.get(ReduceSinkOperator.class)) {
    ReduceSinkDesc reduceSinkConf = ((ReduceSinkOperator) op).getConf();
    if (reduceSinkConf.isOrdering() || reduceSinkConf.isPartitioning()) {
      return null;
    }
  }
  // - There cannot exist any (distinct) aggregate.
  for (Operator<?> op : ops.get(GroupByOperator.class)) {
    GroupByDesc groupByConf = ((GroupByOperator) op).getConf();
    if (groupByConf.isAggregate() || groupByConf.isDistinct()) {
      return null;
    }
  }
  // - There cannot exist any sampling predicate.
  for (Operator<?> op : ops.get(FilterOperator.class)) {
    FilterDesc filterConf = ((FilterOperator) op).getConf();
    if (filterConf.getIsSamplingPred()) {
      return null;
    }
  }
  // Exactly one limit: return it. Zero or several limits: the query does not
  // qualify, which callers detect through the null result.
  Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
  if (limitOps.size() == 1) {
    return (LimitOperator) limitOps.iterator().next();
  }
  return null;
}
Aggregations