Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
From the class DynamicPartitionPruningOptimization, method createFinalRsForSemiJoinOp:
private void createFinalRsForSemiJoinOp(ParseContext parseContext, TableScanOperator ts, GroupByOperator gb, ExprNodeDesc key, String keyBaseAlias, ExprNodeDesc colExpr, boolean isHint) throws SemanticException {
    ArrayList<String> gbOutputNames = new ArrayList<>();
    // One each for min, max and bloom filter
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
    int colPos = 0;
    ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < gbOutputNames.size() - 1; i++) {
        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos++), "", false);
        rsValueCols.add(expr);
    }
    // Bloom filter uses binary
    ExprNodeColumnDesc colBFExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos++), "", false);
    rsValueCols.add(colBFExpr);
    // Create the final Reduce Sink Operator
    ReduceSinkDesc rsDescFinal = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID, NullOrdering.defaultNullOrder(parseContext.getConf()));
    ReduceSinkOperator rsOpFinal = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDescFinal, new RowSchema(gb.getSchema()), gb);
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
    rsOpFinal.setColumnExprMap(columnExprMap);
    LOG.debug("DynamicSemiJoinPushdown: Saving RS to TS mapping: " + rsOpFinal + ": " + ts);
    SemiJoinBranchInfo sjInfo = new SemiJoinBranchInfo(ts, isHint);
    parseContext.getRsToSemiJoinBranchInfo().put(rsOpFinal, sjInfo);
    // Save the info that is required at query time to resolve dynamic/runtime values.
    RuntimeValuesInfo runtimeValuesInfo = new RuntimeValuesInfo();
    TableDesc rsFinalTableDesc = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(rsValueCols, "_col"));
    List<String> dynamicValueIDs = new ArrayList<String>();
    dynamicValueIDs.add(keyBaseAlias + "_min");
    dynamicValueIDs.add(keyBaseAlias + "_max");
    dynamicValueIDs.add(keyBaseAlias + "_bloom_filter");
    runtimeValuesInfo.setTableDesc(rsFinalTableDesc);
    runtimeValuesInfo.setDynamicValueIDs(dynamicValueIDs);
    runtimeValuesInfo.setColExprs(rsValueCols);
    runtimeValuesInfo.setTargetColumns(Collections.singletonList(colExpr));
    parseContext.getRsToRuntimeValuesInfoMap().put(rsOpFinal, runtimeValuesInfo);
    parseContext.getColExprToGBMap().put(key, gb);
}
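The snippet forwards exactly three value columns (min, max, and the binary bloom filter) through the final ReduceSinkDesc and registers matching dynamic value IDs for query time. For reference, the value-column construction can be read as a small helper; this is only an illustrative sketch (buildSemiJoinValueColumns is a hypothetical name) that reuses the same ExprNodeColumnDesc, SemanticAnalyzer.getColumnInternalName and TypeInfoFactory.binaryTypeInfo calls shown above.

    // Hypothetical helper: build the three value columns (min, max, bloom filter)
    // that the final ReduceSink forwards, mirroring the loop above.
    private static List<ExprNodeDesc> buildSemiJoinValueColumns(ExprNodeDesc key) {
        List<ExprNodeDesc> cols = new ArrayList<>();
        // min and max share the join key's type
        cols.add(new ExprNodeColumnDesc(key.getTypeInfo(), SemanticAnalyzer.getColumnInternalName(0), "", false));
        cols.add(new ExprNodeColumnDesc(key.getTypeInfo(), SemanticAnalyzer.getColumnInternalName(1), "", false));
        // the bloom filter is serialized as binary
        cols.add(new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, SemanticAnalyzer.getColumnInternalName(2), "", false));
        return cols;
    }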
Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
From the class ColumnPrunerProcFactory, method pruneReduceSinkOperator:
private static void pruneReduceSinkOperator(boolean[] retainFlags, ReduceSinkOperator reduce, ColumnPrunerProcCtx cppCtx) throws SemanticException {
    ReduceSinkDesc reduceConf = reduce.getConf();
    Map<String, ExprNodeDesc> oldMap = reduce.getColumnExprMap();
    LOG.info("RS " + reduce.getIdentifier() + " oldColExprMap: " + oldMap);
    RowSchema oldRS = reduce.getSchema();
    List<ColumnInfo> old_signature = oldRS.getSignature();
    List<ColumnInfo> signature = new ArrayList<ColumnInfo>(old_signature);
    List<String> valueColNames = reduceConf.getOutputValueColumnNames();
    ArrayList<String> newValueColNames = new ArrayList<String>();
    List<ExprNodeDesc> keyExprs = reduceConf.getKeyCols();
    List<ExprNodeDesc> valueExprs = reduceConf.getValueCols();
    ArrayList<ExprNodeDesc> newValueExprs = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < retainFlags.length; i++) {
        String outputCol = valueColNames.get(i);
        ExprNodeDesc outputColExpr = valueExprs.get(i);
        if (!retainFlags[i]) {
            ColumnInfo colInfo = oldRS.getColumnInfo(outputCol);
            if (colInfo == null) {
                outputCol = Utilities.ReduceField.VALUE.toString() + "." + outputCol;
                colInfo = oldRS.getColumnInfo(outputCol);
            }
            // The ColumnInfo may already have been removed from the row resolver
            // (e.g. when several output columns referenced the same name), so skip it.
            if (colInfo == null) {
                continue;
            }
            // Only drop the column if it does not also appear in the keyExprs of the RS.
            if (ExprNodeDescUtils.indexOf(outputColExpr, keyExprs) == -1) {
                oldMap.remove(outputCol);
                signature.remove(colInfo);
            }
        } else {
            newValueColNames.add(outputCol);
            newValueExprs.add(outputColExpr);
        }
    }
    oldRS.setSignature(signature);
    reduce.getSchema().setSignature(signature);
    reduceConf.setOutputValueColumnNames(newValueColNames);
    reduceConf.setValueCols(newValueExprs);
    TableDesc newValueTable = PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(reduceConf.getValueCols(), newValueColNames, 0, ""));
    reduceConf.setValueSerializeInfo(newValueTable);
    LOG.info("RS " + reduce.getIdentifier() + " newColExprMap: " + oldMap);
}
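The core of the pruning is a walk over retainFlags that rebuilds the two parallel lists (value column names and value expressions) with only the retained entries, then re-derives the value serialization TableDesc from the pruned columns. A minimal sketch of that filtering idea, with a hypothetical helper name not present in Hive:

    // Illustrative sketch only: keep the i-th element of a list when retainFlags[i] is set.
    static <T> List<T> retain(boolean[] retainFlags, List<T> items) {
        List<T> kept = new ArrayList<>();
        for (int i = 0; i < retainFlags.length; i++) {
            if (retainFlags[i]) {
                kept.add(items.get(i));
            }
        }
        return kept;
    }

    // Usage mirroring the loop above (sketch):
    //   List<String> newValueColNames = retain(retainFlags, valueColNames);
    //   List<ExprNodeDesc> newValueExprs = retain(retainFlags, valueExprs);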
Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
From the class GlobalLimitOptimizer, method checkQbpForGlobalLimit:
/**
 * Check the limit number in all sub queries.
 *
 * @return the LimitOperator if there is one and only one limit across all
 *         subqueries; null if there is no limit or more than one limit
 */
private static LimitOperator checkQbpForGlobalLimit(TableScanOperator ts) {
    Set<Class<? extends Operator<?>>> searchedClasses = new ImmutableSet.Builder<Class<? extends Operator<?>>>().add(ReduceSinkOperator.class).add(GroupByOperator.class).add(FilterOperator.class).add(LimitOperator.class).build();
    Multimap<Class<? extends Operator<?>>, Operator<?>> ops = OperatorUtils.classifyOperators(ts, searchedClasses);
    // - No ReduceSink may impose an ordering or a partitioning (isOrdering / isPartitioning must be false).
    for (Operator<?> op : ops.get(ReduceSinkOperator.class)) {
        ReduceSinkDesc reduceSinkConf = ((ReduceSinkOperator) op).getConf();
        if (reduceSinkConf.isOrdering() || reduceSinkConf.isPartitioning()) {
            return null;
        }
    }
    // - There cannot exist any (distinct) aggregate.
    for (Operator<?> op : ops.get(GroupByOperator.class)) {
        GroupByDesc groupByConf = ((GroupByOperator) op).getConf();
        if (groupByConf.isAggregate() || groupByConf.isDistinct()) {
            return null;
        }
    }
    // - There cannot exist any sampling predicate.
    for (Operator<?> op : ops.get(FilterOperator.class)) {
        FilterDesc filterConf = ((FilterOperator) op).getConf();
        if (filterConf.getIsSamplingPred()) {
            return null;
        }
    }
    // If there is one and only one limit below ts, return it; otherwise return null.
    Collection<Operator<?>> limitOps = ops.get(LimitOperator.class);
    if (limitOps.size() == 1) {
        return (LimitOperator) limitOps.iterator().next();
    }
    return null;
}
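Before the limit count is even considered, the method rejects any plan in which a ReduceSinkDesc imposes ordering or partitioning, because either would change which rows survive a global limit. A minimal sketch of that guard, using only the isOrdering()/isPartitioning() accessors seen above (the helper name is hypothetical):

    // Sketch: true if any ReduceSink in the sub-tree introduces ordering or partitioning.
    private static boolean hasOrderingOrPartitioning(Collection<Operator<?>> reduceSinks) {
        for (Operator<?> op : reduceSinks) {
            ReduceSinkDesc conf = ((ReduceSinkOperator) op).getConf();
            if (conf.isOrdering() || conf.isPartitioning()) {
                return true;
            }
        }
        return false;
    }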
Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
From the class AbstractSMBJoinProc, method canConvertJoinToBucketMapJoin:
// Can the join operator be converted to a bucket map-merge join operator?
@SuppressWarnings("unchecked")
protected boolean canConvertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx context) throws SemanticException {
    // This has already been inspected and rejected
    if (context.getRejectedJoinOps().contains(joinOp)) {
        return false;
    }
    if (!this.pGraphContext.getJoinOps().contains(joinOp)) {
        return false;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(pGraphContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e.getMessage());
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // No candidate big table (e.g. a full outer join), so this join cannot become
        // a map join of any type. So return false.
        return false;
    }
    int bigTablePosition = bigTableMatcher.getBigTablePosition(pGraphContext, joinOp, joinCandidates);
    if (bigTablePosition < 0) {
        // contains aliases from sub-query
        return false;
    }
    context.setBigTablePosition(bigTablePosition);
    String joinAlias = bigTablePosition == 0 ? joinOp.getConf().getLeftAlias() : joinOp.getConf().getRightAliases()[bigTablePosition - 1];
    joinAlias = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinAlias);
    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    List<Operator<? extends OperatorDesc>> parentOps = joinOp.getParentOperators();
    // get the join keys from parent ReduceSink operators
    for (Operator<? extends OperatorDesc> parentOp : parentOps) {
        ReduceSinkDesc rsconf = ((ReduceSinkOperator) parentOp).getConf();
        Byte tag = (byte) rsconf.getTag();
        List<ExprNodeDesc> keys = rsconf.getKeyCols();
        keyExprMap.put(tag, keys);
    }
    context.setKeyExprMap(keyExprMap);
    // Make a deep copy of the aliases so that they are not changed in the context
    String[] joinSrcs = joinOp.getConf().getBaseSrc();
    String[] srcs = new String[joinSrcs.length];
    for (int srcPos = 0; srcPos < joinSrcs.length; srcPos++) {
        joinSrcs[srcPos] = QB.getAppendedAliasFromId(joinOp.getConf().getId(), joinSrcs[srcPos]);
        srcs[srcPos] = new String(joinSrcs[srcPos]);
    }
    // Check whether the join, keyed on the candidate chosen by the big
    // table matcher, can actually be converted to a bucket map join.
    return checkConvertBucketMapJoin(context, joinOp.getConf().getAliasToOpInfo(), keyExprMap, joinAlias, Arrays.asList(srcs));
}
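The ReduceSinkDesc usage here is the key-extraction loop: each parent of the JoinOperator is a ReduceSinkOperator whose descriptor carries the join keys for its side, indexed by tag. A minimal sketch of just that step, with a hypothetical helper name and only the getTag()/getKeyCols() accessors used above:

    // Sketch: collect per-side join keys from the parent ReduceSink operators.
    private static Map<Byte, List<ExprNodeDesc>> collectJoinKeys(JoinOperator joinOp) {
        Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<>();
        for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
            ReduceSinkDesc rsConf = ((ReduceSinkOperator) parentOp).getConf();
            keyExprMap.put((byte) rsConf.getTag(), rsConf.getKeyCols());
        }
        return keyExprMap;
    }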
Use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
From the class SemanticAnalyzer, method genCommonGroupByPlanReduceSinkOperator:
@SuppressWarnings("nls")
private ReduceSinkOperator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests, Operator inputOperatorInfo) throws SemanticException {
    RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    QBParseInfo parseInfo = qb.getParseInfo();
    RowResolver reduceSinkOutputRowResolver = new RowResolver();
    reduceSinkOutputRowResolver.setIsExprResolver(true);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    // The group by keys and distinct keys should be the same for all dests, so using the first
    // one to produce these will be the same as using any other.
    String dest = dests.get(0);
    // Pre-compute group-by keys and store in reduceKeys
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    List<ExprNodeDesc> reduceKeys = getReduceKeysForReduceSink(grpByExprs, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    int keyLength = reduceKeys.size();
    List<List<Integer>> distinctColIndices = getDistinctColIndicesForReduceSink(parseInfo, dest, reduceKeys, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputKeyColumnNames, colExprMap);
    List<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    // The dests can have different non-distinct aggregations, so iterate over all of them.
    for (String destination : dests) {
        getReduceValuesForReduceSinkNoMapAgg(parseInfo, destination, reduceSinkInputRowResolver, reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap);
        // Need to pass all of the columns used in the where clauses as reduce values
        ASTNode whereClause = parseInfo.getWhrForClause(destination);
        if (whereClause != null) {
            assert whereClause.getChildCount() == 1;
            ASTNode predicates = (ASTNode) whereClause.getChild(0);
            Map<ASTNode, ExprNodeDesc> nodeOutputs = genAllExprNodeDesc(predicates, reduceSinkInputRowResolver);
            removeMappingForKeys(predicates, nodeOutputs, reduceKeys);
            // extract columns missing in current RS key/value
            for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) {
                ASTNode parameter = entry.getKey();
                ExprNodeDesc expression = entry.getValue();
                if (!(expression instanceof ExprNodeColumnDesc)) {
                    continue;
                }
                if (ExprNodeDescUtils.indexOf(expression, reduceValues) >= 0) {
                    continue;
                }
                String internalName = getColumnInternalName(reduceValues.size());
                String field = Utilities.ReduceField.VALUE.toString() + "." + internalName;
                reduceValues.add(expression);
                outputValueColumnNames.add(internalName);
                reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field, expression.getTypeInfo(), null, false));
                colExprMap.put(field, expression);
            }
        }
    }
    // Optimize the scenario when there are no grouping keys - only 1 reducer is needed
    int numReducers = -1;
    if (grpByExprs.isEmpty()) {
        numReducers = 1;
    }
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, distinctColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, keyLength, numReducers, AcidUtils.Operation.NOT_ACID, defaultNullOrder);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
    rsOp.setColumnExprMap(colExprMap);
    return rsOp;
}
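The only value computed from the grouping keys themselves is the reducer count: with no group-by expressions every row maps to the same (empty) key, so a single reducer is forced, while -1 leaves the parallelism to be decided later. As a minimal sketch of that choice (chooseNumReducers is a hypothetical name):

    // Sketch: force one reducer when there are no grouping keys, otherwise let Hive decide.
    private static int chooseNumReducers(List<ASTNode> grpByExprs) {
        return grpByExprs.isEmpty() ? 1 : -1;
    }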