Use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.
The class ConvertJoinMapJoin, method process.
@Override
/*
 * (non-Javadoc) we should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
hashTableLoadFactor = context.conf.getFloatVar(ConfVars.HIVEHASHTABLELOADFACTOR);
fastHashTableAvailable = context.conf.getBoolVar(ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
JoinOperator joinOp = (JoinOperator) nd;
// adjust noconditional task size threshold for LLAP
LlapClusterStateForCompile llapInfo = null;
if ("llap".equalsIgnoreCase(context.conf.getVar(ConfVars.HIVE_EXECUTION_MODE))) {
llapInfo = LlapClusterStateForCompile.getClusterInfo(context.conf);
llapInfo.initClusterInfo();
}
MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(context.conf, llapInfo);
joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
maxJoinMemory = memoryMonitorInfo.getAdjustedNoConditionalTaskSize();
LOG.info("maxJoinMemory: {}", maxJoinMemory);
hashMapDataStructure = HashMapDataStructureType.of(joinOp.getConf());
TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) && !context.parseContext.getDisableMapJoin();
if (!hiveConvertJoin) {
// We are just converting to a common merge join operator, i.e. the shuffle
// join in the map-reduce case.
Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
if (retval == null) {
return retval;
} else {
fallbackToReduceSideJoin(joinOp, context);
return null;
}
}
// if we have traits, and table info is present in the traits, we know the
// exact number of buckets. Else choose the largest number of estimated
// reducers from the parent operators.
int numBuckets = -1;
if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
numBuckets = estimateNumBuckets(joinOp, true);
} else {
numBuckets = 1;
}
LOG.info("Estimated number of buckets " + numBuckets);
MapJoinConversion mapJoinConversion = getMapJoinConversion(joinOp, context, numBuckets, false, maxJoinMemory, true);
if (mapJoinConversion == null) {
Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx);
if (retval == null) {
return retval;
} else {
// The only remaining case is a full outer join with SMB enabled, which is not
// possible. Convert to a regular join.
fallbackToReduceSideJoin(joinOp, context);
return null;
}
}
if (numBuckets > 1) {
if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
// If we are running in LLAP, decide whether to use a bucket map join (BMJ) or a dynamically partitioned hash join (DPHJ).
if (llapInfo != null) {
if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversion, numBuckets)) {
return null;
}
} else if (convertJoinBucketMapJoin(joinOp, context, mapJoinConversion, tezBucketJoinProcCtx)) {
return null;
}
}
}
// Check whether we can convert to a map join with no bucket scaling.
LOG.info("Convert to non-bucketed map join");
if (numBuckets != 1) {
mapJoinConversion = getMapJoinConversion(joinOp, context, 1, false, maxJoinMemory, true);
}
if (mapJoinConversion == null) {
// We are just converting to a common merge join operator, i.e. the shuffle
// join in the map-reduce case.
fallbackToReduceSideJoin(joinOp, context);
return null;
}
// Currently, this is a MapJoin path and we don't support FULL OUTER MapJoin yet.
if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
fallbackToReduceSideJoin(joinOp, context);
return null;
}
MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
if (mapJoinOp == null) {
fallbackToReduceSideJoin(joinOp, context);
return null;
}
// map join operator by default has no bucket cols and num of reduce sinks
// reduced by 1
mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
preserveOperatorInfos(mapJoinOp, joinOp, context);
// propagate this change till the next RS
for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
}
return null;
}
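For context, ConvertJoinMapJoin is a NodeProcessor that the Tez compiler fires on JoinOperator nodes during a rule-based walk of the operator graph, which is how the process method above gets invoked. The following is a minimal sketch of that wiring, assuming the classic org.apache.hadoop.hive.ql.lib walker API (RuleRegExp, DefaultRuleDispatcher, DefaultGraphWalker); the class name MapJoinRuleWiring and the rule label are illustrative, and newer Hive branches rename some of these interfaces and constructors.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class MapJoinRuleWiring {
  // Registers ConvertJoinMapJoin so that its process() runs on every JoinOperator
  // reached by the walker. In practice procCtx is an OptimizeTezProcContext,
  // since process() casts to that type.
  public static void walkPlan(List<Node> topNodes, NodeProcessorCtx procCtx)
      throws SemanticException {
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
    opRules.put(new RuleRegExp("Convert join to map join",
        JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());

    // No default processor: operators matching no rule are left untouched.
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    GraphWalker walker = new DefaultGraphWalker(disp);
    walker.startWalking(new ArrayList<>(topNodes), null);
  }
}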
Use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.
The class ConvertJoinMapJoin, method convertJoinBucketMapJoin.
private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, MapJoinConversion mapJoinConversion, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
if (mapJoinConversion.getIsFullOuterJoin() && !mapJoinConversion.getIsFullOuterEnabledForMapJoin()) {
return false;
}
final int bigTablePosition = mapJoinConversion.getBigTablePos();
if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
LOG.info("Check conversion to bucket map join failed.");
return false;
}
// In case the join has extra keys other than the bucketed columns, the partition keys need to be updated
// on the small table(s).
ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
OpTraits opTraits = bigTableRS.getOpTraits();
List<List<String>> listBucketCols = opTraits.getBucketColNames();
List<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
boolean updatePartitionCols = false;
List<Integer> positions = new ArrayList<>();
if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
updatePartitionCols = true;
// Prepare updated partition columns for small table(s).
// Get the positions of bucketed columns
int i = 0;
Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
// It is guaranteed there is only 1 list within listBucketCols.
for (String colName : listBucketCols.get(0)) {
if (colExprMap.get(colName).isSame(bigTableExpr)) {
positions.add(i++);
}
}
}
}
MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversion, true);
if (mapJoinOp == null) {
LOG.debug("Conversion to bucket map join failed.");
return false;
}
MapJoinDesc joinDesc = mapJoinOp.getConf();
joinDesc.setBucketMapJoin(true);
// we can set the traits for this join operator
opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(opTraits);
preserveOperatorInfos(mapJoinOp, joinOp, context);
setNumberOfBucketsOnChildren(mapJoinOp);
// Once the conversion is done, we can set the partitioner to bucket cols on the small table
Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
// Update the partition columns on the small-table side to ensure correct routing of hash tables.
if (updatePartitionCols) {
for (Operator<?> op : mapJoinOp.getParentOperators()) {
if (!(op instanceof ReduceSinkOperator)) {
continue;
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
List<ExprNodeDesc> newPartitionCols = new ArrayList<>();
List<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
for (Integer position : positions) {
newPartitionCols.add(partitionCols.get(position));
}
rsOp.getConf().setPartitionCols(newPartitionCols);
}
}
// Update the memory monitor info for LLAP.
MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
if (memoryMonitorInfo.isLlap()) {
memoryMonitorInfo.setHashTableInflationFactor(1);
memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
}
return true;
}
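To make the partition-column remapping above concrete, here is a small standalone sketch of the same idea using plain strings in place of ExprNodeDesc (the real code compares expressions with isSame() against the ReduceSink's column/expression map); the column names are hypothetical.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PartitionColRemapSketch {
  public static void main(String[] args) {
    // The big-table ReduceSink partitions on both join keys, but the table is
    // bucketed only on "key", so the partition columns are a superset.
    List<String> bigTablePartitionCols = Arrays.asList("key", "value");
    List<String> bucketCols = Arrays.asList("key");

    // Record the positions of partition columns that are also bucket columns.
    List<Integer> positions = new ArrayList<>();
    for (int i = 0; i < bigTablePartitionCols.size(); i++) {
      if (bucketCols.contains(bigTablePartitionCols.get(i))) {
        positions.add(i);
      }
    }

    // Keep only those positions on the small-table ReduceSink so both sides
    // route rows to buckets on the same subset of keys.
    List<String> smallTablePartitionCols = Arrays.asList("key", "value");
    List<String> newPartitionCols = new ArrayList<>();
    for (int pos : positions) {
      newPartitionCols.add(smallTablePartitionCols.get(pos));
    }
    System.out.println(newPartitionCols); // prints [key]
  }
}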
Use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.
The class ConvertJoinMapJoin, method checkConvertJoinSMBJoin.
/*
* This method tries to convert a join to an SMB. This is done based on
* traits. If the sorted by columns are the same as the join columns then, we
* can convert the join to an SMB. Otherwise retain the bucket map join as it
* is still more efficient than a regular join.
*/
private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
int numBuckets = bigTableRS.getParentOperators().get(0).getOpTraits().getNumBuckets();
int size = -1;
boolean shouldCheckExternalTables = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
StringBuilder sb = new StringBuilder();
for (Operator<?> parentOp : joinOp.getParentOperators()) {
if (shouldCheckExternalTables && hasExternalTableAncestor(parentOp, sb)) {
LOG.debug("External table {} found in join - disabling SMB join.", sb.toString());
return false;
}
// Each side must either have upstream ReduceSinks or have none; if the sides are unbalanced, we cannot convert.
// This is a workaround for now. The right fix would be to refactor the code in
// MapRecordProcessor and ReduceRecordProcessor with respect to the sources.
Set<ReduceSinkOperator> set = OperatorUtils.findOperatorsUpstream(parentOp.getParentOperators(), ReduceSinkOperator.class);
if (size < 0) {
size = set.size();
continue;
}
if (((size > 0) && (set.size() > 0)) || ((size == 0) && (set.size() == 0))) {
continue;
} else {
return false;
}
}
// transformation of the join operation
for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
if (!(parentOp instanceof ReduceSinkOperator)) {
// could be mux/demux operators. Currently not supported
LOG.debug("Found correlation optimizer operators. Cannot convert to SMB at this time.");
return false;
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols();
// For SMB, the key column(s) in the RS should be the same as the bucket column(s) and sort column(s).
List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0);
List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0);
if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
return false;
}
// Check columns.
for (int i = 0; i < sortCols.size(); i++) {
ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i));
ExprNodeDesc bucketCol = rsOp.getColumnExprMap().get(bucketCols.get(i));
if (!(sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i)))) {
return false;
}
}
// Check that the parent's traits match the ReduceSink's traits.
OpTraits parentTraits = rsOp.getParentOperators().get(0).getOpTraits();
if (null == parentTraits) {
// programming error - shouldn't be null
return false;
}
if (!checkColEquality(parentTraits.getSortCols(), rsOp.getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {
LOG.info("We cannot convert to SMB because the sort column names do not match.");
return false;
}
if (!checkColEquality(parentTraits.getBucketColNames(), rsOp.getOpTraits().getBucketColNames(), rsOp.getColumnExprMap(), true)) {
LOG.info("We cannot convert to SMB because bucket column names do not match.");
return false;
}
}
if (numBuckets < 0) {
numBuckets = bigTableRS.getConf().getNumReducers();
}
tezBucketJoinProcCtx.setNumBuckets(numBuckets);
// Bucketing uses two different versions: version 1 for existing tables and
// version 2 for new tables. All inputs to the SMB join must use the same
// version. This only applies to tables read directly, not to intermediate
// outputs of joins/group-bys.
int bucketingVersion = -1;
for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
// Check whether the parent originates from a table scan and, if so, which bucketing version that table uses.
assert parentOp.getParentOperators() != null && parentOp.getParentOperators().size() == 1;
Operator<?> op = parentOp;
while (op != null && !(op instanceof TableScanOperator || op instanceof ReduceSinkOperator || op instanceof CommonJoinOperator)) {
// If op has parents, it is guaranteed to have exactly one.
List<Operator<?>> parents = op.getParentOperators();
Preconditions.checkState(parents.size() == 0 || parents.size() == 1);
op = parents.size() == 1 ? parents.get(0) : null;
}
if (op instanceof TableScanOperator) {
int localVersion = ((TableScanOperator) op).getConf().getTableMetadata().getBucketingVersion();
if (bucketingVersion == -1) {
bucketingVersion = localVersion;
} else if (bucketingVersion != localVersion) {
// Versions don't match, return false.
LOG.debug("SMB Join can't be performed due to bucketing version mismatch");
return false;
}
}
}
LOG.info("We can convert the join to an SMB join.");
return true;
}
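The core eligibility test above requires the ReduceSink key columns to line up one to one with both the sort columns and the bucket columns. Below is a simplified standalone sketch of that rule, with plain string equality standing in for the ExprNodeDesc.isSame() comparisons and hypothetical column names.

import java.util.Arrays;
import java.util.List;

public class SmbKeyCheckSketch {
  // True only if keyCols, sortCols and bucketCols have the same length and
  // agree element by element, mirroring the size and isSame() checks above.
  static boolean keysMatchSortAndBucketCols(List<String> keyCols,
      List<String> sortCols, List<String> bucketCols) {
    if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
      return false;
    }
    for (int i = 0; i < keyCols.size(); i++) {
      if (!sortCols.get(i).equals(keyCols.get(i))
          || !bucketCols.get(i).equals(keyCols.get(i))) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(keysMatchSortAndBucketCols(
        Arrays.asList("key"), Arrays.asList("key"), Arrays.asList("key")));   // true
    System.out.println(keysMatchSortAndBucketCols(
        Arrays.asList("key"), Arrays.asList("value"), Arrays.asList("key"))); // false
  }
}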
Use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.
The class SparkMapJoinOptimizer, method process.
/**
 * We should ideally not modify the tree we traverse. However,
 * since we need to walk the tree at any time when we modify the operator, we
 * might as well do it here.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
HiveConf conf = context.getConf();
JoinOperator joinOp = (JoinOperator) nd;
if (!conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
return null;
}
LOG.info("Check if operator " + joinOp + " can be converted to map join");
long[] mapJoinInfo = getMapJoinConversionInfo(joinOp, context);
int mapJoinConversionPos = (int) mapJoinInfo[0];
if (mapJoinConversionPos < 0) {
return null;
}
int numBuckets = -1;
List<List<String>> bucketColNames = null;
LOG.info("Convert to non-bucketed map join");
MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
// Native vectorized map join needs BinarySortableSerDe for the keys; changing the SerDe here won't hurt correctness.
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED) && conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
mapJoinOp.getConf().getKeyTblDesc().getProperties().setProperty(serdeConstants.SERIALIZATION_LIB, BinarySortableSerDe.class.getName());
}
if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) {
LOG.info("Check if it can be converted to bucketed map join");
numBuckets = convertJoinBucketMapJoin(joinOp, mapJoinOp, context, mapJoinConversionPos);
if (numBuckets > 1) {
LOG.info("Converted to map join with " + numBuckets + " buckets");
bucketColNames = joinOp.getOpTraits().getBucketColNames();
mapJoinInfo[2] /= numBuckets;
} else {
LOG.info("Can not convert to bucketed map join");
}
}
// we can set the traits for this join operator
OpTraits opTraits = new OpTraits(bucketColNames, numBuckets, null, joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(opTraits);
mapJoinOp.setStatistics(joinOp.getStatistics());
setNumberOfBucketsOnChildren(mapJoinOp);
context.getMjOpSizes().put(mapJoinOp, mapJoinInfo[1] + mapJoinInfo[2]);
return mapJoinOp;
}
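The conversions in these snippets are gated by HiveConf flags that appear directly in the code above. Here is a minimal sketch of how a driver or test might enable the relevant paths programmatically; it only sets flags named in the snippets, and defaults plus additional prerequisites (statistics, memory thresholds) vary by Hive version.

import org.apache.hadoop.hive.conf.HiveConf;

public class MapJoinConfSketch {
  public static HiveConf mapJoinFriendlyConf() {
    HiveConf conf = new HiveConf();
    // Allow automatic conversion of common joins to map joins.
    conf.setBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN, true);
    // Let the Spark optimizer path also try a bucket map join.
    conf.setBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN, true);
    // Needed for the BinarySortableSerDe key switch shown above.
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED, true);
    return conf;
  }
}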
Use of org.apache.hadoop.hive.ql.plan.OpTraits in project hive by apache.
The class ConvertJoinMapJoin, method convertJoinBucketMapJoin.
private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
LOG.info("Check conversion to bucket map join failed.");
return false;
}
// In case the join has extra keys other than the bucketed columns, the partition keys need to be updated
// on the small table(s).
ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
OpTraits opTraits = bigTableRS.getOpTraits();
List<List<String>> listBucketCols = opTraits.getBucketColNames();
ArrayList<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
boolean updatePartitionCols = false;
List<Integer> positions = new ArrayList<>();
if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
updatePartitionCols = true;
// Prepare updated partition columns for small table(s).
// Get the positions of bucketed columns
int i = 0;
Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
// It is guaranteed there is only 1 list within listBucketCols.
for (String colName : listBucketCols.get(0)) {
if (colExprMap.get(colName).isSame(bigTableExpr)) {
positions.add(i++);
}
}
}
}
MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true);
if (mapJoinOp == null) {
LOG.debug("Conversion to bucket map join failed.");
return false;
}
MapJoinDesc joinDesc = mapJoinOp.getConf();
joinDesc.setBucketMapJoin(true);
// we can set the traits for this join operator
opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
mapJoinOp.setOpTraits(opTraits);
mapJoinOp.setStatistics(joinOp.getStatistics());
setNumberOfBucketsOnChildren(mapJoinOp);
// Once the conversion is done, we can set the partitioner to bucket cols on the small table
Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
// Update the partition columns on the small-table side to ensure correct routing of hash tables.
if (updatePartitionCols) {
for (Operator<?> op : mapJoinOp.getParentOperators()) {
if (!(op instanceof ReduceSinkOperator)) {
continue;
}
ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>();
ArrayList<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
for (Integer position : positions) {
newPartitionCols.add(partitionCols.get(position));
}
rsOp.getConf().setPartitionCols(newPartitionCols);
}
}
// Update the memory monitor info for LLAP.
MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
if (memoryMonitorInfo.isLlap()) {
memoryMonitorInfo.setHashTableInflationFactor(1);
memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
}
return true;
}
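All of these snippets construct OpTraits through the same four-argument constructor, which (judging from the calls and getters used above) takes the bucket column names, the number of buckets, the sort column names, and the number of reduce sinks. A small sketch of building traits for a bucket map join output; the column name "key" is hypothetical and the constructor arity may differ in other Hive versions.

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.OpTraits;

public class OpTraitsSketch {
  public static OpTraits bucketMapJoinTraits(int numBuckets, int numReduceSinks) {
    // Single hypothetical bucket column "key"; sort column names are left
    // null, matching the constructor calls in the snippets above.
    List<List<String>> bucketCols =
        Collections.singletonList(Collections.singletonList("key"));
    return new OpTraits(bucketCols, numBuckets, null, numReduceSinks);
  }
}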