use of org.apache.hadoop.hive.ql.plan.SMBJoinDesc in project hive by apache.
The class MapJoinProcessor, method convertSMBJoinToMapJoin.
/**
* Convert a sort-merge join to a map-side join.
*
* @param smbJoinOp
* join operator
* @param bigTablePos
* position of the source to be read as part of the map-reduce framework. All other sources
* are cached in memory
* @param noCheckOuterJoin
*/
public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf, SMBMapJoinOperator smbJoinOp, int bigTablePos, boolean noCheckOuterJoin) throws SemanticException {
// Create a new map join operator
SMBJoinDesc smbJoinDesc = smbJoinOp.getConf();
List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0));
TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils.getFieldSchemasFromColumnList(keyCols, MAPJOINKEY_FIELDPREFIX));
MapJoinDesc mapJoinDesc = new MapJoinDesc(smbJoinDesc.getKeys(), keyTableDesc, smbJoinDesc.getExprs(), smbJoinDesc.getValueTblDescs(), smbJoinDesc.getValueTblDescs(), smbJoinDesc.getOutputColumnNames(), bigTablePos, smbJoinDesc.getConds(), smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix(), smbJoinDesc.getMemoryMonitorInfo(), smbJoinDesc.getInMemoryDataSize());
mapJoinDesc.setStatistics(smbJoinDesc.getStatistics());
mapJoinDesc.setColumnExprMap(smbJoinDesc.getColumnExprMap());
RowSchema joinRS = smbJoinOp.getSchema();
// The mapjoin has the same schema as the join operator
MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(smbJoinOp.getCompilationOpContext(), mapJoinDesc, joinRS, new ArrayList<Operator<? extends OperatorDesc>>());
// change the children of the original join operator to point to the map
// join operator
List<Operator<? extends OperatorDesc>> childOps = smbJoinOp.getChildOperators();
for (Operator<? extends OperatorDesc> childOp : childOps) {
childOp.replaceParent(smbJoinOp, mapJoinOp);
}
mapJoinOp.setChildOperators(childOps);
smbJoinOp.setChildOperators(null);
// change the parent of the original SMBjoin operator to point to the map
// join operator
List<Operator<? extends OperatorDesc>> parentOps = smbJoinOp.getParentOperators();
for (Operator<? extends OperatorDesc> parentOp : parentOps) {
parentOp.replaceChild(smbJoinOp, mapJoinOp);
}
mapJoinOp.setParentOperators(parentOps);
smbJoinOp.setParentOperators(null);
return mapJoinOp;
}
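A minimal, hypothetical call site for the conversion above (the wrapper class, its name, and the way bigTablePos is chosen are assumptions for illustration; only the convertSMBJoinToMapJoin signature comes from the snippet, and the imports follow the standard Hive package layout):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Hypothetical call site; only the convertSMBJoinToMapJoin signature is
// taken from the method above.
public final class SmbToMapJoinRewriteSketch {
  public static MapJoinOperator rewrite(HiveConf hconf, SMBMapJoinOperator smbJoinOp,
      int bigTablePos) throws SemanticException {
    // Passing noCheckOuterJoin = false keeps the outer-join check enabled
    // (an assumption about the flag's intent).
    MapJoinOperator mapJoinOp =
        MapJoinProcessor.convertSMBJoinToMapJoin(hconf, smbJoinOp, bigTablePos, false);
    // After the call the SMB join operator has been unlinked from the tree:
    // its former parents and children now point at mapJoinOp.
    return mapJoinOp;
  }
}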
use of org.apache.hadoop.hive.ql.plan.SMBJoinDesc in project hive by apache.
The class AbstractSMBJoinProc, method convertBucketMapJoinToSMBJoin.
// Convert the bucket map-join operator to a sort-merge map join operator
protected SMBMapJoinOperator convertBucketMapJoinToSMBJoin(MapJoinOperator mapJoinOp, SortBucketJoinProcCtx smbJoinContext) {
String[] srcs = smbJoinContext.getSrcs();
SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp);
SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
smbJop.setConf(smbJoinDesc);
HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>();
for (int i = 0; i < srcs.length; i++) {
tagToAlias.put((byte) i, srcs[i]);
}
smbJoinDesc.setTagToAlias(tagToAlias);
int indexInListMapJoinNoReducer = this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
if (indexInListMapJoinNoReducer >= 0) {
this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop);
}
Map<String, DummyStoreOperator> aliasToSink = new HashMap<String, DummyStoreOperator>();
// For all parents (other than the big table), insert a dummy store operator
/* Consider a query like:
*
* select * from
* (subq1 --> has a filter)
* join
* (subq2 --> has a filter)
* on some key
*
* Let us assume that subq1 is the small table (either specified by the user or inferred
* automatically). The following operator tree will be created:
*
* TableScan (subq1) --> Select --> Filter --> DummyStore
*                                                       \
*                                                        \  SMBJoin
*                                                        /
*                                                       /
* TableScan (subq2) --> Select --> Filter
*/
List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators();
for (int i = 0; i < parentOperators.size(); i++) {
Operator<? extends OperatorDesc> par = parentOperators.get(i);
int index = par.getChildOperators().indexOf(mapJoinOp);
par.getChildOperators().remove(index);
if (i == smbJoinDesc.getPosBigTable()) {
par.getChildOperators().add(index, smbJop);
} else {
DummyStoreOperator dummyStoreOp = (DummyStoreOperator) OperatorFactory.get(par.getCompilationOpContext(), new DummyStoreDesc());
par.getChildOperators().add(index, dummyStoreOp);
List<Operator<? extends OperatorDesc>> childrenOps = new ArrayList<Operator<? extends OperatorDesc>>();
childrenOps.add(smbJop);
dummyStoreOp.setChildOperators(childrenOps);
List<Operator<? extends OperatorDesc>> parentOps = new ArrayList<Operator<? extends OperatorDesc>>();
parentOps.add(par);
dummyStoreOp.setParentOperators(parentOps);
aliasToSink.put(srcs[i], dummyStoreOp);
smbJop.getParentOperators().remove(i);
smbJop.getParentOperators().add(i, dummyStoreOp);
}
}
smbJoinDesc.setAliasToSink(aliasToSink);
List<Operator<? extends OperatorDesc>> childOps = mapJoinOp.getChildOperators();
for (int i = 0; i < childOps.size(); i++) {
Operator<? extends OperatorDesc> child = childOps.get(i);
int index = child.getParentOperators().indexOf(mapJoinOp);
child.getParentOperators().remove(index);
child.getParentOperators().add(index, smbJop);
}
// Data structures coming from QBJoinTree
smbJop.getConf().setQBJoinTreeProps(mapJoinOp.getConf());
//
pGraphContext.getSmbMapJoinOps().add(smbJop);
pGraphContext.getMapJoinOps().remove(mapJoinOp);
return smbJop;
}
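The tag-to-alias map built near the top of the method is plain bookkeeping: each join position, encoded as a byte tag, is associated with its source alias. A stand-alone sketch with invented alias names (in the real method the aliases come from smbJoinContext.getSrcs()):

import java.util.HashMap;
import java.util.Map;

// Stand-alone illustration of the tag-to-alias bookkeeping used above;
// the alias names in the usage note are invented for the example.
static Map<Byte, String> buildTagToAlias(String[] srcs) {
  Map<Byte, String> tagToAlias = new HashMap<>();
  for (int i = 0; i < srcs.length; i++) {
    tagToAlias.put((byte) i, srcs[i]);   // join tag -> table alias
  }
  return tagToAlias;
}

For example, buildTagToAlias(new String[] {"subq1", "subq2"}) maps tag 0 to "subq1" and tag 1 to "subq2"; smbJoinDesc.setTagToAlias(...) then stores that mapping on the SMBJoinDesc, exactly as the method above does.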
use of org.apache.hadoop.hive.ql.plan.SMBJoinDesc in project hive by apache.
The class SortMergeJoinTaskDispatcher, method processCurrentTask.
@Override
public Task<?> processCurrentTask(MapRedTask currTask, ConditionalTask conditionalTask, Context context) throws SemanticException {
// check whether the current work contains a sort-merge join operator
MapredWork currWork = currTask.getWork();
SMBMapJoinOperator originalSMBJoinOp = getSMBMapJoinOp(currWork);
if (!isEligibleForOptimization(originalSMBJoinOp)) {
return null;
}
currTask.setTaskTag(Task.CONVERTED_SORTMERGEJOIN);
// Convert the work containing the sort-merge join into a work as if it had a regular join.
// Note that the operator tree is not changed - it still contains the SMB join, but the
// plan is changed (aliasToWork etc.) to contain all the paths as if it was a regular join.
// This is used to convert the plan to a map-join, and then the original SMB join plan is used
// as a backup task.
MapredWork currJoinWork = convertSMBWorkToJoinWork(currWork, originalSMBJoinOp);
SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
currWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin());
currWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc());
currWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases());
currJoinWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin());
currJoinWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc());
currJoinWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases());
// create conditional work list and task list
List<Serializable> listWorks = new ArrayList<Serializable>();
List<Task<?>> listTasks = new ArrayList<Task<?>>();
// create task to aliases mapping and alias to input file mapping for resolver
// Must be a deterministic-order map for consistent q-test output across Java versions
HashMap<Task<?>, Set<String>> taskToAliases = new LinkedHashMap<Task<?>, Set<String>>();
// Note that pathToAlias will behave as if the original plan was a join plan
Map<Path, List<String>> pathToAliases = currJoinWork.getMapWork().getPathToAliases();
// generate a map join task for the big table
SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf();
Byte[] order = originalSMBJoinDesc.getTagOrder();
int numAliases = order.length;
Set<Integer> bigTableCandidates = MapJoinProcessor.getBigTableCandidates(originalSMBJoinDesc.getConds());
HashMap<String, Long> aliasToSize = new HashMap<String, Long>();
Configuration conf = context.getConf();
try {
long aliasTotalKnownInputSize = getTotalKnownInputSize(context, currJoinWork.getMapWork(), pathToAliases, aliasToSize);
long ThresholdOfSmallTblSizeSum = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVESMALLTABLESFILESIZE);
for (int bigTablePosition = 0; bigTablePosition < numAliases; bigTablePosition++) {
// this table cannot be the big table
if (!bigTableCandidates.contains(bigTablePosition)) {
continue;
}
// create map join task for the given big table position
MapRedTask newTask = convertSMBTaskToMapJoinTask(currJoinWork, bigTablePosition, newSMBJoinOp);
MapWork mapWork = newTask.getWork().getMapWork();
Operator<?> parentOp = originalSMBJoinOp.getParentOperators().get(bigTablePosition);
Set<String> aliases = GenMapRedUtils.findAliases(mapWork, parentOp);
long aliasKnownSize = Utilities.sumOf(aliasToSize, aliases);
if (aliasKnownSize > 0) {
long smallTblTotalKnownSize = aliasTotalKnownInputSize - aliasKnownSize;
if (smallTblTotalKnownSize > ThresholdOfSmallTblSizeSum) {
// this table is not a good candidate for the big table.
continue;
}
}
// add into conditional task
listWorks.add(newTask.getWork());
listTasks.add(newTask);
newTask.setTaskTag(Task.CONVERTED_MAPJOIN);
newTask.setFetchSource(currTask.isFetchSource());
// set up backup task
newTask.setBackupTask(currTask);
newTask.setBackupChildrenTasks(currTask.getChildTasks());
// put the mapping task to aliases
taskToAliases.put(newTask, aliases);
}
} catch (Exception e) {
throw new SemanticException("Generate Map Join Task Error", e);
}
// insert current common join task to conditional task
listWorks.add(currTask.getWork());
listTasks.add(currTask);
// clear JoinTree and OP Parse Context
currWork.getMapWork().setLeftInputJoin(false);
currWork.getMapWork().setBaseSrc(null);
currWork.getMapWork().setMapAliases(null);
// create conditional task and insert conditional task into task tree
ConditionalWork cndWork = new ConditionalWork(listWorks);
ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork);
cndTsk.setListTasks(listTasks);
// set resolver and resolver context
cndTsk.setResolver(new ConditionalResolverCommonJoin());
ConditionalResolverCommonJoinCtx resolverCtx = new ConditionalResolverCommonJoinCtx();
resolverCtx.setPathToAliases(pathToAliases);
resolverCtx.setAliasToKnownSize(aliasToSize);
resolverCtx.setTaskToAliases(taskToAliases);
resolverCtx.setCommonJoinTask(currTask);
resolverCtx.setLocalTmpDir(context.getLocalScratchDir(false));
resolverCtx.setHdfsTmpDir(context.getMRScratchDir());
cndTsk.setResolverCtx(resolverCtx);
// replace the current task with the new generated conditional task
replaceTaskWithConditionalTask(currTask, cndTsk);
return cndTsk;
}
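The per-candidate test inside the try block reduces to simple arithmetic: a position may act as the big table only if the combined known size of all the other inputs stays under the small-table threshold (HiveConf.ConfVars.HIVESMALLTABLESFILESIZE). An illustrative, stand-alone restatement of that check (the method name is invented; the logic mirrors the loop above):

// Illustrative restatement of the big-table eligibility check used above.
static boolean mayBeBigTable(long aliasTotalKnownInputSize,   // sum of all known input sizes
                             long aliasKnownSize,             // known size of this candidate's aliases
                             long smallTableThreshold) {      // HIVESMALLTABLESFILESIZE
  if (aliasKnownSize <= 0) {
    return true;   // size unknown: keep the candidate, as the loop above does
  }
  // Everything that is not the big table must fit under the small-table threshold.
  long smallTblTotalKnownSize = aliasTotalKnownInputSize - aliasKnownSize;
  return smallTblTotalKnownSize <= smallTableThreshold;
}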
use of org.apache.hadoop.hive.ql.plan.SMBJoinDesc in project hive by apache.
The class Vectorizer, method validateAndVectorizeOperator.
public Operator<? extends OperatorDesc> validateAndVectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isReduce, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException, VectorizerCannotVectorizeException {
Operator<? extends OperatorDesc> vectorOp = null;
// This "global" allows various validation methods to set the "not vectorized" reason.
currentOperator = op;
boolean isNative;
try {
switch(op.getType()) {
case MAPJOIN:
{
if (op instanceof MapJoinOperator) {
if (!validateMapJoinOperator((MapJoinOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
} else if (op instanceof SMBMapJoinOperator) {
if (!validateSMBMapJoinOperator((SMBMapJoinOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
} else {
setOperatorNotSupported(op);
throw new VectorizerCannotVectorizeException();
}
if (op instanceof MapJoinOperator) {
MapJoinDesc desc = (MapJoinDesc) op.getConf();
int joinType = desc.getConds()[0].getType();
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinDesc);
if (!specialize) {
Class<? extends Operator<?>> opClass = null;
// *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
if (!isOuterAndFiltered) {
opClass = VectorMapJoinOperator.class;
} else {
if (joinType == JoinDesc.FULL_OUTER_JOIN) {
setOperatorIssue("Vectorized & filtered full-outer joins not supported");
throw new VectorizerCannotVectorizeException();
}
opClass = VectorMapJoinOuterFilteredOperator.class;
}
vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), desc, vContext, vectorMapJoinDesc);
isNative = false;
} else {
// TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
// HiveConf.setBoolVar(physicalContext.getConf(),
// HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinDesc);
isNative = true;
if (vectorTaskColumnInfo != null) {
if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableKeyExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableValueExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
} else {
Preconditions.checkState(op instanceof SMBMapJoinOperator);
SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
// Check additional constraint.
if (smbJoinSinkDesc.getFilterMap() != null) {
setOperatorIssue("FilterMaps not supported for Vector Pass-Thru SMB MapJoin");
throw new VectorizerCannotVectorizeException();
}
VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext, vectorSMBJoinDesc);
isNative = false;
}
}
break;
case REDUCESINK:
{
if (!validateReduceSinkOperator((ReduceSinkOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
ReduceSinkDesc reduceDesc = (ReduceSinkDesc) op.getConf();
VectorReduceSinkDesc vectorReduceSinkDesc = new VectorReduceSinkDesc();
boolean specialize = canSpecializeReduceSink(reduceDesc, isTezOrSpark, vContext, vectorReduceSinkDesc);
if (!specialize) {
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), reduceDesc, vContext, vectorReduceSinkDesc);
isNative = false;
} else {
vectorOp = specializeReduceSinkOperator(op, vContext, reduceDesc, vectorReduceSinkDesc);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorReduceSinkInfo vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo();
if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
}
break;
case FILTER:
{
if (!validateFilterOperator((FilterOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
vectorOp = vectorizeFilterOperator(op, vContext, vectorFilterDesc);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case TOPNKEY:
{
if (!validateTopNKeyOperator((TopNKeyOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
VectorTopNKeyDesc vectorTopNKeyDesc = new VectorTopNKeyDesc();
vectorOp = vectorizeTopNKeyOperator(op, vContext, vectorTopNKeyDesc);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorExpression[] keyExpressions = vectorTopNKeyDesc.getKeyExpressions();
if (usesVectorUDFAdaptor(keyExpressions)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case SELECT:
{
if (!validateSelectOperator((SelectOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
vectorOp = vectorizeSelectOperator(op, vContext, vectorSelectDesc);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
if (usesVectorUDFAdaptor(vectorSelectExprs)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case GROUPBY:
{
// The validateGroupByOperator method will update vectorGroupByDesc.
VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
if (!validateGroupByOperator((GroupByOperator) op, isReduce, isTezOrSpark, vectorGroupByDesc)) {
throw new VectorizerCannotVectorizeException();
}
ImmutablePair<Operator<? extends OperatorDesc>, String> pair = doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc);
if (pair.left == null) {
setOperatorIssue(pair.right);
throw new VectorizerCannotVectorizeException();
}
vectorOp = pair.left;
isNative = false;
if (vectorTaskColumnInfo != null) {
VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
if (usesVectorUDFAdaptor(vecKeyExpressions)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs();
for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) {
if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
}
break;
case FILESINK:
{
if (!validateFileSinkOperator((FileSinkOperator) op)) {
throw new VectorizerCannotVectorizeException();
}
FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
boolean isArrowSpecialization = checkForArrowFileSink(fileSinkDesc, isTezOrSpark, vContext, vectorFileSinkDesc);
if (isArrowSpecialization) {
vectorOp = specializeArrowFileSinkOperator(op, vContext, fileSinkDesc, vectorFileSinkDesc);
isNative = true;
} else {
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext, vectorFileSinkDesc);
isNative = false;
}
}
break;
case LIMIT:
{
// No validation.
LimitDesc limitDesc = (LimitDesc) op.getConf();
VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext, vectorLimitDesc);
isNative = true;
}
break;
case EVENT:
{
// No validation.
AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext, vectorEventDesc);
isNative = true;
}
break;
case PTF:
{
// The validatePTFOperator method will update vectorPTFDesc.
VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
if (!validatePTFOperator((PTFOperator) op, vContext, vectorPTFDesc)) {
throw new VectorizerCannotVectorizeException();
}
vectorOp = vectorizePTFOperator(op, vContext, vectorPTFDesc);
isNative = true;
}
break;
case HASHTABLESINK:
{
// No validation.
SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext, vectorSparkHashTableSinkDesc);
isNative = true;
}
break;
case SPARKPRUNINGSINK:
{
// No validation.
SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext, vectorSparkPartitionPruningSinkDesc);
// need to maintain the unique ID so that target map works can
// read the output
((SparkPartitionPruningSinkOperator) vectorOp).setUniqueId(((SparkPartitionPruningSinkOperator) op).getUniqueId());
isNative = true;
}
break;
default:
setOperatorNotSupported(op);
throw new VectorizerCannotVectorizeException();
}
} catch (HiveException e) {
setOperatorIssue(e.getMessage());
throw new VectorizerCannotVectorizeException();
}
Preconditions.checkState(vectorOp != null);
if (vectorTaskColumnInfo != null && !isNative) {
vectorTaskColumnInfo.setAllNative(false);
}
LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
// These operators need to be linked to enable runtime statistics to be gathered/used correctly
planMapper.link(op, vectorOp);
return vectorOp;
}
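validateAndVectorizeOperator either returns a vectorized replacement or signals failure by throwing VectorizerCannotVectorizeException. A hypothetical caller that falls back to the row-mode operator on failure might look like the sketch below (the driver itself is an assumption and presumes the same imports and visibility as the surrounding Vectorizer code; only the method's contract is taken from the code above):

// Hypothetical fallback driver; only the contract of
// validateAndVectorizeOperator (return a vector operator or throw
// VectorizerCannotVectorizeException) comes from the method above.
Operator<? extends OperatorDesc> tryVectorize(Vectorizer vectorizer,
    Operator<? extends OperatorDesc> op, VectorizationContext vContext,
    boolean isReduce, boolean isTezOrSpark,
    VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException {
  try {
    return vectorizer.validateAndVectorizeOperator(op, vContext, isReduce, isTezOrSpark, vectorTaskColumnInfo);
  } catch (VectorizerCannotVectorizeException e) {
    // Keep the original row-mode operator; the reason was already recorded
    // via setOperatorIssue()/setOperatorNotSupported() inside the method.
    return op;
  }
}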
use of org.apache.hadoop.hive.ql.plan.SMBJoinDesc in project hive by apache.
The class Vectorizer, method vectorizeOperator.
public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException {
Operator<? extends OperatorDesc> vectorOp = null;
boolean isNative;
switch(op.getType()) {
case TABLESCAN:
vectorOp = vectorizeTableScanOperator(op, vContext);
isNative = true;
break;
case MAPJOIN:
{
if (op instanceof MapJoinOperator) {
VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
MapJoinDesc desc = (MapJoinDesc) op.getConf();
boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
if (!specialize) {
Class<? extends Operator<?>> opClass = null;
// *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
if (!isOuterAndFiltered) {
opClass = VectorMapJoinOperator.class;
} else {
opClass = VectorMapJoinOuterFilteredOperator.class;
}
vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
isNative = false;
} else {
// TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
// HiveConf.setBoolVar(physicalContext.getConf(),
// HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
isNative = true;
if (vectorTaskColumnInfo != null) {
if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
} else {
Preconditions.checkState(op instanceof SMBMapJoinOperator);
SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
isNative = false;
}
}
break;
case REDUCESINK:
{
VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
if (!specialize) {
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), op.getConf(), vContext);
isNative = false;
} else {
vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
isNative = true;
if (vectorTaskColumnInfo != null) {
if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
}
break;
case FILTER:
{
vectorOp = vectorizeFilterOperator(op, vContext);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorFilterDesc vectorFilterDesc = (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case SELECT:
{
vectorOp = vectorizeSelectOperator(op, vContext);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorSelectDesc vectorSelectDesc = (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
if (usesVectorUDFAdaptor(vectorSelectExprs)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case GROUPBY:
{
vectorOp = vectorizeGroupByOperator(op, vContext);
isNative = false;
if (vectorTaskColumnInfo != null) {
VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
if (!vectorGroupByDesc.isVectorOutput()) {
vectorTaskColumnInfo.setGroupByVectorOutput(false);
}
VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
if (usesVectorUDFAdaptor(vecKeyExpressions)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
for (VectorAggregateExpression vecAggr : vecAggregators) {
if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
}
break;
case FILESINK:
{
FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext);
isNative = false;
}
break;
case LIMIT:
{
LimitDesc limitDesc = (LimitDesc) op.getConf();
VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
limitDesc.setVectorDesc(vectorLimitDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);
isNative = true;
}
break;
case EVENT:
{
AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
eventDesc.setVectorDesc(vectorEventDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext);
isNative = true;
}
break;
case HASHTABLESINK:
{
SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
isNative = true;
}
break;
case SPARKPRUNINGSINK:
{
SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
isNative = true;
}
break;
default:
// These are children of GROUP BY operators with non-vector outputs.
isNative = false;
vectorOp = op;
break;
}
Preconditions.checkState(vectorOp != null);
if (vectorTaskColumnInfo != null && !isNative) {
vectorTaskColumnInfo.setAllNative(false);
}
LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
if (vectorOp != op) {
fixupParentChildOperators(op, vectorOp);
((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
}
return vectorOp;
}
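When vectorizeOperator produces a new operator (vectorOp != op), the original has to be spliced out of the operator DAG. fixupParentChildOperators itself is not shown here, so the following is only an assumption about what such rewiring amounts to, patterned on the replaceParent/replaceChild calls in convertSMBJoinToMapJoin above:

// Minimal rewiring sketch, patterned on convertSMBJoinToMapJoin above.
// An assumption about what fixupParentChildOperators amounts to, not a
// copy of the real implementation.
static void spliceInReplacement(Operator<? extends OperatorDesc> oldOp,
                                Operator<? extends OperatorDesc> newOp) {
  List<Operator<? extends OperatorDesc>> parents = oldOp.getParentOperators();
  if (parents != null) {
    for (Operator<? extends OperatorDesc> parent : parents) {
      parent.replaceChild(oldOp, newOp);     // parents now point at the new operator
    }
    newOp.setParentOperators(parents);
  }
  List<Operator<? extends OperatorDesc>> children = oldOp.getChildOperators();
  if (children != null) {
    for (Operator<? extends OperatorDesc> child : children) {
      child.replaceParent(oldOp, newOp);     // children now point back at the new operator
    }
    newOp.setChildOperators(children);
  }
}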