use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.
the class Vectorizer method specializeMapJoinOperator.
Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinDesc vectorDesc) throws HiveException {
Operator<? extends OperatorDesc> vectorOp = null;
Class<? extends Operator<?>> opClass = null;
VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
HashTableKind hashTableKind = HashTableKind.NONE;
HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
VectorMapJoinVariation vectorMapJoinVariation = null;
if (vectorDesc.getIsFastHashTableEnabled()) {
hashTableImplementationType = HashTableImplementationType.FAST;
} else {
hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
}
int joinType = desc.getConds()[0].getType();
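// canSpecializeMapJoin requires exactly one join condition before this method is
// called (see oneMapJoinCondition there), so conds[0] is the only condition here.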
boolean isInnerBigOnly = false;
if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
isInnerBigOnly = true;
}
// By default, we can always use the multi-key class.
hashTableKeyType = HashTableKeyType.MULTI_KEY;
if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
// Look for single column optimization.
byte posBigTable = (byte) desc.getPosBigTable();
Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
if (bigTableKeyExprs.size() == 1) {
TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
switch(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
case BOOLEAN:
hashTableKeyType = HashTableKeyType.BOOLEAN;
break;
case BYTE:
hashTableKeyType = HashTableKeyType.BYTE;
break;
case SHORT:
hashTableKeyType = HashTableKeyType.SHORT;
break;
case INT:
hashTableKeyType = HashTableKeyType.INT;
break;
case DATE:
hashTableKeyType = HashTableKeyType.DATE;
break;
case LONG:
hashTableKeyType = HashTableKeyType.LONG;
break;
case STRING:
case CHAR:
case VARCHAR:
case BINARY:
hashTableKeyType = HashTableKeyType.STRING;
break;
default:
// Keep the default MULTI_KEY for other single-column key types.
}
}
}
switch(joinType) {
case JoinDesc.INNER_JOIN:
if (!isInnerBigOnly) {
vectorMapJoinVariation = VectorMapJoinVariation.INNER;
hashTableKind = HashTableKind.HASH_MAP;
} else {
vectorMapJoinVariation = VectorMapJoinVariation.INNER_BIG_ONLY;
hashTableKind = HashTableKind.HASH_MULTISET;
}
break;
case JoinDesc.LEFT_OUTER_JOIN:
case JoinDesc.RIGHT_OUTER_JOIN:
vectorMapJoinVariation = VectorMapJoinVariation.OUTER;
hashTableKind = HashTableKind.HASH_MAP;
break;
case JoinDesc.FULL_OUTER_JOIN:
vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER;
hashTableKind = HashTableKind.HASH_MAP;
break;
case JoinDesc.LEFT_SEMI_JOIN:
vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI;
hashTableKind = HashTableKind.HASH_SET;
break;
case JoinDesc.ANTI_JOIN:
vectorMapJoinVariation = VectorMapJoinVariation.LEFT_ANTI;
hashTableKind = HashTableKind.HASH_SET;
break;
default:
throw new HiveException("Unknown join type " + joinType);
}
LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
switch(hashTableKeyType) {
case BOOLEAN:
case BYTE:
case SHORT:
case INT:
case DATE:
case LONG:
switch(vectorMapJoinVariation) {
case INNER:
opClass = VectorMapJoinInnerLongOperator.class;
break;
case INNER_BIG_ONLY:
opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
break;
case LEFT_SEMI:
opClass = VectorMapJoinLeftSemiLongOperator.class;
break;
case LEFT_ANTI:
opClass = VectorMapJoinAntiJoinLongOperator.class;
break;
case OUTER:
opClass = VectorMapJoinOuterLongOperator.class;
break;
case FULL_OUTER:
opClass = VectorMapJoinFullOuterLongOperator.class;
break;
default:
throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
}
break;
case STRING:
switch(vectorMapJoinVariation) {
case INNER:
opClass = VectorMapJoinInnerStringOperator.class;
break;
case INNER_BIG_ONLY:
opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
break;
case LEFT_SEMI:
opClass = VectorMapJoinLeftSemiStringOperator.class;
break;
case LEFT_ANTI:
opClass = VectorMapJoinAntiJoinStringOperator.class;
break;
case OUTER:
opClass = VectorMapJoinOuterStringOperator.class;
break;
case FULL_OUTER:
opClass = VectorMapJoinFullOuterStringOperator.class;
break;
default:
throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
}
break;
case MULTI_KEY:
switch(vectorMapJoinVariation) {
case INNER:
opClass = VectorMapJoinInnerMultiKeyOperator.class;
break;
case INNER_BIG_ONLY:
opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
break;
case LEFT_SEMI:
opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
break;
case LEFT_ANTI:
opClass = VectorMapJoinAntiJoinMultiKeyOperator.class;
break;
case OUTER:
opClass = VectorMapJoinOuterMultiKeyOperator.class;
break;
case FULL_OUTER:
opClass = VectorMapJoinFullOuterMultiKeyOperator.class;
break;
default:
throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
}
break;
default:
throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
}
boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
vectorDesc.setHashTableImplementationType(hashTableImplementationType);
vectorDesc.setHashTableKind(hashTableKind);
vectorDesc.setHashTableKeyType(hashTableKeyType);
vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation);
if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
vectorDesc.setIsFullOuter(true);
}
vectorDesc.setMinMaxEnabled(minMaxEnabled);
vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext, vectorDesc);
LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
return vectorOp;
}
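The nested switches above amount to a two-level lookup: the hash table key type selects an operator family (all the integer-like key types collapse onto the Long family), and the join variation selects the concrete class within that family. Below is a minimal standalone sketch of the same dispatch, using local enums and a few entries copied from the switch rather than Hive's actual types:

import java.util.EnumMap;
import java.util.Map;

public class MapJoinDispatchSketch {
  enum KeyFamily { LONG, STRING, MULTI_KEY }
  enum Variation { INNER, INNER_BIG_ONLY, LEFT_SEMI, LEFT_ANTI, OUTER, FULL_OUTER }

  private static final Map<KeyFamily, Map<Variation, String>> DISPATCH =
      new EnumMap<>(KeyFamily.class);

  private static void register(KeyFamily family, Variation variation, String opClassName) {
    DISPATCH.computeIfAbsent(family, f -> new EnumMap<>(Variation.class))
        .put(variation, opClassName);
  }

  static {
    // A few entries copied from the switch above; the full table has 18.
    register(KeyFamily.LONG, Variation.INNER, "VectorMapJoinInnerLongOperator");
    register(KeyFamily.LONG, Variation.LEFT_ANTI, "VectorMapJoinAntiJoinLongOperator");
    register(KeyFamily.STRING, Variation.FULL_OUTER, "VectorMapJoinFullOuterStringOperator");
    register(KeyFamily.MULTI_KEY, Variation.OUTER, "VectorMapJoinOuterMultiKeyOperator");
  }

  static String lookup(KeyFamily family, Variation variation) {
    Map<Variation, String> byVariation = DISPATCH.get(family);
    String name = (byVariation == null) ? null : byVariation.get(variation);
    if (name == null) {
      throw new IllegalStateException("Unknown combination " + family + "/" + variation);
    }
    return name;
  }

  public static void main(String[] args) {
    System.out.println(lookup(KeyFamily.LONG, Variation.INNER));
  }
}

A table also makes the one naming irregularity visible: the LEFT_ANTI variation maps to classes named AntiJoin rather than LeftAnti.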
use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.
the class MapJoinTestConfig method createVectorMapJoinDesc.
public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription testDesc) {
VectorMapJoinDesc vectorDesc = new VectorMapJoinDesc();
vectorDesc.setHashTableImplementationType(HashTableImplementationType.FAST);
HashTableKind hashTableKind;
switch(testDesc.vectorMapJoinVariation) {
case INNER:
hashTableKind = HashTableKind.HASH_MAP;
break;
case INNER_BIG_ONLY:
hashTableKind = HashTableKind.HASH_MULTISET;
break;
case LEFT_SEMI:
case LEFT_ANTI:
hashTableKind = HashTableKind.HASH_SET;
break;
case OUTER:
case FULL_OUTER:
hashTableKind = HashTableKind.HASH_MAP;
break;
default:
throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
}
vectorDesc.setHashTableKind(hashTableKind);
// Assume the multi-key type unless a single primitive key column allows specialization.
HashTableKeyType hashTableKeyType = HashTableKeyType.MULTI_KEY;
if (testDesc.bigTableKeyTypeInfos.length == 1) {
switch(((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) {
case BOOLEAN:
hashTableKeyType = HashTableKeyType.BOOLEAN;
break;
case BYTE:
hashTableKeyType = HashTableKeyType.BYTE;
break;
case SHORT:
hashTableKeyType = HashTableKeyType.SHORT;
break;
case INT:
hashTableKeyType = HashTableKeyType.INT;
break;
case LONG:
hashTableKeyType = HashTableKeyType.LONG;
break;
case STRING:
hashTableKeyType = HashTableKeyType.STRING;
break;
default:
// Keep MULTI_KEY for single-column key types without a specialized hash table.
}
}
vectorDesc.setHashTableKeyType(hashTableKeyType);
vectorDesc.setVectorMapJoinVariation(testDesc.vectorMapJoinVariation);
vectorDesc.setMinMaxEnabled(false);
VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
vectorMapJoinInfo.setBigTableKeyColumnMap(testDesc.bigTableKeyColumnNums);
vectorMapJoinInfo.setBigTableKeyColumnNames(testDesc.bigTableKeyColumnNames);
vectorMapJoinInfo.setBigTableKeyTypeInfos(testDesc.bigTableKeyTypeInfos);
vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(null);
vectorDesc.setAllBigTableKeyExpressions(null);
vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums);
vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames);
vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos);
vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null);
vectorDesc.setAllBigTableValueExpressions(null);
vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]);
/*
* Column mapping.
*/
VectorColumnOutputMapping bigTableRetainMapping = new VectorColumnOutputMapping("Big Table Retain Mapping");
VectorColumnOutputMapping nonOuterSmallTableKeyMapping = new VectorColumnOutputMapping("Non Outer Small Table Key Mapping");
VectorColumnOutputMapping outerSmallTableKeyMapping = new VectorColumnOutputMapping("Outer Small Table Key Mapping");
VectorColumnSourceMapping fullOuterSmallTableKeyMapping = new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
int nextOutputColumn = 0;
final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length;
for (int i = 0; i < bigTableRetainedSize; i++) {
final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i];
TypeInfo typeInfo = testDesc.bigTableTypeInfos[i];
projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
// Collect columns we copy from the big table batch to the overflow batch.
if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
// Tolerate repeated use of a big table column.
bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
}
nextOutputColumn++;
}
boolean isOuterJoin = (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER);
int emulateScratchColumn = testDesc.bigTableTypeInfos.length;
VectorColumnOutputMapping smallTableKeyOutputMapping = new VectorColumnOutputMapping("Small Table Key Output Mapping");
final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length;
for (int i = 0; i < smallTableKeyRetainSize; i++) {
final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum];
TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum];
if (!isOuterJoin) {
// Project the big table key into the small table result "area".
projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo);
if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) {
nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo);
}
} else {
outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo);
// For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key
// into the output result.
fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo);
emulateScratchColumn++;
}
nextOutputColumn++;
}
// The order of the fields in the LazyBinary small table value must be used, so
// we use the source ordering flavor for the mapping.
VectorColumnSourceMapping smallTableValueMapping = new VectorColumnSourceMapping("Small Table Value Mapping");
for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) {
smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]);
emulateScratchColumn++;
nextOutputColumn++;
}
// Convert dynamic arrays and maps to simple arrays.
bigTableRetainMapping.finalize();
vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns());
vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos());
nonOuterSmallTableKeyMapping.finalize();
vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns());
vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos());
outerSmallTableKeyMapping.finalize();
fullOuterSmallTableKeyMapping.finalize();
vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping);
vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping);
smallTableValueMapping.finalize();
vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping);
projectionMapping.finalize();
// Verify we added an entry for each output.
assert projectionMapping.isSourceSequenceGood();
vectorMapJoinInfo.setProjectionMapping(projectionMapping);
if (projectionMapping.getCount() != testDesc.outputColumnNames.length) {
throw new RuntimeException("Projection count " + projectionMapping.getCount() + " does not match output column count " + testDesc.outputColumnNames.length);
}
vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
return vectorDesc;
}
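The VectorColumnOutputMapping/VectorColumnSourceMapping objects above accumulate (source column, output column, type) triples and are then frozen into plain arrays by the finalize() calls. Here is a simplified stand-in that models only the behavior visible in this snippet; the class name, the freeze() method, and the String type stand-in are illustrative, not Hive's API:

import java.util.ArrayList;
import java.util.List;

public class ColumnMappingSketch {
  private final String name;
  private final List<Integer> sourceColumns = new ArrayList<>();
  private final List<Integer> outputColumns = new ArrayList<>();
  private final List<String> typeNames = new ArrayList<>(); // stand-in for TypeInfo
  private int[] outputColumnArray;                          // filled by freeze()

  public ColumnMappingSketch(String name) {
    this.name = name;
  }

  public String getName() {
    return name;
  }

  public void add(int sourceColumn, int outputColumn, String typeName) {
    sourceColumns.add(sourceColumn);
    outputColumns.add(outputColumn);
    typeNames.add(typeName);
  }

  public boolean containsOutputColumn(int outputColumn) {
    return outputColumns.contains(outputColumn);
  }

  // Plays the role of the finalize() calls above: convert dynamic lists to arrays.
  public void freeze() {
    outputColumnArray = outputColumns.stream().mapToInt(Integer::intValue).toArray();
  }

  public int[] getOutputColumns() {
    return outputColumnArray;
  }
}

Calling the freeze step freeze() rather than finalize() also avoids the confusion of sharing a name with Object.finalize().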
use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.
the class Vectorizer method vectorizeOperator.
public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException {
Operator<? extends OperatorDesc> vectorOp = null;
boolean isNative;
switch(op.getType()) {
case TABLESCAN:
vectorOp = vectorizeTableScanOperator(op, vContext);
isNative = true;
break;
case MAPJOIN:
{
if (op instanceof MapJoinOperator) {
VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
MapJoinDesc desc = (MapJoinDesc) op.getConf();
boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
if (!specialize) {
Class<? extends Operator<?>> opClass = null;
// *NON-NATIVE* vector map join: outer joins with big table filter expressions need the filtered operator variant.
List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
if (!isOuterAndFiltered) {
opClass = VectorMapJoinOperator.class;
} else {
opClass = VectorMapJoinOuterFilteredOperator.class;
}
vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
isNative = false;
} else {
// TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
// HiveConf.setBoolVar(physicalContext.getConf(),
// HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
isNative = true;
if (vectorTaskColumnInfo != null) {
if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
} else {
Preconditions.checkState(op instanceof SMBMapJoinOperator);
SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
isNative = false;
}
}
break;
case REDUCESINK:
{
VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
if (!specialize) {
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), op.getConf(), vContext);
isNative = false;
} else {
vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
isNative = true;
if (vectorTaskColumnInfo != null) {
if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
}
break;
case FILTER:
{
vectorOp = vectorizeFilterOperator(op, vContext);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorFilterDesc vectorFilterDesc = (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case SELECT:
{
vectorOp = vectorizeSelectOperator(op, vContext);
isNative = true;
if (vectorTaskColumnInfo != null) {
VectorSelectDesc vectorSelectDesc = (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
if (usesVectorUDFAdaptor(vectorSelectExprs)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
break;
case GROUPBY:
{
vectorOp = vectorizeGroupByOperator(op, vContext);
isNative = false;
if (vectorTaskColumnInfo != null) {
VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
if (!vectorGroupByDesc.isVectorOutput()) {
vectorTaskColumnInfo.setGroupByVectorOutput(false);
}
VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
if (usesVectorUDFAdaptor(vecKeyExpressions)) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
for (VectorAggregateExpression vecAggr : vecAggregators) {
if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
}
}
}
}
break;
case FILESINK:
{
FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext);
isNative = false;
}
break;
case LIMIT:
{
LimitDesc limitDesc = (LimitDesc) op.getConf();
VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
limitDesc.setVectorDesc(vectorLimitDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);
isNative = true;
}
break;
case EVENT:
{
AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
eventDesc.setVectorDesc(vectorEventDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext);
isNative = true;
}
break;
case HASHTABLESINK:
{
SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
isNative = true;
}
break;
case SPARKPRUNINGSINK:
{
SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
isNative = true;
}
break;
default:
// These are children of GROUP BY operators with non-vector outputs.
isNative = false;
vectorOp = op;
break;
}
Preconditions.checkState(vectorOp != null);
if (vectorTaskColumnInfo != null && !isNative) {
vectorTaskColumnInfo.setAllNative(false);
}
LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
if (vectorOp != op) {
fixupParentChildOperators(op, vectorOp);
((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
}
return vectorOp;
}
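The MAPJOIN and REDUCESINK branches share one shape: probe whether the operator can be specialized into a native vector operator; otherwise fall back to a generic vector wrapper and record that the task is no longer all-native. A condensed sketch of that control flow, with hypothetical stand-in types rather than Hive's interfaces:

// Hypothetical condensation of the specialize-or-fallback pattern above.
interface Specializer<D> {
  boolean canSpecialize(D desc);  // e.g. canSpecializeMapJoin / canSpecializeReduceSink
  Object specialize(D desc);      // e.g. specializeMapJoinOperator
  Object fallback(D desc);        // e.g. OperatorFactory.getVectorOperator(...)
}

final class VectorizeResult {
  final Object op;
  final boolean isNative;

  VectorizeResult(Object op, boolean isNative) {
    this.op = op;
    this.isNative = isNative;
  }
}

final class SpecializeOrFallback {
  static <D> VectorizeResult vectorize(Specializer<D> specializer, D desc) {
    if (specializer.canSpecialize(desc)) {
      return new VectorizeResult(specializer.specialize(desc), true);  // native
    }
    return new VectorizeResult(specializer.fallback(desc), false);     // non-native
  }
}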
use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.
the class MapJoinTestConfig method createNativeVectorMapJoin.
public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, Operator<? extends OperatorDesc> collectorOperator, MapJoinTestData testData, MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) throws SerDeException, IOException, HiveException {
VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc);
// UNDONE
mapJoinDesc.setVectorDesc(vectorDesc);
vectorDesc.setHashTableImplementationType(hashTableImplementationType);
VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
MapJoinTableContainer mapJoinTableContainer;
switch(vectorDesc.getHashTableImplementationType()) {
case OPTIMIZED:
mapJoinTableContainer = new MapJoinBytesTableContainer(testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0);
MapJoinTableContainerSerDe mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
mapJoinTableContainer.setSerde(mapJoinTableContainerSerDe.getKeyContext(), mapJoinTableContainerSerDe.getValueContext());
break;
case FAST:
mapJoinTableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, testDesc.hiveConf, testData.smallTableKeyHashMap.size());
break;
default:
throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType());
}
loadTableContainerData(testDesc, testData, mapJoinTableContainer);
VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc);
byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
VectorExpression[] slimmedBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable));
vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs();
VectorExpression[] slimmedBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable));
vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
VectorMapJoinCommonOperator operator = MapJoinTestConfig.createNativeVectorMapJoinOperator(testDesc.vectorMapJoinVariation, mapJoinDesc, vectorDesc, vContext);
MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator);
operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null);
return operator;
}
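The switch on getHashTableImplementationType() is effectively a small factory, and the asymmetry it hides is that only the OPTIMIZED path wires in key/value SerDe contexts. A standalone sketch with local stand-ins for the container classes (OptimizedContainer and FastContainer are illustrative names, not Hive types):

interface TableContainer { }

enum HashTableImpl { OPTIMIZED, FAST }

final class OptimizedContainer implements TableContainer {
  OptimizedContainer(int keyCount) { /* stand-in for MapJoinBytesTableContainer */ }

  void setSerde(Object keyContext, Object valueContext) {
    // In the snippet above, only the OPTIMIZED path wires in SerDe contexts.
  }
}

final class FastContainer implements TableContainer {
  FastContainer(int keyCount) { /* stand-in for VectorMapJoinFastTableContainer */ }
}

final class ContainerFactorySketch {
  static TableContainer create(HashTableImpl impl, int keyCount,
      Object keySerdeContext, Object valueSerdeContext) {
    switch (impl) {
      case OPTIMIZED:
        OptimizedContainer optimized = new OptimizedContainer(keyCount);
        optimized.setSerde(keySerdeContext, valueSerdeContext);
        return optimized;
      case FAST:
        return new FastContainer(keyCount);
      default:
        throw new IllegalArgumentException("Unexpected hash table implementation " + impl);
    }
  }
}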
use of org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo in project hive by apache.
the class Vectorizer method canSpecializeMapJoin.
private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorMapJoinDesc vectorDesc) throws HiveException {
Preconditions.checkState(op instanceof MapJoinOperator);
VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
boolean isVectorizationMapJoinNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED);
String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
boolean oneMapJoinCondition = (desc.getConds().length == 1);
boolean hasNullSafes = onExpressionHasNullSafes(desc);
byte posBigTable = (byte) desc.getPosBigTable();
// Since we want to display all the met and not-met conditions in EXPLAIN, we
// determine all the information first...
List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable);
boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.size() == 0);
// For now, we don't support joins on or using DECIMAL_64.
VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressionsUpConvertDecimal64(keyDesc);
final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length;
// Assume key types are supported until we find one that is not.
boolean supportsKeyTypes = true;
HashSet<String> notSupportedKeyTypes = new HashSet<String>();
// Since a key expression can be a calculation and the key will go into a scratch column,
// we need the mapping and type information.
int[] bigTableKeyColumnMap = new int[allBigTableKeyExpressionsLength];
String[] bigTableKeyColumnNames = new String[allBigTableKeyExpressionsLength];
TypeInfo[] bigTableKeyTypeInfos = new TypeInfo[allBigTableKeyExpressionsLength];
ArrayList<VectorExpression> bigTableKeyExpressionsList = new ArrayList<VectorExpression>();
VectorExpression[] slimmedBigTableKeyExpressions;
for (int i = 0; i < allBigTableKeyExpressionsLength; i++) {
VectorExpression ve = allBigTableKeyExpressions[i];
if (!IdentityExpression.isColumnOnly(ve)) {
bigTableKeyExpressionsList.add(ve);
}
bigTableKeyColumnMap[i] = ve.getOutputColumnNum();
ExprNodeDesc exprNode = keyDesc.get(i);
bigTableKeyColumnNames[i] = exprNode.toString();
TypeInfo typeInfo = exprNode.getTypeInfo();
// same check used in HashTableLoader.
if (!MapJoinKey.isSupportedField(typeInfo)) {
supportsKeyTypes = false;
Category category = typeInfo.getCategory();
notSupportedKeyTypes.add((category != Category.PRIMITIVE ? category.toString() : ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory().toString()));
}
bigTableKeyTypeInfos[i] = typeInfo;
}
if (bigTableKeyExpressionsList.size() == 0) {
slimmedBigTableKeyExpressions = null;
} else {
slimmedBigTableKeyExpressions = bigTableKeyExpressionsList.toArray(new VectorExpression[0]);
}
List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable);
// For now, we don't support joins on or using DECIMAL_64.
VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);
boolean isFastHashTableEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED);
// Read the hybrid hash join flag now, especially since LLAP is prone to turn it
// off in the MapJoinDesc in later physical optimizer stages...
boolean isHybridHashJoin = desc.isHybridHashJoin();
/*
* Populate vectorMapJoinInfo.
*/
/*
* Similarly, we need a mapping since a value expression can be a calculation and the value
* will go into a scratch column.
*
* Value expressions include keys? YES.
*/
// Assume value types are supported until we find one that is not.
boolean supportsValueTypes = true;
HashSet<String> notSupportedValueTypes = new HashSet<String>();
int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length];
String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length];
TypeInfo[] bigTableValueTypeInfos = new TypeInfo[allBigTableValueExpressions.length];
ArrayList<VectorExpression> bigTableValueExpressionsList = new ArrayList<VectorExpression>();
VectorExpression[] slimmedBigTableValueExpressions;
for (int i = 0; i < bigTableValueColumnMap.length; i++) {
VectorExpression ve = allBigTableValueExpressions[i];
if (!IdentityExpression.isColumnOnly(ve)) {
bigTableValueExpressionsList.add(ve);
}
bigTableValueColumnMap[i] = ve.getOutputColumnNum();
ExprNodeDesc exprNode = bigTableExprs.get(i);
bigTableValueColumnNames[i] = exprNode.toString();
TypeInfo typeInfo = exprNode.getTypeInfo();
if (!(typeInfo instanceof PrimitiveTypeInfo)) {
supportsValueTypes = false;
Category category = typeInfo.getCategory();
notSupportedValueTypes.add(category.toString());
}
bigTableValueTypeInfos[i] = typeInfo;
}
if (bigTableValueExpressionsList.size() == 0) {
slimmedBigTableValueExpressions = null;
} else {
slimmedBigTableValueExpressions = bigTableValueExpressionsList.toArray(new VectorExpression[0]);
}
vectorMapJoinInfo.setBigTableKeyColumnMap(bigTableKeyColumnMap);
vectorMapJoinInfo.setBigTableKeyColumnNames(bigTableKeyColumnNames);
vectorMapJoinInfo.setBigTableKeyTypeInfos(bigTableKeyTypeInfos);
vectorMapJoinInfo.setSlimmedBigTableKeyExpressions(slimmedBigTableKeyExpressions);
vectorDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions);
vectorMapJoinInfo.setBigTableValueColumnMap(bigTableValueColumnMap);
vectorMapJoinInfo.setBigTableValueColumnNames(bigTableValueColumnNames);
vectorMapJoinInfo.setBigTableValueTypeInfos(bigTableValueTypeInfos);
vectorMapJoinInfo.setSlimmedBigTableValueExpressions(slimmedBigTableValueExpressions);
vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions);
/*
* Column mapping.
*/
VectorColumnOutputMapping bigTableRetainMapping = new VectorColumnOutputMapping("Big Table Retain Mapping");
VectorColumnOutputMapping nonOuterSmallTableKeyMapping = new VectorColumnOutputMapping("Non Outer Small Table Key Mapping");
VectorColumnOutputMapping outerSmallTableKeyMapping = new VectorColumnOutputMapping("Outer Small Table Key Mapping");
VectorColumnSourceMapping fullOuterSmallTableKeyMapping = new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
// The order of the fields in the LazyBinary small table value must be used, so
// we use the source ordering flavor for the mapping.
VectorColumnSourceMapping smallTableValueMapping = new VectorColumnSourceMapping("Small Table Value Mapping");
Byte[] order = desc.getTagOrder();
Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
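// With one join condition there are exactly two tags in the order array; the
// single small table is whichever tag is not the big table.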
boolean isOuterJoin = !desc.getNoOuterJoin();
/*
* Gather up big and small table output result information from the MapJoinDesc.
*/
List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable);
int[] smallTableIndices;
int smallTableIndicesSize;
List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
smallTableIndicesSize = smallTableIndices.length;
} else {
smallTableIndices = null;
smallTableIndicesSize = 0;
}
List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
int smallTableRetainSize = smallTableRetainList.size();
int smallTableResultSize = 0;
if (smallTableIndicesSize > 0) {
smallTableResultSize = smallTableIndicesSize;
} else if (smallTableRetainSize > 0) {
smallTableResultSize = smallTableRetainSize;
}
/*
* Determine the big table retained mapping first so we can optimize out (with
* projection) copying inner join big table keys in the subsequent small table results section.
*/
// We use a mapping object here so we can build the projection in any order and
// get the output columns ordered 0 to n-1 at the end.
//
// Also, to avoid copying a big table key into the small table result area for inner joins,
// we reference it with the projection so there can be duplicate output columns
// in the projection.
VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize);
final int bigTableRetainSize = bigTableRetainList.size();
for (int i = 0; i < bigTableRetainSize; i++) {
// Since bigTableValueExpressions may do a calculation and produce a scratch column, we
// need to map to the right batch column.
int retainColumn = bigTableRetainList.get(i);
int batchColumnIndex = bigTableValueColumnMap[retainColumn];
TypeInfo typeInfo = bigTableValueTypeInfos[i];
// With this map we project the big table batch to make it look like an output batch.
projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo);
// Collect columns we copy from the big table batch to the overflow batch.
if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
// Tolerate repeated use of a big table column.
bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo);
}
nextOutputColumn++;
}
/*
* Now determine the small table results.
*/
boolean smallTableExprVectorizes = true;
final int firstSmallTableOutputColumn = (order[0] == posBigTable ? bigTableRetainSize : 0);
nextOutputColumn = firstSmallTableOutputColumn;
// Small table indices have more information (i.e. keys) than the retain list, so use them if they exist...
if (smallTableIndicesSize > 0) {
for (int i = 0; i < smallTableIndicesSize; i++) {
if (smallTableIndices[i] >= 0) {
// Non-negative numbers indicate a big table key is needed for the
// small table result "area".
int keyIndex = smallTableIndices[i];
// Since bigTableKeyExpressions may do a calculation and produce a scratch column, we
// need to map the right column.
int bigTableKeyColumn = bigTableKeyColumnMap[keyIndex];
TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex];
if (!isOuterJoin) {
// Optimize inner join keys of small table results.
// Project the big table key into the small table result "area".
projectionMapping.add(nextOutputColumn, bigTableKeyColumn, typeInfo);
if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumn)) {
// When the Big Key is not retained in the output result, we do need to copy the
// Big Table key into the overflow batch so the projection of it (Big Table key) to
// the Small Table key will work properly...
//
nonOuterSmallTableKeyMapping.add(bigTableKeyColumn, bigTableKeyColumn, typeInfo);
}
} else {
// For outer joins, since the small table key can be NULL on a NOMATCH row,
// we must have a physical (scratch) column for those keys. We cannot use the
// projection optimization used by non-[FULL] OUTER joins above.
int scratchColumn = vContext.allocateScratchColumn(typeInfo);
projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
outerSmallTableKeyMapping.add(bigTableKeyColumn, scratchColumn, typeInfo);
// For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key
// into the output result.
fullOuterSmallTableKeyMapping.add(keyIndex, scratchColumn, typeInfo);
}
} else {
// Negative numbers indicate a column to be deserialized from the small table's
// LazyBinary value row.
int smallTableValueIndex = -smallTableIndices[i] - 1;
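// Example: smallTableIndices[i] == -3 decodes to smallTableValueIndex == 2, the
// third field of the LazyBinary value row.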
ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
clearNotVectorizedReason();
smallTableExprVectorizes = false;
}
TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
// Make a new big table scratch column for the small table value.
int scratchColumn = vContext.allocateScratchColumn(typeInfo);
projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
}
nextOutputColumn++;
}
} else if (smallTableRetainSize > 0) {
for (int i = 0; i < smallTableRetainSize; i++) {
int smallTableValueIndex = smallTableRetainList.get(i);
ExprNodeDesc smallTableExprNode = smallTableExprs.get(i);
if (!validateExprNodeDesc(smallTableExprNode, "Small Table")) {
clearNotVectorizedReason();
smallTableExprVectorizes = false;
}
// Make a new big table scratch column for the small table value.
TypeInfo typeInfo = smallTableExprNode.getTypeInfo();
int scratchColumn = vContext.allocateScratchColumn(typeInfo);
projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo);
smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo);
nextOutputColumn++;
}
}
Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters();
VectorExpression[] bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER);
vectorMapJoinInfo.setBigTableFilterExpressions(bigTableFilterExpressions);
boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
// Remember the condition variables for EXPLAIN regardless of whether we specialize or not.
vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
vectorDesc.setUseOptimizedTable(useOptimizedTable);
vectorDesc.setIsVectorizationMapJoinNativeEnabled(isVectorizationMapJoinNativeEnabled);
vectorDesc.setEngine(engine);
vectorDesc.setOneMapJoinCondition(oneMapJoinCondition);
vectorDesc.setHasNullSafes(hasNullSafes);
vectorDesc.setSmallTableExprVectorizes(smallTableExprVectorizes);
vectorDesc.setOuterJoinHasNoKeys(outerJoinHasNoKeys);
vectorDesc.setIsFastHashTableEnabled(isFastHashTableEnabled);
vectorDesc.setIsHybridHashJoin(isHybridHashJoin);
vectorDesc.setSupportsKeyTypes(supportsKeyTypes);
if (!supportsKeyTypes) {
vectorDesc.setNotSupportedKeyTypes(new ArrayList<>(notSupportedKeyTypes));
}
vectorDesc.setSupportsValueTypes(supportsValueTypes);
if (!supportsValueTypes) {
vectorDesc.setNotSupportedValueTypes(new ArrayList<>(notSupportedValueTypes));
}
// Check common conditions for both Optimized and Fast Hash Tables.
// Assume specialization is possible until a condition rules it out.
boolean result = true;
if (!useOptimizedTable || !isVectorizationMapJoinNativeEnabled || !isTezOrSpark || !oneMapJoinCondition || hasNullSafes || !smallTableExprVectorizes || outerJoinHasNoKeys || !supportsValueTypes) {
result = false;
}
if (!isFastHashTableEnabled) {
// Check optimized-only hash table restrictions.
if (!supportsKeyTypes) {
result = false;
}
} else {
if (isHybridHashJoin) {
result = false;
}
}
// Convert dynamic arrays and maps to simple arrays.
bigTableRetainMapping.finalize();
vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns());
vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos());
nonOuterSmallTableKeyMapping.finalize();
vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns());
vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos());
outerSmallTableKeyMapping.finalize();
fullOuterSmallTableKeyMapping.finalize();
vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping);
vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping);
smallTableValueMapping.finalize();
vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping);
projectionMapping.finalize();
// Verify we added an entry for each output.
assert projectionMapping.isSourceSequenceGood();
vectorMapJoinInfo.setProjectionMapping(projectionMapping);
return result;
}
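Restated as a single function, the decision above combines the common preconditions with one hash-table-specific check. This is a sketch over the flags gathered in the method; the parameter names mirror the local variables above:

// Sketch: the specialization decision at the end of canSpecializeMapJoin.
static boolean specializeDecision(
    boolean useOptimizedTable, boolean nativeEnabled, boolean isTezOrSpark,
    boolean oneMapJoinCondition, boolean hasNullSafes, boolean smallTableExprVectorizes,
    boolean outerJoinHasNoKeys, boolean supportsValueTypes,
    boolean isFastHashTableEnabled, boolean isHybridHashJoin, boolean supportsKeyTypes) {
  boolean commonOk = useOptimizedTable && nativeEnabled && isTezOrSpark
      && oneMapJoinCondition && !hasNullSafes && smallTableExprVectorizes
      && !outerJoinHasNoKeys && supportsValueTypes;
  // FAST hash table: incompatible with hybrid grace hash join.
  // OPTIMIZED hash table: all key types must be supported.
  boolean hashTableOk = isFastHashTableEnabled ? !isHybridHashJoin : supportsKeyTypes;
  return commonOk && hashTableOk;
}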