use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorMapJoinLeftSemiMultiKeyOperator method process.
// ---------------------------------------------------------------------------
// Process Multi-Key Left-Semi Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    alias = (byte) tag;

    if (needCommonSetup) {
      // One-time process method initialization.
      commonSetup(batch);

      /*
       * Initialize Multi-Key members for this specialized class.
       */
      keyVectorSerializeWrite = new VectorSerializeRow(
          new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
      keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
      currentKeyOutput = new Output();
      saveKeyOutput = new Output();

      needCommonSetup = false;
    }

    if (needHashTableSetup) {
      // Set up our hash table specialization. This happens the first time the process
      // method is called, or after a Hybrid Grace reload.

      /*
       * Get our Multi-Key hash set information for this specialized class.
       */
      hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;

      needHashTableSetup = false;
    }

    batchCounter++;

    // For left semi joins, we may apply the filter(s) now.
    for (VectorExpression ve : bigTableFilterExpressions) {
      ve.evaluate(batch);
    }

    final int inputLogicalSize = batch.size;
    if (inputLogicalSize == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }

    // Evaluate any key expressions. Results go into scratch columns.
    if (bigTableKeyExpressions != null) {
      for (VectorExpression ve : bigTableKeyExpressions) {
        ve.evaluate(batch);
      }
    }

    /*
     * Multi-Key specific declarations.
     */
    // None.

    /*
     * Multi-Key check for repeating.
     */
    // If all big-table input columns to the key expressions are isRepeating, then
    // calculate the key once and look it up once.
    boolean allKeyInputColumnsRepeating;
    if (bigTableKeyColumnMap.length == 0) {
      allKeyInputColumnsRepeating = false;
    } else {
      allKeyInputColumnsRepeating = true;
      for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
        if (!batch.cols[bigTableKeyColumnMap[i]].isRepeating) {
          allKeyInputColumnsRepeating = false;
          break;
        }
      }
    }

    if (allKeyInputColumnsRepeating) {

      /*
       * Repeating: all key input columns repeat. Generate the key once; look it up once.
       * Since the key is repeated, we must use entry 0 regardless of selectedInUse.
       */

      /*
       * Multi-Key specific repeated lookup.
       */
      keyVectorSerializeWrite.setOutput(currentKeyOutput);
      keyVectorSerializeWrite.serializeWrite(batch, 0);
      JoinUtil.JoinResult joinResult;
      if (keyVectorSerializeWrite.getHasAnyNulls()) {
        // A NULL in the key can never match.
        joinResult = JoinUtil.JoinResult.NOMATCH;
      } else {
        byte[] keyBytes = currentKeyOutput.getData();
        int keyLength = currentKeyOutput.getLength();
        joinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[0]);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
      }

      finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
      }

      // We remember any matching rows in allMatchs / allMatchCount; only deferred
      // (spilled) rows are removed from the selected list.
      int[] selected = batch.selected;
      boolean selectedInUse = batch.selectedInUse;

      int hashSetResultCount = 0;
      int allMatchCount = 0;
      int spillCount = 0;

      /*
       * Multi-Key specific variables.
       */
      Output temp;

      // We optimize performance by only looking up the first key in a series of equal keys.
      boolean haveSaveKey = false;
      JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;

      // Logical loop over the rows in the batch, since the batch may have the
      // selected list in use.
      for (int logical = 0; logical < inputLogicalSize; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);

        /*
         * Multi-Key get key: generate the binary-sortable key for the current row.
         */
        keyVectorSerializeWrite.setOutput(currentKeyOutput);
        keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
        boolean isAnyNull = keyVectorSerializeWrite.getHasAnyNulls();

        if (isAnyNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {

          // A new key series begins. Close out the counts for the previous series.
          if (haveSaveKey) {
            switch (saveJoinResult) {
            case MATCH:
              // We have extracted the existence from the hash set result, so we don't keep it.
              break;
            case SPILL:
              // We keep the hash set result for its spill information.
              hashSetResultCount++;
              break;
            case NOMATCH:
              break;
            }
          }

          if (isAnyNull) {
            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            haveSaveKey = false;
          } else {
            // Regardless of the matching result, we keep it for reuse over a
            // possible series of equal keys.
            haveSaveKey = true;

            /*
             * Multi-Key specific save key: swap the current and saved outputs.
             */
            temp = saveKeyOutput;
            saveKeyOutput = currentKeyOutput;
            currentKeyOutput = temp;

            /*
             * Multi-Key specific lookup key.
             */
            byte[] keyBytes = saveKeyOutput.getData();
            int keyLength = saveKeyOutput.getLength();
            saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]);
          }

          switch (saveJoinResult) {
          case MATCH:
            allMatchs[allMatchCount++] = batchIndex;
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            break;
          }
        } else {
          // Series of equal keys: reuse the saved join result.
          switch (saveJoinResult) {
          case MATCH:
            allMatchs[allMatchCount++] = batchIndex;
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            break;
          }
        }
      }

      if (haveSaveKey) {
        // Close out the counts for the last key series.
        switch (saveJoinResult) {
        case MATCH:
          // We have extracted the existence from the hash set result, so we don't keep it.
          break;
        case SPILL:
          // We keep the hash set result for its spill information.
          hashSetResultCount++;
          break;
        case NOMATCH:
          break;
        }
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME +
            " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) +
            " spills " + intArrayToRangesString(spills, spillCount) +
            " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
            " hashSetResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
      }

      finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
    }

    if (batch.size > 0) {
      // Forward any remaining selected rows.
      forwardBigTableBatch(batch);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
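The heart of this method is the equal-key-series optimization: only the first key in a run of identical keys is probed against the hash set, and the saved result is reused for the rest of the run. Below is a minimal standalone sketch of that pattern, using a plain java.util.HashSet of strings in place of Hive's VectorMapJoinBytesHashSet; all names here are illustrative, not Hive APIs.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

public class EqualKeySeriesSketch {

  // Collect indices of rows whose key is in the hash set, probing the set only
  // once per run of equal keys (mirrors haveSaveKey / saveJoinResult above).
  static int[] semiJoinMatches(String[] keys, Set<String> hashSet) {
    int[] matches = new int[keys.length];
    int matchCount = 0;

    String saveKey = null;
    boolean haveSaveKey = false;
    boolean saveResult = false;

    for (int i = 0; i < keys.length; i++) {
      String key = keys[i];
      if (key == null) {
        // A NULL key never matches a semi join; it also breaks any running series.
        haveSaveKey = false;
        continue;
      }
      if (!haveSaveKey || !Objects.equals(saveKey, key)) {
        // New key series: do the (expensive) lookup once.
        saveKey = key;
        haveSaveKey = true;
        saveResult = hashSet.contains(key);
      }
      // Reuse the saved result for every row in the series.
      if (saveResult) {
        matches[matchCount++] = i;
      }
    }
    return Arrays.copyOf(matches, matchCount);
  }

  public static void main(String[] args) {
    Set<String> smallTable = new HashSet<>(Arrays.asList("a", "c"));
    String[] bigTableKeys = { "a", "a", "b", "b", "c", null, "c" };
    // Prints [0, 1, 4, 6]: four probes instead of six.
    System.out.println(Arrays.toString(semiJoinMatches(bigTableKeys, smallTable)));
  }
}

The payoff is largest when the big table arrives clustered or sorted on the join key, which is exactly when long runs of equal keys occur.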
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorMapJoinLeftSemiStringOperator method process.
// ---------------------------------------------------------------------------
// Process Single-Column String Left-Semi Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    alias = (byte) tag;

    if (needCommonSetup) {
      // One-time process method initialization.
      commonSetup(batch);

      /*
       * Initialize Single-Column String members for this specialized class.
       */
      singleJoinColumn = bigTableKeyColumnMap[0];

      needCommonSetup = false;
    }

    if (needHashTableSetup) {
      // Set up our hash table specialization. This happens the first time the process
      // method is called, or after a Hybrid Grace reload.

      /*
       * Get our Single-Column String hash set information for this specialized class.
       */
      hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;

      needHashTableSetup = false;
    }

    batchCounter++;

    // For left semi joins, we may apply the filter(s) now.
    for (VectorExpression ve : bigTableFilterExpressions) {
      ve.evaluate(batch);
    }

    final int inputLogicalSize = batch.size;
    if (inputLogicalSize == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }

    // Evaluate any key expressions. Results go into scratch columns.
    if (bigTableKeyExpressions != null) {
      for (VectorExpression ve : bigTableKeyExpressions) {
        ve.evaluate(batch);
      }
    }

    /*
     * Single-Column String specific declarations.
     */

    // The one join column for this specialized class.
    BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
    byte[][] vector = joinColVector.vector;
    int[] start = joinColVector.start;
    int[] length = joinColVector.length;

    /*
     * Single-Column String check for repeating.
     */

    // Check the single join column for repeating.
    boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;

    if (allKeyInputColumnsRepeating) {

      /*
       * Repeating: the key input column repeats. Generate the key once; look it up once.
       * Since the key is repeated, we must use entry 0 regardless of selectedInUse.
       */

      /*
       * Single-Column String specific repeated lookup.
       */
      JoinUtil.JoinResult joinResult;
      if (!joinColVector.noNulls && joinColVector.isNull[0]) {
        // A NULL key can never match.
        joinResult = JoinUtil.JoinResult.NOMATCH;
      } else {
        byte[] keyBytes = vector[0];
        int keyStart = start[0];
        int keyLength = length[0];
        joinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[0]);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
      }

      finishLeftSemiRepeated(batch, joinResult, hashSetResults[0]);
    } else {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
      }

      // We remember any matching rows in allMatchs / allMatchCount; only deferred
      // (spilled) rows are removed from the selected list.
      int[] selected = batch.selected;
      boolean selectedInUse = batch.selectedInUse;

      int hashSetResultCount = 0;
      int allMatchCount = 0;
      int spillCount = 0;

      /*
       * Single-Column String specific variables.
       */
      int saveKeyBatchIndex = -1;

      // We optimize performance by only looking up the first key in a series of equal keys.
      boolean haveSaveKey = false;
      JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;

      // Logical loop over the rows in the batch, since the batch may have the
      // selected list in use.
      for (int logical = 0; logical < inputLogicalSize; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);

        /*
         * Single-Column String get key.
         */
        // Implicit -- use batchIndex.
        boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];

        if (isNull || !haveSaveKey ||
            !StringExpr.equal(
                vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex],
                vector[batchIndex], start[batchIndex], length[batchIndex])) {

          // A new key series begins. Close out the counts for the previous series.
          if (haveSaveKey) {
            switch (saveJoinResult) {
            case MATCH:
              // We have extracted the existence from the hash set result, so we don't keep it.
              break;
            case SPILL:
              // We keep the hash set result for its spill information.
              hashSetResultCount++;
              break;
            case NOMATCH:
              break;
            }
          }

          if (isNull) {
            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            haveSaveKey = false;
          } else {
            // Regardless of the matching result, we keep it for reuse over a
            // possible series of equal keys.
            haveSaveKey = true;

            /*
             * Single-Column String specific save key.
             */
            saveKeyBatchIndex = batchIndex;

            /*
             * Single-Column String specific lookup key.
             */
            byte[] keyBytes = vector[batchIndex];
            int keyStart = start[batchIndex];
            int keyLength = length[batchIndex];
            saveJoinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[hashSetResultCount]);
          }

          switch (saveJoinResult) {
          case MATCH:
            allMatchs[allMatchCount++] = batchIndex;
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            break;
          }
        } else {
          // Series of equal keys: reuse the saved join result.
          switch (saveJoinResult) {
          case MATCH:
            allMatchs[allMatchCount++] = batchIndex;
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            break;
          }
        }
      }

      if (haveSaveKey) {
        // Close out the counts for the last key series.
        switch (saveJoinResult) {
        case MATCH:
          // We have extracted the existence from the hash set result, so we don't keep it.
          break;
        case SPILL:
          // We keep the hash set result for its spill information.
          hashSetResultCount++;
          break;
        case NOMATCH:
          break;
        }
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME +
            " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) +
            " spills " + intArrayToRangesString(spills, spillCount) +
            " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
            " hashSetResults " + Arrays.toString(Arrays.copyOfRange(hashSetResults, 0, hashSetResultCount)));
      }

      finishLeftSemi(batch, allMatchCount, spillCount, (VectorMapJoinHashTableResult[]) hashSetResults);
    }

    if (batch.size > 0) {
      // Forward any remaining selected rows.
      forwardBigTableBatch(batch);
    }
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
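Besides the equal-key-series trick, both operators exploit the isRepeating flag: when a column vector marks every row as carrying the same value, entry 0 is checked and probed once, and the single result stands for the whole batch. A rough sketch of that contract, with a stripped-down stand-in for a column vector (illustrative types, not Hive's):

import java.util.Arrays;
import java.util.Set;

public class RepeatingColumnSketch {

  // Minimal stand-in for a vectorized column: when isRepeating is true,
  // only values[0] (and isNull[0]) are meaningful for every row.
  static class StringColumn {
    String[] values;
    boolean[] isNull;
    boolean noNulls;
    boolean isRepeating;
  }

  // Returns a per-row match mask of 'size' entries against the hash set,
  // probing only once when the column repeats.
  static boolean[] matchMask(StringColumn col, int size, Set<String> hashSet) {
    boolean[] mask = new boolean[size];
    if (col.isRepeating) {
      // One null check and one probe cover the entire batch.
      boolean match = (col.noNulls || !col.isNull[0]) && hashSet.contains(col.values[0]);
      Arrays.fill(mask, match);
      return mask;
    }
    for (int i = 0; i < size; i++) {
      mask[i] = (col.noNulls || !col.isNull[i]) && hashSet.contains(col.values[i]);
    }
    return mask;
  }
}

This is why the repeated path above must read entry 0 regardless of selectedInUse: a repeating vector only guarantees that slot.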
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method getBetweenExpression.
/*
 * Get a [NOT] BETWEEN filter or projection expression. This is treated as a special case
 * because the NOT is actually specified in the expression tree as the first argument,
 * and we don't want any runtime cost for that. So creating the VectorExpression
 * needs to be done differently than the standard way, where all arguments are
 * passed to the VectorExpression constructor.
 */
private VectorExpression getBetweenExpression(List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {

  boolean hasDynamicValues = false;

  // We don't currently support the BETWEEN ends being columns. They must be scalars.
  if ((childExpr.get(2) instanceof ExprNodeDynamicValueDesc) &&
      (childExpr.get(3) instanceof ExprNodeDynamicValueDesc)) {
    hasDynamicValues = true;
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      // Projection mode is not applicable.
      return null;
    }
  } else if (!(childExpr.get(2) instanceof ExprNodeConstantDesc) ||
      !(childExpr.get(3) instanceof ExprNodeConstantDesc)) {
    return null;
  }

  boolean notKeywordPresent = (Boolean) ((ExprNodeConstantDesc) childExpr.get(0)).getValue();
  ExprNodeDesc colExpr = childExpr.get(1);

  // The children after NOT might need a cast. Get a common type for the two comparisons.
  // Casting for BETWEEN is handled here as a special case because the first child is the
  // NOT flag and doesn't need a cast.
  TypeInfo commonType = FunctionRegistry.getCommonClassForComparison(
      childExpr.get(1).getTypeInfo(), childExpr.get(2).getTypeInfo());
  if (commonType == null) {
    // Can't vectorize.
    return null;
  }
  commonType = FunctionRegistry.getCommonClassForComparison(commonType, childExpr.get(3).getTypeInfo());
  if (commonType == null) {
    // Can't vectorize.
    return null;
  }

  List<ExprNodeDesc> castChildren = new ArrayList<>();
  boolean wereCastUdfs = false;
  Category commonTypeCategory = commonType.getCategory();
  for (ExprNodeDesc desc : childExpr.subList(1, 4)) {
    TypeInfo childTypeInfo = desc.getTypeInfo();
    Category childCategory = childTypeInfo.getCategory();
    if (childCategory != commonTypeCategory) {
      return null;
    }
    final boolean isNeedsCast;
    if (commonTypeCategory == Category.PRIMITIVE) {
      // Do not do strict TypeInfo comparisons for DECIMAL -- just compare the category.
      // Otherwise, we generate unnecessary casts.
      isNeedsCast =
          ((PrimitiveTypeInfo) commonType).getPrimitiveCategory() !=
          ((PrimitiveTypeInfo) childTypeInfo).getPrimitiveCategory();
    } else {
      isNeedsCast = !commonType.equals(desc.getTypeInfo());
    }

    if (!isNeedsCast) {
      castChildren.add(desc);
    } else {
      GenericUDF castUdf = getGenericUDFForCast(commonType);
      ExprNodeGenericFuncDesc engfd = new ExprNodeGenericFuncDesc(commonType, castUdf,
          Arrays.asList(new ExprNodeDesc[] { desc }));
      castChildren.add(engfd);
      wereCastUdfs = true;
    }
  }
  String colType = commonType.getTypeName();

  // Prepare arguments for createVectorExpression.
  List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);

  // Determine the vector expression class.
  Class<?> cl = null;
  if (isIntFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterLongColumnBetweenDynamicValue.class :
          FilterLongColumnBetween.class);
    }
  } else if (isIntFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnNotBetween.class;
    } else {
      cl = FilterLongColumnNotBetween.class;
    }
  } else if (isFloatFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DoubleColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterDoubleColumnBetweenDynamicValue.class :
          FilterDoubleColumnBetween.class);
    }
  } else if (isFloatFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DoubleColumnNotBetween.class;
    } else {
      cl = FilterDoubleColumnNotBetween.class;
    }
  } else if (colType.equals("string") && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = StringColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterStringColumnBetweenDynamicValue.class :
          FilterStringColumnBetween.class);
    }
  } else if (colType.equals("string") && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = StringColumnNotBetween.class;
    } else {
      cl = FilterStringColumnNotBetween.class;
    }
  } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = VarCharColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterVarCharColumnBetweenDynamicValue.class :
          FilterVarCharColumnBetween.class);
    }
  } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = VarCharColumnNotBetween.class;
    } else {
      cl = FilterVarCharColumnNotBetween.class;
    }
  } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = CharColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterCharColumnBetweenDynamicValue.class :
          FilterCharColumnBetween.class);
    }
  } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = CharColumnNotBetween.class;
    } else {
      cl = FilterCharColumnNotBetween.class;
    }
  } else if (colType.equals("timestamp") && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = TimestampColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterTimestampColumnBetweenDynamicValue.class :
          FilterTimestampColumnBetween.class);
    }
  } else if (colType.equals("timestamp") && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = TimestampColumnNotBetween.class;
    } else {
      cl = FilterTimestampColumnNotBetween.class;
    }
  } else if (isDecimalFamily(colType) && !notKeywordPresent) {
    final boolean tryDecimal64 =
        checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
    if (tryDecimal64) {
      VectorExpression decimal64VecExpr =
          tryDecimal64Between(mode, /* isNot */ false, colExpr, childrenAfterNot, returnType);
      if (decimal64VecExpr != null) {
        return decimal64VecExpr;
      }
    }
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DecimalColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterDecimalColumnBetweenDynamicValue.class :
          FilterDecimalColumnBetween.class);
    }
  } else if (isDecimalFamily(colType) && notKeywordPresent) {
    final boolean tryDecimal64 =
        checkExprNodeDescForDecimal64(colExpr) && !wereCastUdfs && !hasDynamicValues;
    if (tryDecimal64) {
      VectorExpression decimal64VecExpr =
          tryDecimal64Between(mode, /* isNot */ true, colExpr, childrenAfterNot, returnType);
      if (decimal64VecExpr != null) {
        return decimal64VecExpr;
      }
    }
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = DecimalColumnNotBetween.class;
    } else {
      cl = FilterDecimalColumnNotBetween.class;
    }
  } else if (isDateFamily(colType) && !notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnBetween.class;
    } else {
      cl = (hasDynamicValues ?
          FilterDateColumnBetweenDynamicValue.class :
          FilterLongColumnBetween.class);
    }
  } else if (isDateFamily(colType) && notKeywordPresent) {
    if (mode == VectorExpressionDescriptor.Mode.PROJECTION) {
      cl = LongColumnNotBetween.class;
    } else {
      cl = FilterLongColumnNotBetween.class;
    }
  }
  return createVectorExpression(cl, childrenAfterNot,
      VectorExpressionDescriptor.Mode.PROJECTION, returnType, DataTypePhysicalVariation.NONE);
}
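For reference, the child list this method expects for an expression like col NOT BETWEEN 5 AND 10 is: constant NOT flag (true), the column, the lower bound, the upper bound. What the chosen Filter*ColumnBetween class then does at run time is narrow the batch's selected list in place. A simplified sketch of that filter contract over a long column follows; this is a hypothetical helper illustrating the semantics, not the Hive class itself.

public class BetweenFilterSketch {

  // Keep only rows whose value lies in [min, max], rewriting the selected
  // index list in place -- the core contract of a vectorized filter.
  static int filterLongBetween(long[] col, int[] selected, int size,
      boolean selectedInUse, long min, long max) {
    int newSize = 0;
    for (int logical = 0; logical < size; logical++) {
      int row = selectedInUse ? selected[logical] : logical;
      long v = col[row];
      if (min <= v && v <= max) {
        // newSize <= logical, so writing here never clobbers an unread entry.
        selected[newSize++] = row;
      }
    }
    // The caller would set batch.size = newSize and batch.selectedInUse = true.
    return newSize;
  }

  public static void main(String[] args) {
    long[] col = { 3, 7, 12, 9, 5 };
    int[] selected = new int[col.length];
    int n = filterLongBetween(col, selected, col.length, false, 5, 10);
    // Keeps rows 1, 3, 4 (values 7, 9, 5); prints "3 rows pass".
    System.out.println(n + " rows pass");
  }
}

Pre-selecting a class per (type family, NOT, mode, dynamic-value) combination, as getBetweenExpression does, keeps this inner loop free of per-row branching on those properties.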
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method getIfExpression.
private VectorExpression getIfExpression(GenericUDFIf genericUDFIf, List<ExprNodeDesc> childExpr,
    VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException {

  // Assume a projection (non-filter) expression.
  boolean isFilter = false;
  if (mode == VectorExpressionDescriptor.Mode.FILTER) {
    // Is the output type a BOOLEAN?
    if (returnType.getCategory() == Category.PRIMITIVE &&
        ((PrimitiveTypeInfo) returnType).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
      isFilter = true;
    } else {
      return null;
    }
  }

  // Get a PROJECTION IF expression.
  VectorExpression ve = doGetIfExpression(genericUDFIf, childExpr, returnType);
  if (ve == null) {
    return null;
  }

  if (isFilter) {
    // Wrap the PROJECTION IF expression output with a filter.
    SelectColumnIsTrue filterVectorExpr = new SelectColumnIsTrue(ve.getOutputColumnNum());

    filterVectorExpr.setChildExpressions(new VectorExpression[] { ve });

    filterVectorExpr.setInputTypeInfos(ve.getOutputTypeInfo());
    filterVectorExpr.setInputDataTypePhysicalVariations(ve.getOutputDataTypePhysicalVariation());

    return filterVectorExpr;
  } else {
    return ve;
  }
}
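The wrapping above rests on the child-expression protocol: the child IF expression materializes a boolean scratch column, and the SelectColumnIsTrue wrapper then filters on it. A compact sketch of that two-stage composition, with illustrative classes standing in for Hive's VectorExpression framework (which drives the child via setChildExpressions):

public class FilterWrapSketch {

  // Minimal batch: one long column used as a 0/1 boolean, plus a selected list.
  static class Batch {
    long[] boolCol = new long[8];
    int[] selected = new int[8];
    int size = 8;
    boolean selectedInUse = false;
  }

  interface Expr {
    void evaluate(Batch batch);
  }

  // Stage 1: a "projection" writing 1 into the scratch column where the condition holds.
  static Expr projectIsEven(long[] input) {
    return batch -> {
      for (int i = 0; i < batch.size; i++) {
        batch.boolCol[i] = (input[i] % 2 == 0) ? 1 : 0;
      }
    };
  }

  // Stage 2: a filter that runs its child first, then keeps rows where the
  // child's output column is true -- analogous to SelectColumnIsTrue.
  static Expr selectColumnIsTrue(Expr child) {
    return batch -> {
      child.evaluate(batch);
      int newSize = 0;
      for (int i = 0; i < batch.size; i++) {
        if (batch.boolCol[i] == 1) {
          batch.selected[newSize++] = i;
        }
      }
      batch.size = newSize;
      batch.selectedInUse = true;
    };
  }

  public static void main(String[] args) {
    long[] data = { 1, 2, 3, 4 };
    Batch batch = new Batch();
    batch.size = data.length;
    selectColumnIsTrue(projectIsEven(data)).evaluate(batch);
    // batch.selected now holds rows 1 and 3 (values 2 and 4); prints "2 rows selected".
    System.out.println(batch.size + " rows selected");
  }
}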
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorizationContext method getFilterOnBooleanColumnExpression.
private VectorExpression getFilterOnBooleanColumnExpression(ExprNodeColumnDesc exprDesc,
    int columnNum) throws HiveException {

  final VectorExpression expr;

  // Evaluate the column as a boolean, converting if necessary.
  TypeInfo typeInfo = exprDesc.getTypeInfo();
  if (typeInfo.getCategory() == Category.PRIMITIVE &&
      ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.BOOLEAN) {
    expr = new SelectColumnIsTrue(columnNum);

    expr.setInputTypeInfos(typeInfo);
    expr.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE);
  } else {
    // Ok, we need to convert.
    List<ExprNodeDesc> exprAsList = Collections.singletonList(exprDesc);
    expr = getCastToBooleanExpression(exprAsList, VectorExpressionDescriptor.Mode.FILTER);
    if (expr == null) {
      throw new HiveException(
          "Cannot vectorize converting expression " + exprDesc.getExprString() + " to boolean");
    }
  }
  return expr;
}