Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The finishAntiRepeated method of the class VectorMapJoinAntiJoinGenerateResultOperator:
protected void finishAntiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
    VectorMapJoinHashTableResult hashSetResult) throws HiveException, IOException {
  switch (joinResult) {
  case MATCH:
    if (bigTableValueExpressions != null) {
      // Run our value expressions over whole batch.
      for (VectorExpression ve : bigTableValueExpressions) {
        ve.evaluate(batch);
      }
    }
    // Generate special repeated case.
    batch.size = generateHashSetResultRepeatedAll(batch);
    batch.selectedInUse = true;
    break;
  case SPILL:
    // Whole batch is spilled.
    spillBatchRepeated(batch, hashSetResult);
    batch.size = 0;
    break;
  case NOMATCH:
    // No match for entire batch.
    batch.size = 0;
    break;
  }
}
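The interesting part of the snippet above is the repeated fast path: when every key column in the batch is flagged isRepeating, all rows share one key, so a single hash-table lookup decides the whole batch at once. Below is a minimal, self-contained sketch of that pattern, assuming simplified stand-in types: Batch, JoinResult, and finishRepeated are invented for illustration and are not Hive's VectorizedRowBatch, JoinUtil.JoinResult, or the operator's API (the SPILL path is omitted).

import java.util.Set;

public class RepeatedBatchSketch {

  enum JoinResult { MATCH, NOMATCH }

  // A toy batch: every row carries the same (repeating) key.
  static final class Batch {
    long repeatedKey;
    int size;
  }

  // One hash-set lookup decides the fate of all batch.size rows at once.
  static int finishRepeated(Batch batch, Set<Long> hashSet) {
    JoinResult joinResult =
        hashSet.contains(batch.repeatedKey) ? JoinResult.MATCH : JoinResult.NOMATCH;
    switch (joinResult) {
    case MATCH:
      return batch.size;  // keep every row
    default:
      return 0;           // drop every row
    }
  }

  public static void main(String[] args) {
    Batch batch = new Batch();
    batch.repeatedKey = 42L;
    batch.size = 1024;
    System.out.println(finishRepeated(batch, Set.of(42L)));  // 1024
    System.out.println(finishRepeated(batch, Set.of(7L)));   // 0
  }
}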
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The finishInnerBigOnlyRepeated method of the class VectorMapJoinInnerBigOnlyGenerateResultOperator:
protected void finishInnerBigOnlyRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
    VectorMapJoinHashMultiSetResult hashMultiSetResult) throws HiveException, IOException {
  switch (joinResult) {
  case MATCH:
    if (bigTableValueExpressions != null) {
      // Run our value expressions over whole batch.
      for (VectorExpression ve : bigTableValueExpressions) {
        ve.evaluate(batch);
      }
    }
    // Generate special repeated case.
    int numSel = generateHashMultiSetResultRepeatedAll(batch, hashMultiSetResult);
    batch.size = numSel;
    batch.selectedInUse = true;
    break;
  case SPILL:
    // Whole batch is spilled.
    spillBatchRepeated(batch, (VectorMapJoinHashTableResult) hashMultiSetResult);
    batch.size = 0;
    break;
  case NOMATCH:
    // No match for entire batch.
    batch.size = 0;
    break;
  }
}
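What distinguishes the inner big-only variant is the hash multi-set: the small table contributes no value columns, so the hash table only needs to store, per key, how many small-table rows carry it, and each matching big-table row is emitted that many times. A toy model of that counting behavior follows; the names are illustrative only, not Hive's API.

import java.util.HashMap;
import java.util.Map;

public class MultiSetRepeatSketch {

  public static void main(String[] args) {
    // key -> number of small-table rows carrying that key
    Map<String, Integer> hashMultiSet = new HashMap<>();
    hashMultiSet.put("k1", 3);

    // A "repeated" big-table batch: both rows share the key "k1".
    String[] bigTableRows = { "r1", "r2" };
    int count = hashMultiSet.getOrDefault("k1", 0);

    // Each big-table row appears 'count' times in the join output,
    // so the output size is bigTableRows.length * count rows.
    for (String row : bigTableRows) {
      for (int i = 0; i < count; i++) {
        System.out.println(row);  // prints r1 three times, then r2 three times
      }
    }
  }
}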
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The processBatch method of the class VectorMapJoinInnerMultiKeyOperator:
@Override
public void processBatch(VectorizedRowBatch batch) throws HiveException {

  try {
    // Do the per-batch setup for an inner join.
    innerPerBatchSetup(batch);

    // For inner joins, we may apply the filter(s) now.
    for (VectorExpression ve : bigTableFilterExpressions) {
      ve.evaluate(batch);
    }
    final int inputLogicalSize = batch.size;
    if (inputLogicalSize == 0) {
      return;
    }

    // Perform any key expressions. Results will go into scratch columns.
    if (bigTableKeyExpressions != null) {
      for (VectorExpression ve : bigTableKeyExpressions) {
        ve.evaluate(batch);
      }
    }

    /*
     * Multi-Key specific declarations.
     */

    // None.

    /*
     * Multi-Key check for repeating.
     */

    // If all BigTable input columns to key expressions are isRepeating, then
    // calculate key once; lookup once.
    boolean allKeyInputColumnsRepeating;
    if (bigTableKeyColumnMap.length == 0) {
      allKeyInputColumnsRepeating = false;
    } else {
      allKeyInputColumnsRepeating = true;
      for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
        if (!batch.cols[bigTableKeyColumnMap[i]].isRepeating) {
          allKeyInputColumnsRepeating = false;
          break;
        }
      }
    }

    if (allKeyInputColumnsRepeating) {

      /*
       * Repeating.
       */

      // All key input columns are repeating. Generate key once. Lookup once.
      // Since the key is repeated, we must use entry 0 regardless of selectedInUse.

      /*
       * Multi-Key specific repeated lookup.
       */

      keyVectorSerializeWrite.setOutput(currentKeyOutput);
      keyVectorSerializeWrite.serializeWrite(batch, 0);
      JoinUtil.JoinResult joinResult;
      if (keyVectorSerializeWrite.getHasAnyNulls()) {
        joinResult = JoinUtil.JoinResult.NOMATCH;
      } else {
        byte[] keyBytes = currentKeyOutput.getData();
        int keyLength = currentKeyOutput.getLength();
        joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
      }

      finishInnerRepeated(batch, joinResult, hashMapResults[0]);
    } else {

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
      }

      // We remember any matching rows in matchs / matchSize. At the end of the loop,
      // selected / batch.size will represent both matching and non-matching rows for outer join.
      // Only deferred rows will have been removed from selected.
      int[] selected = batch.selected;
      boolean selectedInUse = batch.selectedInUse;

      int hashMapResultCount = 0;
      int allMatchCount = 0;
      int equalKeySeriesCount = 0;
      int spillCount = 0;

      /*
       * Multi-Key specific variables.
       */

      Output temp;

      // We optimize performance by only looking up the first key in a series of equal keys.
      boolean haveSaveKey = false;
      JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;

      // Logical loop over the rows in the batch since the batch may have selected in use.
      for (int logical = 0; logical < inputLogicalSize; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);

        /*
         * Multi-Key get key.
         */

        // Generate binary sortable key for current row in vectorized row batch.
        keyVectorSerializeWrite.setOutput(currentKeyOutput);
        keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
        boolean isAnyNull = keyVectorSerializeWrite.getHasAnyNulls();

        if (isAnyNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {

          if (haveSaveKey) {
            // Move on with our counts.
            switch (saveJoinResult) {
            case MATCH:
              hashMapResultCount++;
              equalKeySeriesCount++;
              break;
            case SPILL:
              hashMapResultCount++;
              break;
            case NOMATCH:
              break;
            }
          }

          if (isAnyNull) {
            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            haveSaveKey = false;
          } else {
            // Regardless of our matching result, we keep that information to make multiple use
            // of it for a possible series of equal keys.
            haveSaveKey = true;

            /*
             * Multi-Key specific save key.
             */

            temp = saveKeyOutput;
            saveKeyOutput = currentKeyOutput;
            currentKeyOutput = temp;

            /*
             * Multi-Key specific lookup key.
             */

            byte[] keyBytes = saveKeyOutput.getData();
            int keyLength = saveKeyOutput.getLength();
            saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
          }

          switch (saveJoinResult) {
          case MATCH:
            equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
            equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
            allMatchs[allMatchCount++] = batchIndex;
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashMapResultCount;
            spillCount++;
            break;
          case NOMATCH:
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
            break;
          }
        } else {
          switch (saveJoinResult) {
          case MATCH:
            equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
            allMatchs[allMatchCount++] = batchIndex;
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashMapResultCount;
            spillCount++;
            break;
          case NOMATCH:
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
            break;
          }
        }
      }

      if (haveSaveKey) {
        // Update our counts for the last key.
        switch (saveJoinResult) {
        case MATCH:
          hashMapResultCount++;
          equalKeySeriesCount++;
          break;
        case SPILL:
          hashMapResultCount++;
          break;
        case NOMATCH:
          break;
        }
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME +
            " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) +
            " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) +
            " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
            " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) +
            " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) +
            " spills " + intArrayToRangesString(spills, spillCount) +
            " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
            " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
      }

      finishInner(batch, allMatchCount, equalKeySeriesCount, spillCount, hashMapResultCount);
    }

    if (batch.size > 0) {
      // Forward any remaining selected rows.
      forwardBigTableBatch(batch);
    }
  } catch (IOException e) {
    throw new HiveException(e);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
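The core trick in the non-repeated path above is the equal-key-series optimization: the hash map is probed only for the first row of a run of equal adjacent keys, and saveJoinResult together with hashMapResults[hashMapResultCount] is reused for the rest of the run. A stripped-down, runnable sketch of the idea follows, using plain Java collections and invented names rather than Hive's API.

import java.util.Map;

public class EqualKeySeriesSketch {

  public static void main(String[] args) {
    String[] keys = { "a", "a", "b", "b", "b", "c" };
    Map<String, String> hashMap = Map.of("a", "A", "c", "C");

    String saveKey = null;    // first key of the current series
    String saveValue = null;  // cached lookup result for that series
    int lookups = 0;

    for (String key : keys) {
      if (saveKey == null || !saveKey.equals(key)) {
        // New key series: do the (expensive) lookup exactly once.
        saveKey = key;
        saveValue = hashMap.get(key);  // null plays the role of NOMATCH
        lookups++;
      }
      // Reuse saveValue for every row in the series.
      System.out.println(key + " -> " + saveValue);
    }
    System.out.println("lookups = " + lookups);  // 3 lookups for 6 rows
  }
}

The payoff is largest when the big table arrives clustered on the join key, so one probe covers many rows; the Output swap in the real code (temp = saveKeyOutput; ...) serves the same caching purpose without copying the serialized key bytes.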
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The finishLeftSemiRepeated method of the class VectorMapJoinLeftSemiGenerateResultOperator:
protected void finishLeftSemiRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult joinResult,
    VectorMapJoinHashTableResult hashSetResult) throws HiveException, IOException {
  switch (joinResult) {
  case MATCH:
    if (bigTableValueExpressions != null) {
      // Run our value expressions over whole batch.
      for (VectorExpression ve : bigTableValueExpressions) {
        ve.evaluate(batch);
      }
    }
    // Generate special repeated case.
    int numSel = generateHashSetResultRepeatedAll(batch);
    batch.size = numSel;
    batch.selectedInUse = true;
    break;
  case SPILL:
    // Whole batch is spilled.
    spillBatchRepeated(batch, hashSetResult);
    batch.size = 0;
    break;
  case NOMATCH:
    // No match for entire batch.
    batch.size = 0;
    break;
  }
}
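All of the repeated-case finish methods above communicate their result through the VectorizedRowBatch selection contract: when selectedInUse is true, only the first batch.size entries of batch.selected index live rows. The following self-contained sketch illustrates that contract with a toy Batch class, which is a stand-in, not org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch.

public class SelectedInUseSketch {

  static final class Batch {
    int size;
    int[] selected;
    boolean selectedInUse;
    long[] col0;
  }

  public static void main(String[] args) {
    Batch batch = new Batch();
    batch.col0 = new long[] { 10, 20, 30, 40, 50, 60, 70, 80 };

    // Suppose a join kept rows 1, 4 and 6 of the original 8.
    batch.selected = new int[] { 1, 4, 6 };
    batch.size = 3;
    batch.selectedInUse = true;

    // Downstream operators loop logically and map through selected.
    for (int logical = 0; logical < batch.size; logical++) {
      int batchIndex = batch.selectedInUse ? batch.selected[logical] : logical;
      System.out.println(batch.col0[batchIndex]);  // 20, 50, 70
    }
  }
}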
Use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
The getEvaluator method of the class VectorPTFDesc:
// We provide this public method to help EXPLAIN VECTORIZATION show the evaluator classes.
public static VectorPTFEvaluatorBase getEvaluator(SupportedFunctionType functionType, boolean isDistinct,
    WindowFrameDef windowFrameDef, Type[] columnVectorTypes, VectorExpression[] inputVectorExpressions,
    int outputColumnNum) {
  final boolean isRowEndCurrent =
      (windowFrameDef.getWindowType() == WindowType.ROWS && windowFrameDef.getEnd().isCurrentRow());
  /*
   * We should only stream when the window start is unbounded and the end row is the current one,
   * because that is how streaming evaluation works: accumulate from the very first row and
   * produce a result for the current row on the fly. In other words: currently we cannot force
   * a boundary on a streaming evaluator.
   */
  final boolean canStream = windowFrameDef.getStart().isUnbounded() && isRowEndCurrent;
  // Most evaluators use only the first argument.
  VectorExpression inputVectorExpression = inputVectorExpressions[0];
  Type columnVectorType = columnVectorTypes[0];
  VectorPTFEvaluatorBase evaluator;
  switch (functionType) {
  case ROW_NUMBER:
    evaluator = new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum);
    break;
  case RANK:
    evaluator = new VectorPTFEvaluatorRank(windowFrameDef, outputColumnNum);
    break;
  case DENSE_RANK:
    evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, outputColumnNum);
    break;
  case MIN:
    switch (columnVectorType) {
    case LONG:
      evaluator = !canStream ?
          new VectorPTFEvaluatorLongMin(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingLongMin(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DOUBLE:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDoubleMin(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DECIMAL:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDecimalMin(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case MAX:
    switch (columnVectorType) {
    case LONG:
      evaluator = !canStream ?
          new VectorPTFEvaluatorLongMax(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingLongMax(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DOUBLE:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDoubleMax(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DECIMAL:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDecimalMax(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case SUM:
    switch (columnVectorType) {
    case LONG:
      evaluator = !canStream ?
          new VectorPTFEvaluatorLongSum(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingLongSum(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DOUBLE:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDoubleSum(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DECIMAL:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDecimalSum(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case AVG:
    switch (columnVectorType) {
    case LONG:
      evaluator = !canStream ?
          new VectorPTFEvaluatorLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingLongAvg(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DOUBLE:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDoubleAvg(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DECIMAL:
      evaluator = !canStream ?
          new VectorPTFEvaluatorDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum) :
          new VectorPTFEvaluatorStreamingDecimalAvg(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case FIRST_VALUE:
    switch (columnVectorType) {
    case LONG:
      evaluator = new VectorPTFEvaluatorLongFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DOUBLE:
      evaluator = new VectorPTFEvaluatorDoubleFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DECIMAL:
      evaluator = new VectorPTFEvaluatorDecimalFirstValue(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case LAST_VALUE:
    switch (columnVectorType) {
    case LONG:
      evaluator = new VectorPTFEvaluatorLongLastValue(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DOUBLE:
      evaluator = new VectorPTFEvaluatorDoubleLastValue(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    case DECIMAL:
      evaluator = new VectorPTFEvaluatorDecimalLastValue(windowFrameDef, inputVectorExpression, outputColumnNum);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case COUNT:
    if (inputVectorExpression == null) {
      evaluator = new VectorPTFEvaluatorCountStar(windowFrameDef, inputVectorExpression, outputColumnNum);
    } else {
      if (isDistinct) {
        switch (columnVectorType) {
        case BYTES:
          evaluator = new VectorPTFEvaluatorBytesCountDistinct(windowFrameDef, inputVectorExpression, outputColumnNum);
          break;
        // A Decimal64ColumnVector is a LongColumnVector.
        case DECIMAL_64:
        case LONG:
          evaluator = new VectorPTFEvaluatorLongCountDistinct(windowFrameDef, inputVectorExpression, outputColumnNum);
          break;
        case DOUBLE:
          evaluator = new VectorPTFEvaluatorDoubleCountDistinct(windowFrameDef, inputVectorExpression, outputColumnNum);
          break;
        case DECIMAL:
          evaluator = new VectorPTFEvaluatorDecimalCountDistinct(windowFrameDef, inputVectorExpression, outputColumnNum);
          break;
        case TIMESTAMP:
          evaluator = new VectorPTFEvaluatorTimestampCountDistinct(windowFrameDef, inputVectorExpression, outputColumnNum);
          break;
        default:
          throw new RuntimeException("Unexpected column type for ptf count distinct: " + columnVectorType);
        }
      } else {
        evaluator = new VectorPTFEvaluatorCount(windowFrameDef, inputVectorExpression, outputColumnNum);
      }
    }
    break;
  case LAG:
    // lag(column, constant, ...)
    int amt = inputVectorExpressions.length > 1 ?
        (int) ((ConstantVectorExpression) inputVectorExpressions[1]).getLongValue() : 1;
    // lag(column, constant, constant/column)
    VectorExpression defaultValueExpression =
        inputVectorExpressions.length > 2 ? inputVectorExpressions[2] : null;
    switch (columnVectorType) {
    case LONG:
    case DOUBLE:
    case DECIMAL:
      evaluator = new VectorPTFEvaluatorLag(windowFrameDef, inputVectorExpression, outputColumnNum,
          columnVectorType, amt, defaultValueExpression);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  case LEAD:
    // lead(column, constant, ...)
    amt = inputVectorExpressions.length > 1 ?
        (int) ((ConstantVectorExpression) inputVectorExpressions[1]).getLongValue() : 1;
    // lead(column, constant, constant/column)
    defaultValueExpression = inputVectorExpressions.length > 2 ? inputVectorExpressions[2] : null;
    switch (columnVectorType) {
    case LONG:
    case DOUBLE:
    case DECIMAL:
      evaluator = new VectorPTFEvaluatorLead(windowFrameDef, inputVectorExpression, outputColumnNum,
          columnVectorType, amt, defaultValueExpression);
      break;
    default:
      throw new RuntimeException("Unexpected column vector type " + columnVectorType + " for " + functionType);
    }
    break;
  default:
    throw new RuntimeException("Unexpected function type " + functionType);
  }
  return evaluator;
}
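The canStream flag computed at the top of getEvaluator is what steers the MIN/MAX/SUM/AVG cases between buffering and streaming evaluators. Below is a small stand-alone model of that decision; WindowType and the boolean parameters are simplified stand-ins for Hive's WindowFrameDef, not its actual API.

public class CanStreamSketch {

  enum WindowType { ROWS, RANGE }

  static boolean canStream(WindowType type, boolean startUnbounded, boolean endCurrentRow) {
    boolean isRowEndCurrent = (type == WindowType.ROWS && endCurrentRow);
    return startUnbounded && isRowEndCurrent;
  }

  public static void main(String[] args) {
    // ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -> streaming evaluator
    System.out.println(canStream(WindowType.ROWS, true, true));   // true
    // RANGE frames never stream here, even with the same bounds.
    System.out.println(canStream(WindowType.RANGE, true, true));  // false
    // A bounded start forces a buffering (non-streaming) evaluator.
    System.out.println(canStream(WindowType.ROWS, false, true));  // false
  }
}

Only ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW qualifies; any other frame forces the evaluator to buffer the partition.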