use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorMapJoinOuterGenerateResultOperator method doValueExpr.
/**
* Apply the value expression to rows specified by a selected array.
*
* @param batch
* The vectorized row batch.
* @param selected
* The (physical) batch indices to apply the expression to.
* @param size
* The size of selected.
*/
private void doValueExpr(VectorizedRowBatch batch, int[] selected, int size) {
int saveBatchSize = batch.size;
int[] saveSelected = batch.selected;
boolean saveSelectedInUse = batch.selectedInUse;
batch.size = size;
batch.selected = selected;
batch.selectedInUse = true;
if (bigTableValueExpressions != null) {
for (VectorExpression ve : bigTableValueExpressions) {
ve.evaluate(batch);
}
}
batch.size = saveBatchSize;
batch.selected = saveSelected;
batch.selectedInUse = saveSelectedInUse;
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorMapJoinOuterLongOperator method process.
// ---------------------------------------------------------------------------
// Process Single-Column Long Outer Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
try {
VectorizedRowBatch batch = (VectorizedRowBatch) row;
alias = (byte) tag;
if (needCommonSetup) {
// Our one time process method initialization.
commonSetup(batch);
/*
* Initialize Single-Column Long members for this specialized class.
*/
singleJoinColumn = bigTableKeyColumnMap[0];
needCommonSetup = false;
}
if (needHashTableSetup) {
// Setup our hash table specialization. It will be the first time the process
// method is called, or after a Hybrid Grace reload.
/*
* Get our Single-Column Long hash map information for this specialized class.
*/
hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable;
useMinMax = hashMap.useMinMax();
if (useMinMax) {
min = hashMap.min();
max = hashMap.max();
}
needHashTableSetup = false;
}
batchCounter++;
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
}
return;
}
// Do the per-batch setup for an outer join.
outerPerBatchSetup(batch);
// For outer join, remember our input rows before ON expression filtering or before
// hash table matching so we can generate results for all rows (matching and non matching)
// later.
boolean inputSelectedInUse = batch.selectedInUse;
if (inputSelectedInUse) {
// if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
// throw new HiveException("batch.selected is not in sort order and unique");
// }
System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
}
// Filtering for outer join just removes rows available for hash table matching.
boolean someRowsFilteredOut = false;
if (bigTableFilterExpressions.length > 0) {
// Since the input
for (VectorExpression ve : bigTableFilterExpressions) {
ve.evaluate(batch);
}
someRowsFilteredOut = (batch.size != inputLogicalSize);
if (LOG.isDebugEnabled()) {
if (batch.selectedInUse) {
if (inputSelectedInUse) {
LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
} else {
LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
}
}
}
}
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
ve.evaluate(batch);
}
}
/*
* Single-Column Long specific declarations.
*/
// The one join column for this specialized class.
LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
long[] vector = joinColVector.vector;
/*
* Single-Column Long check for repeating.
*/
// Check single column for repeating.
boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
if (allKeyInputColumnsRepeating) {
/*
* Repeating.
*/
// All key input columns are repeating. Generate key once. Lookup once.
// Since the key is repeated, we must use entry 0 regardless of selectedInUse.
/*
* Single-Column Long specific repeated lookup.
*/
JoinUtil.JoinResult joinResult;
if (batch.size == 0) {
// Whole repeated key batch was filtered out.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
// Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
long key = vector[0];
// LOG.debug(CLASS_NAME + " repeated key " + key);
if (useMinMax && (key < min || key > max)) {
// Out of range for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
joinResult = hashMap.lookup(key, hashMapResults[0]);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
}
int[] selected = batch.selected;
boolean selectedInUse = batch.selectedInUse;
int hashMapResultCount = 0;
int allMatchCount = 0;
int equalKeySeriesCount = 0;
int spillCount = 0;
boolean atLeastOneNonMatch = someRowsFilteredOut;
/*
* Single-Column Long specific variables.
*/
long saveKey = 0;
// We optimize performance by only looking up the first key in a series of equal keys.
boolean haveSaveKey = false;
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
/*
* Single-Column Long outer null detection.
*/
boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
if (isNull) {
// Have that the NULL does not interfere with the current equal key series, if there
// is one. We do not set saveJoinResult.
//
// Let a current MATCH equal key series keep going, or
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
atLeastOneNonMatch = true;
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
/*
* Single-Column Long outer get key.
*/
long currentKey = vector[batchIndex];
if (!haveSaveKey || currentKey != saveKey) {
if (haveSaveKey) {
// Move on with our counts.
switch(saveJoinResult) {
case MATCH:
hashMapResultCount++;
equalKeySeriesCount++;
break;
case SPILL:
hashMapResultCount++;
break;
case NOMATCH:
break;
}
}
// Regardless of our matching result, we keep that information to make multiple use
// of it for a possible series of equal keys.
haveSaveKey = true;
/*
* Single-Column Long specific save key.
*/
saveKey = currentKey;
if (useMinMax && (currentKey < min || currentKey > max)) {
// Key out of range for whole hash table.
saveJoinResult = JoinUtil.JoinResult.NOMATCH;
} else {
saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]);
}
switch(saveJoinResult) {
case MATCH:
equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
allMatchs[allMatchCount++] = batchIndex;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
break;
case SPILL:
spills[spillCount] = batchIndex;
spillHashMapResultIndices[spillCount] = hashMapResultCount;
spillCount++;
break;
case NOMATCH:
atLeastOneNonMatch = true;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
break;
}
} else {
switch(saveJoinResult) {
case MATCH:
equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
allMatchs[allMatchCount++] = batchIndex;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
break;
case SPILL:
spills[spillCount] = batchIndex;
spillHashMapResultIndices[spillCount] = hashMapResultCount;
spillCount++;
break;
case NOMATCH:
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
break;
}
}
// if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
// throw new HiveException("allMatchs is not in sort order and unique");
// }
}
}
if (haveSaveKey) {
// Update our counts for the last key.
switch(saveJoinResult) {
case MATCH:
hashMapResultCount++;
equalKeySeriesCount++;
break;
case SPILL:
hashMapResultCount++;
break;
case NOMATCH:
break;
}
}
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
}
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
}
} catch (IOException e) {
throw new HiveException(e);
} catch (Exception e) {
throw new HiveException(e);
}
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorMapJoinOuterMultiKeyOperator method process.
// ---------------------------------------------------------------------------
// Process Multi-Key Outer Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
try {
VectorizedRowBatch batch = (VectorizedRowBatch) row;
alias = (byte) tag;
if (needCommonSetup) {
// Our one time process method initialization.
commonSetup(batch);
/*
* Initialize Multi-Key members for this specialized class.
*/
keyVectorSerializeWrite = new VectorSerializeRow(new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
currentKeyOutput = new Output();
saveKeyOutput = new Output();
needCommonSetup = false;
}
if (needHashTableSetup) {
// Setup our hash table specialization. It will be the first time the process
// method is called, or after a Hybrid Grace reload.
/*
* Get our Multi-Key hash map information for this specialized class.
*/
hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
needHashTableSetup = false;
}
batchCounter++;
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
}
return;
}
// Do the per-batch setup for an outer join.
outerPerBatchSetup(batch);
// For outer join, remember our input rows before ON expression filtering or before
// hash table matching so we can generate results for all rows (matching and non matching)
// later.
boolean inputSelectedInUse = batch.selectedInUse;
if (inputSelectedInUse) {
// if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
// throw new HiveException("batch.selected is not in sort order and unique");
// }
System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
}
// Filtering for outer join just removes rows available for hash table matching.
boolean someRowsFilteredOut = false;
if (bigTableFilterExpressions.length > 0) {
// Since the input
for (VectorExpression ve : bigTableFilterExpressions) {
ve.evaluate(batch);
}
someRowsFilteredOut = (batch.size != inputLogicalSize);
if (LOG.isDebugEnabled()) {
if (batch.selectedInUse) {
if (inputSelectedInUse) {
LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
} else {
LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
}
}
}
}
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
ve.evaluate(batch);
}
}
/*
* Multi-Key specific declarations.
*/
// None.
/*
* Multi-Key Long check for repeating.
*/
// If all BigTable input columns to key expressions are isRepeating, then
// calculate key once; lookup once.
// Also determine if any nulls are present since for a join that means no match.
boolean allKeyInputColumnsRepeating;
// Only valid if allKeyInputColumnsRepeating is true.
boolean someKeyInputColumnIsNull = false;
if (bigTableKeyColumnMap.length == 0) {
allKeyInputColumnsRepeating = false;
} else {
allKeyInputColumnsRepeating = true;
for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
ColumnVector colVector = batch.cols[bigTableKeyColumnMap[i]];
if (!colVector.isRepeating) {
allKeyInputColumnsRepeating = false;
break;
}
if (!colVector.noNulls && colVector.isNull[0]) {
someKeyInputColumnIsNull = true;
}
}
}
if (allKeyInputColumnsRepeating) {
/*
* Repeating.
*/
// All key input columns are repeating. Generate key once. Lookup once.
// Since the key is repeated, we must use entry 0 regardless of selectedInUse.
/*
* Multi-Key specific repeated lookup.
*/
JoinUtil.JoinResult joinResult;
if (batch.size == 0) {
// Whole repeated key batch was filtered out.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else if (someKeyInputColumnIsNull) {
// Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// All key input columns are repeating. Generate key once. Lookup once.
keyVectorSerializeWrite.setOutput(currentKeyOutput);
keyVectorSerializeWrite.serializeWrite(batch, 0);
byte[] keyBytes = currentKeyOutput.getData();
int keyLength = currentKeyOutput.getLength();
joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]);
}
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
}
int[] selected = batch.selected;
boolean selectedInUse = batch.selectedInUse;
int hashMapResultCount = 0;
int allMatchCount = 0;
int equalKeySeriesCount = 0;
int spillCount = 0;
boolean atLeastOneNonMatch = someRowsFilteredOut;
/*
* Multi-Key specific variables.
*/
Output temp;
// We optimize performance by only looking up the first key in a series of equal keys.
boolean haveSaveKey = false;
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
/*
* Multi-Key outer null detection.
*/
// Generate binary sortable key for current row in vectorized row batch.
keyVectorSerializeWrite.setOutput(currentKeyOutput);
keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
if (keyVectorSerializeWrite.getHasAnyNulls()) {
// Have that the NULL does not interfere with the current equal key series, if there
// is one. We do not set saveJoinResult.
//
// Let a current MATCH equal key series keep going, or
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
atLeastOneNonMatch = true;
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
if (haveSaveKey) {
// Move on with our counts.
switch(saveJoinResult) {
case MATCH:
hashMapResultCount++;
equalKeySeriesCount++;
break;
case SPILL:
hashMapResultCount++;
break;
case NOMATCH:
break;
}
}
// Regardless of our matching result, we keep that information to make multiple use
// of it for a possible series of equal keys.
haveSaveKey = true;
/*
* Multi-Key specific save key.
*/
temp = saveKeyOutput;
saveKeyOutput = currentKeyOutput;
currentKeyOutput = temp;
/*
* Multi-Key specific lookup key.
*/
byte[] keyBytes = saveKeyOutput.getData();
int keyLength = saveKeyOutput.getLength();
saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
switch(saveJoinResult) {
case MATCH:
equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
allMatchs[allMatchCount++] = batchIndex;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
break;
case SPILL:
spills[spillCount] = batchIndex;
spillHashMapResultIndices[spillCount] = hashMapResultCount;
spillCount++;
break;
case NOMATCH:
atLeastOneNonMatch = true;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
break;
}
} else {
switch(saveJoinResult) {
case MATCH:
equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
allMatchs[allMatchCount++] = batchIndex;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
break;
case SPILL:
spills[spillCount] = batchIndex;
spillHashMapResultIndices[spillCount] = hashMapResultCount;
spillCount++;
break;
case NOMATCH:
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
break;
}
}
// if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
// throw new HiveException("allMatchs is not in sort order and unique");
// }
}
}
if (haveSaveKey) {
// Update our counts for the last key.
switch(saveJoinResult) {
case MATCH:
hashMapResultCount++;
equalKeySeriesCount++;
break;
case SPILL:
hashMapResultCount++;
break;
case NOMATCH:
break;
}
}
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
}
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
}
} catch (IOException e) {
throw new HiveException(e);
} catch (Exception e) {
throw new HiveException(e);
}
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorMapJoinOuterStringOperator method process.
// ---------------------------------------------------------------------------
// Process Single-Column String Outer Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
try {
VectorizedRowBatch batch = (VectorizedRowBatch) row;
alias = (byte) tag;
if (needCommonSetup) {
// Our one time process method initialization.
commonSetup(batch);
/*
* Initialize Single-Column String members for this specialized class.
*/
singleJoinColumn = bigTableKeyColumnMap[0];
needCommonSetup = false;
}
if (needHashTableSetup) {
// Setup our hash table specialization. It will be the first time the process
// method is called, or after a Hybrid Grace reload.
/*
* Get our Single-Column String hash map information for this specialized class.
*/
hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
needHashTableSetup = false;
}
batchCounter++;
final int inputLogicalSize = batch.size;
if (inputLogicalSize == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
}
return;
}
// Do the per-batch setup for an outer join.
outerPerBatchSetup(batch);
// For outer join, remember our input rows before ON expression filtering or before
// hash table matching so we can generate results for all rows (matching and non matching)
// later.
boolean inputSelectedInUse = batch.selectedInUse;
if (inputSelectedInUse) {
// if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
// throw new HiveException("batch.selected is not in sort order and unique");
// }
System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
}
// Filtering for outer join just removes rows available for hash table matching.
boolean someRowsFilteredOut = false;
if (bigTableFilterExpressions.length > 0) {
// Since the input
for (VectorExpression ve : bigTableFilterExpressions) {
ve.evaluate(batch);
}
someRowsFilteredOut = (batch.size != inputLogicalSize);
if (LOG.isDebugEnabled()) {
if (batch.selectedInUse) {
if (inputSelectedInUse) {
LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
} else {
LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
}
}
}
}
// Perform any key expressions. Results will go into scratch columns.
if (bigTableKeyExpressions != null) {
for (VectorExpression ve : bigTableKeyExpressions) {
ve.evaluate(batch);
}
}
/*
* Single-Column String specific declarations.
*/
// The one join column for this specialized class.
BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn];
byte[][] vector = joinColVector.vector;
int[] start = joinColVector.start;
int[] length = joinColVector.length;
/*
* Single-Column String check for repeating.
*/
// Check single column for repeating.
boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
if (allKeyInputColumnsRepeating) {
/*
* Repeating.
*/
// All key input columns are repeating. Generate key once. Lookup once.
// Since the key is repeated, we must use entry 0 regardless of selectedInUse.
/*
* Single-Column String specific repeated lookup.
*/
JoinUtil.JoinResult joinResult;
if (batch.size == 0) {
// Whole repeated key batch was filtered out.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else if (!joinColVector.noNulls && joinColVector.isNull[0]) {
// Any (repeated) null key column is no match for whole batch.
joinResult = JoinUtil.JoinResult.NOMATCH;
} else {
// Handle *repeated* join key, if found.
byte[] keyBytes = vector[0];
int keyStart = start[0];
int keyLength = length[0];
joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]);
}
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
}
finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
}
int[] selected = batch.selected;
boolean selectedInUse = batch.selectedInUse;
int hashMapResultCount = 0;
int allMatchCount = 0;
int equalKeySeriesCount = 0;
int spillCount = 0;
boolean atLeastOneNonMatch = someRowsFilteredOut;
/*
* Single-Column String specific variables.
*/
int saveKeyBatchIndex = -1;
// We optimize performance by only looking up the first key in a series of equal keys.
boolean haveSaveKey = false;
JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
// Logical loop over the rows in the batch since the batch may have selected in use.
for (int logical = 0; logical < batch.size; logical++) {
int batchIndex = (selectedInUse ? selected[logical] : logical);
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
/*
* Single-Column String outer null detection.
*/
boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex];
if (isNull) {
// Have that the NULL does not interfere with the current equal key series, if there
// is one. We do not set saveJoinResult.
//
// Let a current MATCH equal key series keep going, or
// Let a current SPILL equal key series keep going, or
// Let a current NOMATCH keep not matching.
atLeastOneNonMatch = true;
// LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
} else {
if (!haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) {
if (haveSaveKey) {
// Move on with our counts.
switch(saveJoinResult) {
case MATCH:
hashMapResultCount++;
equalKeySeriesCount++;
break;
case SPILL:
hashMapResultCount++;
break;
case NOMATCH:
break;
}
}
// Regardless of our matching result, we keep that information to make multiple use
// of it for a possible series of equal keys.
haveSaveKey = true;
/*
* Single-Column String specific save key.
*/
saveKeyBatchIndex = batchIndex;
/*
* Single-Column Long specific lookup key.
*/
byte[] keyBytes = vector[batchIndex];
int keyStart = start[batchIndex];
int keyLength = length[batchIndex];
saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]);
switch(saveJoinResult) {
case MATCH:
equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
allMatchs[allMatchCount++] = batchIndex;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
break;
case SPILL:
spills[spillCount] = batchIndex;
spillHashMapResultIndices[spillCount] = hashMapResultCount;
spillCount++;
break;
case NOMATCH:
atLeastOneNonMatch = true;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
break;
}
} else {
switch(saveJoinResult) {
case MATCH:
equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
allMatchs[allMatchCount++] = batchIndex;
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
break;
case SPILL:
spills[spillCount] = batchIndex;
spillHashMapResultIndices[spillCount] = hashMapResultCount;
spillCount++;
break;
case NOMATCH:
// VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
break;
}
}
// if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
// throw new HiveException("allMatchs is not in sort order and unique");
// }
}
}
if (haveSaveKey) {
// Update our counts for the last key.
switch(saveJoinResult) {
case MATCH:
hashMapResultCount++;
equalKeySeriesCount++;
break;
case SPILL:
hashMapResultCount++;
break;
case NOMATCH:
break;
}
}
if (LOG.isDebugEnabled()) {
LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
}
// We will generate results for all matching and non-matching rows.
finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
}
if (batch.size > 0) {
// Forward any remaining selected rows.
forwardBigTableBatch(batch);
}
} catch (IOException e) {
throw new HiveException(e);
} catch (Exception e) {
throw new HiveException(e);
}
}
use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.
the class VectorGroupKeyHelper method init.
void init(VectorExpression[] keyExpressions) throws HiveException {
// NOTE: To support pruning the grouping set id dummy key by VectorGroupbyOpeator MERGE_PARTIAL
// case, we use the keyCount passed to the constructor and not keyExpressions.length.
// Inspect the output type of each key expression. And, remember the output columns.
inputColumnNums = new int[keyCount];
for (int i = 0; i < keyCount; ++i) {
VectorExpression keyExpression = keyExpressions[i];
TypeInfo typeInfo = keyExpression.getOutputTypeInfo();
addKey(typeInfo);
// The output of the key expression is the input column.
final int inputColumnNum = keyExpression.getOutputColumnNum();
inputColumnNums[i] = inputColumnNum;
}
finishAdding();
}
Aggregations