Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
The class VectorUDFTimestampFieldTimestamp, method evaluate:
@Override
public void evaluate(VectorizedRowBatch batch) {

  Preconditions.checkState(
      ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP);

  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }

  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
  ColumnVector inputColVec = batch.cols[this.colNum];

  /* every line below this is identical for evaluateLong & evaluateString */
  final int n = inputColVec.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = (inputColVec.isRepeating == false) && batch.selectedInUse;

  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }

  // We do not need to do a column reset since we are carefully changing the output.
  outV.isRepeating = false;

  TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec;

  if (inputColVec.isRepeating) {
    if (inputColVec.noNulls || !inputColVec.isNull[0]) {
      outV.isNull[0] = false;
      outV.vector[0] = getTimestampField(timestampColVector, 0);
    } else {
      outV.isNull[0] = true;
      outV.noNulls = false;
    }
    outV.isRepeating = true;
    return;
  }

  if (inputColVec.noNulls) {
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = false;
        outV.vector[i] = getTimestampField(timestampColVector, i);
      }
    } else {
      Arrays.fill(outV.isNull, 0, n, false);
      for (int i = 0; i < n; i++) {
        outV.vector[i] = getTimestampField(timestampColVector, i);
      }
    }
  } else /* there are nulls in the inputColVector */ {

    // Carefully handle NULLs...
    outV.noNulls = false;

    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getTimestampField(timestampColVector, i);
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getTimestampField(timestampColVector, i);
        }
      }
    }
  }
}
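For orientation, the flags this evaluate method branches on (isRepeating, noNulls, isNull, selectedInUse) all live on the batch and its column vectors. The following standalone sketch is not part of the Hive source; with made-up sizes and values, it shows how a two-column batch with a repeating timestamp input and a long output scratch column might be populated before such an expression runs:

import java.sql.Timestamp;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RepeatingTimestampBatchSketch {
  public static void main(String[] args) {
    // Column 0: timestamp input, column 1: long output scratch column.
    VectorizedRowBatch batch = new VectorizedRowBatch(2, 1024);
    TimestampColumnVector tsCol = new TimestampColumnVector(1024);
    LongColumnVector outCol = new LongColumnVector(1024);
    batch.cols[0] = tsCol;
    batch.cols[1] = outCol;

    // A repeating column stores one value in entry 0 that stands for every row.
    tsCol.isRepeating = true;
    tsCol.noNulls = true;
    tsCol.set(0, Timestamp.valueOf("2020-01-01 00:00:00"));

    // Only three rows of the batch are logically selected.
    batch.size = 3;
    batch.selectedInUse = true;
    batch.selected[0] = 0;
    batch.selected[1] = 5;
    batch.selected[2] = 9;

    // With this layout, evaluate() above takes the isRepeating fast path: it reads
    // entry 0 once, writes outCol.vector[0], marks the output repeating, and never
    // consults the selection vector.
  }
}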
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
The class VectorUDAFCountMerge, method aggregateInputSelection:
@Override
public void aggregateInputSelection(VectorAggregationBufferRow[] aggregationBufferSets,
    int aggregateIndex, VectorizedRowBatch batch) throws HiveException {

  int batchSize = batch.size;
  if (batchSize == 0) {
    return;
  }

  inputExpression.evaluate(batch);
  LongColumnVector inputVector =
      (LongColumnVector) batch.cols[this.inputExpression.getOutputColumnNum()];
  long[] vector = inputVector.vector;

  if (inputVector.noNulls) {
    if (inputVector.isRepeating) {
      iterateNoNullsRepeatingWithAggregationSelection(
          aggregationBufferSets, aggregateIndex, vector[0], batchSize);
    } else {
      if (batch.selectedInUse) {
        iterateNoNullsSelectionWithAggregationSelection(
            aggregationBufferSets, aggregateIndex, vector, batch.selected, batchSize);
      } else {
        iterateNoNullsWithAggregationSelection(
            aggregationBufferSets, aggregateIndex, vector, batchSize);
      }
    }
  } else {
    if (inputVector.isRepeating) {
      if (batch.selectedInUse) {
        iterateHasNullsRepeatingSelectionWithAggregationSelection(
            aggregationBufferSets, aggregateIndex, vector[0], batchSize, batch.selected, inputVector.isNull);
      } else {
        iterateHasNullsRepeatingWithAggregationSelection(
            aggregationBufferSets, aggregateIndex, vector[0], batchSize, inputVector.isNull);
      }
    } else {
      if (batch.selectedInUse) {
        iterateHasNullsSelectionWithAggregationSelection(
            aggregationBufferSets, aggregateIndex, vector, batchSize, batch.selected, inputVector.isNull);
      } else {
        iterateHasNullsWithAggregationSelection(
            aggregationBufferSets, aggregateIndex, vector, batchSize, inputVector.isNull);
      }
    }
  }
}
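Each iterate* helper above specializes one combination of the noNulls, isRepeating, and selectedInUse flags, so that the per-row loop carries no redundant branches. As a rough, self-contained illustration of what that specialization buys (plain Java with a made-up accumulator type, not the actual Hive helpers), the fully general loop the specialized variants collectively replace looks like this:

final class GenericCountMergeSketch {

  /** Made-up accumulator standing in for VectorUDAFCountMerge.Aggregation. */
  static final class CountAccumulator {
    long value;
  }

  /**
   * A single generic loop that re-checks every flag on every row; the iterate*
   * variants above hoist these checks out of the loop entirely.
   */
  static void iterate(CountAccumulator[] buffers, long[] vector, boolean[] isNull,
      boolean noNulls, boolean isRepeating, int[] selected, boolean selectedInUse,
      int batchSize) {
    for (int logical = 0; logical < batchSize; logical++) {
      int batchIndex = selectedInUse ? selected[logical] : logical;
      int valueIndex = isRepeating ? 0 : batchIndex;
      if (noNulls || !isNull[valueIndex]) {
        // COUNT merge adds the partial counts arriving from upstream tasks.
        buffers[logical].value += vector[valueIndex];
      }
    }
  }

  public static void main(String[] args) {
    long[] partialCounts = {4, 0, 9};
    boolean[] isNull = {false, true, false};
    CountAccumulator[] buffers = {
        new CountAccumulator(), new CountAccumulator(), new CountAccumulator()};
    iterate(buffers, partialCounts, isNull, false, false, null, false, 3);
    // buffers now hold 4, 0 (the null row is skipped), and 9.
  }
}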
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
The class VectorUDAFCountMerge, method assignRowColumn:
@Override
public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum,
    AggregationBuffer agg) throws HiveException {

  LongColumnVector outputColVector = (LongColumnVector) batch.cols[columnNum];
  Aggregation myagg = (Aggregation) agg;

  outputColVector.isNull[batchIndex] = false;
  outputColVector.vector[batchIndex] = myagg.value;
}
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
The class VectorUDAFCountStar, method assignRowColumn:
@Override
public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum,
    AggregationBuffer agg) throws HiveException {

  LongColumnVector outputColVector = (LongColumnVector) batch.cols[columnNum];
  Aggregation myagg = (Aggregation) agg;

  outputColVector.isNull[batchIndex] = false;
  outputColVector.vector[batchIndex] = myagg.count;
}
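The two assignRowColumn implementations above differ only in which Aggregation field they copy out (value for the merge case, count for COUNT(*)); both write one finished aggregate into a single row of a LongColumnVector and clear that row's null flag. A trivial standalone sketch of the same output contract, outside Hive's aggregation machinery:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class AssignRowSketch {
  public static void main(String[] args) {
    VectorizedRowBatch outBatch = new VectorizedRowBatch(1, 1024);
    LongColumnVector col = new LongColumnVector(1024);
    outBatch.cols[0] = col;

    // Write three finished aggregate values into consecutive output rows,
    // clearing each row's null flag exactly as assignRowColumn does.
    long[] finishedCounts = {42L, 0L, 7L};
    for (int row = 0; row < finishedCounts.length; row++) {
      col.isNull[row] = false;
      col.vector[row] = finishedCounts[row];
    }
    outBatch.size = finishedCounts.length;
  }
}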
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
The class VectorMapJoinInnerBigOnlyLongOperator, method process:
// ---------------------------------------------------------------------------
// Process Single-Column Long Inner Big-Only Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {

  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;

    alias = (byte) tag;

    if (needCommonSetup) {
      // Our one time process method initialization.
      commonSetup(batch);

      /*
       * Initialize Single-Column Long members for this specialized class.
       */
      singleJoinColumn = bigTableKeyColumnMap[0];

      needCommonSetup = false;
    }

    if (needHashTableSetup) {
      // Setup our hash table specialization. It will be the first time the process
      // method is called, or after a Hybrid Grace reload.

      /*
       * Get our Single-Column Long hash multi-set information for this specialized class.
       */
      hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable;
      useMinMax = hashMultiSet.useMinMax();
      if (useMinMax) {
        min = hashMultiSet.min();
        max = hashMultiSet.max();
      }

      needHashTableSetup = false;
    }

    batchCounter++;

    // For inner joins, we may apply the filter(s) now.
    for (VectorExpression ve : bigTableFilterExpressions) {
      ve.evaluate(batch);
    }
    final int inputLogicalSize = batch.size;

    if (inputLogicalSize == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }

    // Perform any key expressions. Results will go into scratch columns.
    if (bigTableKeyExpressions != null) {
      for (VectorExpression ve : bigTableKeyExpressions) {
        ve.evaluate(batch);
      }
    }

    /*
     * Single-Column Long specific declarations.
     */

    // The one join column for this specialized class.
    LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
    long[] vector = joinColVector.vector;

    /*
     * Single-Column Long check for repeating.
     */

    // Check single column for repeating.
    boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;

    if (allKeyInputColumnsRepeating) {

      /*
       * Repeating.
       */

      // All key input columns are repeating. Generate key once. Lookup once.
      // Since the key is repeated, we must use entry 0 regardless of selectedInUse.

      /*
       * Single-Column Long specific repeated lookup.
       */
      JoinUtil.JoinResult joinResult;
      if (!joinColVector.noNulls && joinColVector.isNull[0]) {
        joinResult = JoinUtil.JoinResult.NOMATCH;
      } else {
        long key = vector[0];
        if (useMinMax && (key < min || key > max)) {
          // Out of range for whole batch.
          joinResult = JoinUtil.JoinResult.NOMATCH;
        } else {
          joinResult = hashMultiSet.contains(key, hashMultiSetResults[0]);
        }
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
      }

      finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
    } else {

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
      }

      // We remember any matching rows in matchs / matchSize. At the end of the loop,
      // selected / batch.size will represent both matching and non-matching rows for outer join.
      // Only deferred rows will have been removed from selected.
      int[] selected = batch.selected;
      boolean selectedInUse = batch.selectedInUse;

      int hashMultiSetResultCount = 0;
      int allMatchCount = 0;
      int equalKeySeriesCount = 0;
      int spillCount = 0;

      /*
       * Single-Column Long specific variables.
       */
      long saveKey = 0;

      // We optimize performance by only looking up the first key in a series of equal keys.
      boolean haveSaveKey = false;
      JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
      // Logical loop over the rows in the batch since the batch may have selected in use.
      for (int logical = 0; logical < inputLogicalSize; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);

        /*
         * Single-Column Long get key.
         */
        long currentKey;
        boolean isNull;
        if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
          currentKey = 0;
          isNull = true;
        } else {
          currentKey = vector[batchIndex];
          isNull = false;
        }

        if (isNull || !haveSaveKey || currentKey != saveKey) {

          if (haveSaveKey) {
            // Move on with our counts.
            switch (saveJoinResult) {
            case MATCH:
              // We have extracted the count from the hash multi-set result, so we don't keep it.
              equalKeySeriesCount++;
              break;
            case SPILL:
              // We keep the hash multi-set result for its spill information.
              hashMultiSetResultCount++;
              break;
            case NOMATCH:
              break;
            }
          }
          if (isNull) {
            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            haveSaveKey = false;
          } else {
            // Regardless of our matching result, we keep that information to make multiple use
            // of it for a possible series of equal keys.
            haveSaveKey = true;

            /*
             * Single-Column Long specific save key.
             */
            saveKey = currentKey;

            if (useMinMax && (currentKey < min || currentKey > max)) {
              // Key out of range for whole hash table.
              saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
              saveJoinResult = hashMultiSet.contains(currentKey, hashMultiSetResults[hashMultiSetResultCount]);
            }
          }

          switch (saveJoinResult) {
          case MATCH:
            equalKeySeriesValueCounts[equalKeySeriesCount] = hashMultiSetResults[hashMultiSetResultCount].count();
            equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
            equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
            allMatchs[allMatchCount++] = batchIndex;
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
            break;
          }
        } else {
          switch (saveJoinResult) {
          case MATCH:
            equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
            allMatchs[allMatchCount++] = batchIndex;
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
            break;
          case SPILL:
            spills[spillCount] = batchIndex;
            spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
            spillCount++;
            break;
          case NOMATCH:
            // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
            break;
          }
        }
      }
      if (haveSaveKey) {
        // Update our counts for the last key.
        switch (saveJoinResult) {
        case MATCH:
          // We have extracted the count from the hash multi-set result, so we don't keep it.
          equalKeySeriesCount++;
          break;
        case SPILL:
          // We keep the hash multi-set result for its spill information.
          hashMultiSetResultCount++;
          break;
        case NOMATCH:
          break;
        }
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(CLASS_NAME +
            " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) +
            " equalKeySeriesValueCounts " + longArrayToRangesString(equalKeySeriesValueCounts, equalKeySeriesCount) +
            " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) +
            " equalKeySeriesDuplicateCounts " + intArrayToRangesString(equalKeySeriesDuplicateCounts, equalKeySeriesCount) +
            " spills " + intArrayToRangesString(spills, spillCount) +
            " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) +
            " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
      }

      finishInnerBigOnly(batch, allMatchCount, equalKeySeriesCount, spillCount,
          (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
    }
    if (batch.size > 0) {
      // Forward any remaining selected rows.
      forwardBigTableBatch(batch);
    }

  } catch (IOException e) {
    throw new HiveException(e);
  } catch (Exception e) {
    throw new HiveException(e);
  }
}
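The heart of the non-repeating path above is the equal-key-series optimization: the hash multi-set is probed only when the current key differs from the previous row's key, and the cached JoinResult is reused for every consecutive duplicate; the optional min/max check additionally skips probes for keys that cannot be in the table at all. A stripped-down, self-contained sketch of just that idea (plain Java with a hypothetical lookup predicate, not Hive's VectorMapJoinLongHashMultiSet API):

import java.util.function.LongPredicate;

final class EqualKeySeriesSketch {

  /**
   * Counts matching rows with one lookup per run of equal keys instead of one
   * lookup per row; the [min, max] range check mirrors useMinMax above.
   */
  static int countMatches(long[] keys, boolean[] isNull, LongPredicate contains,
      long min, long max) {
    int matches = 0;
    boolean haveSaveKey = false;
    long saveKey = 0;
    boolean saveMatched = false;
    for (int i = 0; i < keys.length; i++) {
      if (isNull[i]) {
        haveSaveKey = false;   // a NULL key never matches and ends the series
        continue;
      }
      if (!haveSaveKey || keys[i] != saveKey) {
        haveSaveKey = true;
        saveKey = keys[i];
        // Range check first: keys outside [min, max] cannot be in the hash table.
        saveMatched = keys[i] >= min && keys[i] <= max && contains.test(keys[i]);
      }
      if (saveMatched) {
        matches++;
      }
    }
    return matches;
  }

  public static void main(String[] args) {
    long[] keys = {3, 3, 3, 7, 7, 5};
    boolean[] isNull = new boolean[keys.length];
    // Pretend the hash table contains only the key 3; min/max are 1 and 6.
    System.out.println(countMatches(keys, isNull, k -> k == 3, 1, 6));   // prints 3
  }
}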