Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
Class StructColumnInList, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  final int logicalSize = batch.size;
  if (logicalSize == 0) {
    return;
  }
  if (buffer == null) {
    buffer = new Output();
    binarySortableSerializeWrite = new BinarySortableSerializeWrite(structColumnMap.length);
  }
  for (VectorExpression ve : structExpressions) {
    ve.evaluate(batch);
  }
  BytesColumnVector scratchBytesColumnVector = (BytesColumnVector) batch.cols[scratchBytesColumn];
  try {
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < logicalSize; logical++) {
      int batchIndex = (selectedInUse ? selected[logical] : logical);
      binarySortableSerializeWrite.set(buffer);
      for (int f = 0; f < structColumnMap.length; f++) {
        int fieldColumn = structColumnMap[f];
        ColumnVector colVec = batch.cols[fieldColumn];
        int adjustedIndex = (colVec.isRepeating ? 0 : batchIndex);
        if (colVec.noNulls || !colVec.isNull[adjustedIndex]) {
          switch (fieldVectorColumnTypes[f]) {
            case BYTES:
              {
                BytesColumnVector bytesColVec = (BytesColumnVector) colVec;
                byte[] bytes = bytesColVec.vector[adjustedIndex];
                int start = bytesColVec.start[adjustedIndex];
                int length = bytesColVec.length[adjustedIndex];
                binarySortableSerializeWrite.writeString(bytes, start, length);
              }
              break;
            case LONG:
              binarySortableSerializeWrite.writeLong(((LongColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DOUBLE:
              binarySortableSerializeWrite.writeDouble(((DoubleColumnVector) colVec).vector[adjustedIndex]);
              break;
            case DECIMAL:
              DecimalColumnVector decColVector = ((DecimalColumnVector) colVec);
              binarySortableSerializeWrite.writeHiveDecimal(decColVector.vector[adjustedIndex], decColVector.scale);
              break;
            default:
              throw new RuntimeException("Unexpected vector column type " + fieldVectorColumnTypes[f].name());
          }
        } else {
          binarySortableSerializeWrite.writeNull();
        }
      }
      scratchBytesColumnVector.setVal(batchIndex, buffer.getData(), 0, buffer.getLength());
    }
    // Now, take the serialized keys we just wrote into our scratch column and look them
    // up in the IN list.
    super.evaluate(batch);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
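The adjustedIndex computation above is the standard access rule for a possibly repeating ColumnVector: when isRepeating is set, the single value and its null flag live in slot 0 no matter which logical row is being read. A minimal, self-contained sketch of that rule in isolation (the helper name and the null fallback value are ours, for illustration; it compiles against the same Hive vector classes used above):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class RepeatingAccessSketch {

  // Read row `batchIndex` from a possibly repeating vector: a repeating
  // vector keeps its one value (and null flag) in slot 0 for every row.
  static long readLong(LongColumnVector colVec, int batchIndex, long valueIfNull) {
    int adjustedIndex = colVec.isRepeating ? 0 : batchIndex;
    if (!colVec.noNulls && colVec.isNull[adjustedIndex]) {
      return valueIfNull;
    }
    return colVec.vector[adjustedIndex];
  }

  public static void main(String[] args) {
    LongColumnVector repeating = new LongColumnVector(1024);
    repeating.isRepeating = true;
    repeating.vector[0] = 42L; // one value stands in for all 1024 rows
    System.out.println(readLong(repeating, 500, -1L)); // prints 42
  }
}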
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
Class VectorElt, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  int[] sel = batch.selected;
  int n = batch.size;
  BytesColumnVector outputVector = (BytesColumnVector) batch.cols[outputColumn];
  if (n <= 0) {
    return;
  }
  outputVector.init();
  outputVector.noNulls = false;
  outputVector.isRepeating = false;
  LongColumnVector inputIndexVector = (LongColumnVector) batch.cols[inputColumns[0]];
  long[] indexVector = inputIndexVector.vector;
  if (inputIndexVector.isRepeating) {
    int index = (int) indexVector[0];
    if (index > 0 && index < inputColumns.length) {
      BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
      if (cv.isRepeating) {
        outputVector.setElement(0, 0, cv);
        outputVector.isRepeating = true;
      } else if (batch.selectedInUse) {
        for (int j = 0; j != n; j++) {
          int i = sel[j];
          // The selected column is not repeating, so copy its value row by row.
          outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]);
        }
      } else {
        for (int i = 0; i != n; i++) {
          outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]);
        }
      }
    } else {
      outputVector.isNull[0] = true;
      outputVector.isRepeating = true;
    }
  } else if (batch.selectedInUse) {
    for (int j = 0; j != n; j++) {
      int i = sel[j];
      int index = (int) indexVector[i];
      if (index > 0 && index < inputColumns.length) {
        BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
        int cvi = cv.isRepeating ? 0 : i;
        outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]);
      } else {
        outputVector.isNull[i] = true;
      }
    }
  } else {
    for (int i = 0; i != n; i++) {
      int index = (int) indexVector[i];
      if (index > 0 && index < inputColumns.length) {
        BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]];
        int cvi = cv.isRepeating ? 0 : i;
        outputVector.setVal(i, cv.vector[cvi], cv.start[cvi], cv.length[cvi]);
      } else {
        outputVector.isNull[i] = true;
      }
    }
  }
}
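For context, SQL's ELT(n, str1, str2, ...) returns its n-th string argument, counting from 1, and NULL for a null or out-of-range index; that is why the vectorized code accepts only 0 < index < inputColumns.length, slot 0 being occupied by the index column itself. A scalar sketch of the same semantics (our own reference version, not Hive's GenericUDFElt):

public class EltSketch {

  // Scalar reference for SQL ELT(n, s1, s2, ...): 1-based index into the
  // string arguments; a null or out-of-range index yields NULL.
  static String elt(Integer n, String... args) {
    if (n == null || n <= 0 || n > args.length) {
      return null; // matches ELT's NULL-on-out-of-range behavior
    }
    return args[n - 1];
  }

  public static void main(String[] args) {
    System.out.println(elt(2, "a", "b", "c")); // b
    System.out.println(elt(4, "a", "b", "c")); // null
  }
}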
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
Class VectorMapJoinInnerBigOnlyLongOperator, method process.
//---------------------------------------------------------------------------
// Process Single-Column Long Inner Big-Only Join on a vectorized row batch.
//
@Override
public void process(Object row, int tag) throws HiveException {
  try {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    alias = (byte) tag;
    if (needCommonSetup) {
      // Our one-time process method initialization.
      commonSetup(batch);
      /*
       * Initialize Single-Column Long members for this specialized class.
       */
      singleJoinColumn = bigTableKeyColumnMap[0];
      needCommonSetup = false;
    }
    if (needHashTableSetup) {
      // Set up our hash table specialization. This happens the first time the process
      // method is called, or after a Hybrid Grace reload.
      /*
       * Get our Single-Column Long hash multi-set information for this specialized class.
       */
      hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable;
      useMinMax = hashMultiSet.useMinMax();
      if (useMinMax) {
        min = hashMultiSet.min();
        max = hashMultiSet.max();
      }
      needHashTableSetup = false;
    }
    batchCounter++;
    // For inner joins, we may apply the filter(s) now.
    for (VectorExpression ve : bigTableFilterExpressions) {
      ve.evaluate(batch);
    }
    final int inputLogicalSize = batch.size;
    if (inputLogicalSize == 0) {
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
      }
      return;
    }
    // Perform any key expressions. Results will go into scratch columns.
    if (bigTableKeyExpressions != null) {
      for (VectorExpression ve : bigTableKeyExpressions) {
        ve.evaluate(batch);
      }
    }
    /*
     * Single-Column Long specific declarations.
     */
    // The one join column for this specialized class.
    LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn];
    long[] vector = joinColVector.vector;
    /*
     * Single-Column Long check for repeating.
     */
    // Check single column for repeating.
    boolean allKeyInputColumnsRepeating = joinColVector.isRepeating;
    if (allKeyInputColumnsRepeating) {
      /*
       * Repeating.
       */
      // All key input columns are repeating. Generate key once. Lookup once.
      // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
      /*
       * Single-Column Long specific repeated lookup.
       */
      JoinUtil.JoinResult joinResult;
      if (!joinColVector.noNulls && joinColVector.isNull[0]) {
        joinResult = JoinUtil.JoinResult.NOMATCH;
      } else {
        long key = vector[0];
        if (useMinMax && (key < min || key > max)) {
          // Out of range for whole batch.
          joinResult = JoinUtil.JoinResult.NOMATCH;
        } else {
          joinResult = hashMultiSet.contains(key, hashMultiSetResults[0]);
        }
      }
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name());
      }
      finishInnerBigOnlyRepeated(batch, joinResult, hashMultiSetResults[0]);
    } else {
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
      }
      // We remember any matching rows in allMatchs / allMatchCount. At the end of the loop,
      // selected / batch.size will represent both matching and non-matching rows for outer join.
      // Only deferred rows will have been removed from selected.
      int[] selected = batch.selected;
      boolean selectedInUse = batch.selectedInUse;
      int hashMultiSetResultCount = 0;
      int allMatchCount = 0;
      int equalKeySeriesCount = 0;
      int spillCount = 0;
      /*
       * Single-Column Long specific variables.
       */
      long saveKey = 0;
      // We optimize performance by only looking up the first key in a series of equal keys.
      boolean haveSaveKey = false;
      JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
      // Logical loop over the rows in the batch since the batch may have selected in use.
      for (int logical = 0; logical < inputLogicalSize; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);
        /*
         * Single-Column Long get key.
         */
        long currentKey;
        boolean isNull;
        if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) {
          currentKey = 0;
          isNull = true;
        } else {
          currentKey = vector[batchIndex];
          isNull = false;
        }
        if (isNull || !haveSaveKey || currentKey != saveKey) {
          if (haveSaveKey) {
            // Move on with our counts.
            switch (saveJoinResult) {
              case MATCH:
                // We have extracted the count from the hash multi-set result, so we don't keep it.
                equalKeySeriesCount++;
                break;
              case SPILL:
                // We keep the hash multi-set result for its spill information.
                hashMultiSetResultCount++;
                break;
              case NOMATCH:
                break;
            }
          }
          if (isNull) {
            saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            haveSaveKey = false;
          } else {
            // Regardless of our matching result, we keep that information to make multiple use
            // of it for a possible series of equal keys.
            haveSaveKey = true;
            /*
             * Single-Column Long specific save key.
             */
            saveKey = currentKey;
            if (useMinMax && (currentKey < min || currentKey > max)) {
              // Key out of range for whole hash table.
              saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
              saveJoinResult = hashMultiSet.contains(currentKey, hashMultiSetResults[hashMultiSetResultCount]);
            }
          }
          switch (saveJoinResult) {
            case MATCH:
              equalKeySeriesValueCounts[equalKeySeriesCount] = hashMultiSetResults[hashMultiSetResultCount].count();
              equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
              equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
              allMatchs[allMatchCount++] = batchIndex;
              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
              break;
            case SPILL:
              spills[spillCount] = batchIndex;
              spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
              spillCount++;
              break;
            case NOMATCH:
              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
              break;
          }
        } else {
          switch (saveJoinResult) {
            case MATCH:
              equalKeySeriesDuplicateCounts[equalKeySeriesCount]++;
              allMatchs[allMatchCount++] = batchIndex;
              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
              break;
            case SPILL:
              spills[spillCount] = batchIndex;
              spillHashMapResultIndices[spillCount] = hashMultiSetResultCount;
              spillCount++;
              break;
            case NOMATCH:
              // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
              break;
          }
        }
      }
      if (haveSaveKey) {
        // Update our counts for the last key.
        switch (saveJoinResult) {
          case MATCH:
            // We have extracted the count from the hash multi-set result, so we don't keep it.
            equalKeySeriesCount++;
            break;
          case SPILL:
            // We keep the hash multi-set result for its spill information.
            hashMultiSetResultCount++;
            break;
          case NOMATCH:
            break;
        }
      }
      if (isLogDebugEnabled) {
        LOG.debug(CLASS_NAME
            + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount)
            + " equalKeySeriesValueCounts " + longArrayToRangesString(equalKeySeriesValueCounts, equalKeySeriesCount)
            + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount)
            + " equalKeySeriesDuplicateCounts " + intArrayToRangesString(equalKeySeriesDuplicateCounts, equalKeySeriesCount)
            + " spills " + intArrayToRangesString(spills, spillCount)
            + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount)
            + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMultiSetResults, 0, hashMultiSetResultCount)));
      }
      finishInnerBigOnly(batch, allMatchCount, equalKeySeriesCount, spillCount,
          (VectorMapJoinHashTableResult[]) hashMultiSetResults, hashMultiSetResultCount);
    }
    if (batch.size > 0) {
      // Forward any remaining selected rows.
      forwardBigTableBatch(batch);
    }
  } catch (Exception e) {
    // IOException and everything else are wrapped identically.
    throw new HiveException(e);
  }
}
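The non-repeating path above hinges on the equal-key-series optimization: adjacent rows often carry the same join key, so the operator probes the hash multi-set only when the key changes and reuses the cached JoinResult (and match count) for the duplicates that follow. A stripped-down sketch of that idea against a plain java.util.Map standing in for the Hive hash multi-set (the names and types here are ours, not the VectorMapJoinLongHashMultiSet API):

import java.util.HashMap;
import java.util.Map;

public class EqualKeySeriesSketch {

  // Count total join matches of `keys` against `multiSet` (key -> multiplicity),
  // probing the map only when the current key differs from the previous one.
  static long countMatches(long[] keys, Map<Long, Integer> multiSet) {
    long total = 0;
    boolean haveSaveKey = false;
    long saveKey = 0;
    int saveCount = 0; // cached lookup result for the current series
    for (long key : keys) {
      if (!haveSaveKey || key != saveKey) {
        haveSaveKey = true;
        saveKey = key;
        Integer c = multiSet.get(key); // one probe per series of equal keys
        saveCount = (c == null) ? 0 : c;
      }
      total += saveCount; // duplicates reuse the cached result
    }
    return total;
  }

  public static void main(String[] args) {
    Map<Long, Integer> ms = new HashMap<>();
    ms.put(7L, 2);
    System.out.println(countMatches(new long[] {7, 7, 7, 9}, ms)); // 6
  }
}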
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
Class VectorUDFTimestampFieldDate, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  Preconditions.checkState(inputTypes[0] == VectorExpression.Type.DATE);
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
  ColumnVector inputColVec = batch.cols[this.colNum];
  /* every line below this is identical for evaluateLong & evaluateString */
  final int n = inputColVec.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }
  /* true for all algebraic UDFs with no state */
  outV.isRepeating = inputColVec.isRepeating;
  LongColumnVector longColVector = (LongColumnVector) inputColVec;
  if (inputColVec.noNulls) {
    outV.noNulls = true;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.vector[i] = getDateField(longColVector.vector[i]);
      }
    } else {
      for (int i = 0; i < n; i++) {
        outV.vector[i] = getDateField(longColVector.vector[i]);
      }
    }
  } else {
    // Handle case with nulls. Don't do function if the value is null, to save time,
    // because calling the function can be expensive.
    outV.noNulls = false;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    }
  }
}
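getDateField is the hook each concrete subclass (year, month, day, and so on) overrides; the template method above only handles nulls, selection, and repetition. A hypothetical year implementation, assuming the DATE column stores days since the Unix epoch as a long, which is how Hive's vectorized DATE columns are encoded:

import java.time.LocalDate;

public class YearFieldSketch {

  // A vectorized DATE value is a count of days since 1970-01-01.
  // Extract the calendar year from such a value.
  static long getDateField(long epochDays) {
    return LocalDate.ofEpochDay(epochDays).getYear();
  }

  public static void main(String[] args) {
    System.out.println(getDateField(0));     // 1970
    System.out.println(getDateField(19723)); // 2024 (2024-01-01 is day 19723)
  }
}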
Use of org.apache.hadoop.hive.ql.exec.vector.LongColumnVector in project hive by apache.
Class VectorUDFTimestampFieldString, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
  BytesColumnVector inputCol = (BytesColumnVector) batch.cols[this.colNum];
  final int n = inputCol.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputCol.isRepeating && batch.selectedInUse;
  if (batch.size == 0) {
    // n != batch.size when isRepeating
    return;
  }
  // true for all algebraic UDFs with no state
  outV.isRepeating = inputCol.isRepeating;
  if (inputCol.noNulls) {
    outV.noNulls = true;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        try {
          outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
          outV.isNull[i] = false;
        } catch (ParseException e) {
          outV.noNulls = false;
          outV.isNull[i] = true;
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        try {
          outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
          outV.isNull[i] = false;
        } catch (ParseException e) {
          outV.noNulls = false;
          outV.isNull[i] = true;
        }
      }
    }
  } else {
    // Handle case with nulls. Don't do function if the value is null, to save time,
    // because calling the function can be expensive.
    outV.noNulls = false;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = inputCol.isNull[i];
        if (!inputCol.isNull[i]) {
          try {
            outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
          } catch (ParseException e) {
            outV.isNull[i] = true;
          }
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outV.isNull[i] = inputCol.isNull[i];
        if (!inputCol.isNull[i]) {
          try {
            outV.vector[i] = getField(inputCol.vector[i], inputCol.start[i], inputCol.length[i]);
          } catch (ParseException e) {
            outV.isNull[i] = true;
          }
        }
      }
    }
  }
}
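Here getField parses the requested field directly out of the UTF-8 bytes of a timestamp-formatted string, and a ParseException from malformed input is converted by the loops above into a NULL output row. A simplified, hypothetical year extractor in the same spirit (the real Hive parser validates considerably more of the layout):

import java.nio.charset.StandardCharsets;
import java.text.ParseException;

public class StringYearFieldSketch {

  // Extract the year from bytes laid out as "yyyy-MM-dd ...".
  // Malformed input raises ParseException, which callers map to NULL.
  static long getField(byte[] bytes, int start, int length) throws ParseException {
    if (length < 10 || bytes[start + 4] != '-' || bytes[start + 7] != '-') {
      throw new ParseException("not a date/timestamp string", start);
    }
    try {
      return Long.parseLong(new String(bytes, start, 4, StandardCharsets.UTF_8));
    } catch (NumberFormatException e) {
      throw new ParseException("bad year digits", start);
    }
  }

  public static void main(String[] args) throws ParseException {
    byte[] b = "2019-03-05 10:11:12".getBytes(StandardCharsets.UTF_8);
    System.out.println(getField(b, 0, b.length)); // 2019
  }
}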