Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
The class VectorUDFMapIndexBaseCol, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  ColumnVector outV = batch.cols[outputColumnNum];
  MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum];
  // indexColumnVector holds the keys to look up in the map.
  indexColumnVector = batch.cols[indexColumnNum];
  /*
   * Do careful maintenance of the outputColVector.noNulls flag.
   */
  int[] mapValueIndex;
  if (mapV.isRepeating) {
    if (mapV.isNull[0]) {
      outV.isNull[0] = true;
      outV.noNulls = false;
      outV.isRepeating = true;
    } else {
      mapValueIndex = getMapValueIndex(mapV, batch);
      if (indexColumnVector.isRepeating) {
        // The key was not found in the MapColumnVector; mark the output entry null.
        if (mapValueIndex[0] == -1) {
          outV.isNull[0] = true;
          outV.noNulls = false;
        } else {
          // The key was found in the MapColumnVector; copy the corresponding value.
          outV.isNull[0] = false;
          outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values);
        }
        outV.isRepeating = true;
      } else {
        setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
      }
    }
  } else {
    mapValueIndex = getMapValueIndex(mapV, batch);
    setUnRepeatingOutVector(batch, mapV, outV, mapValueIndex);
  }
}
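The value copy above addresses mapV.values at (int) (mapV.offsets[0] + mapValueIndex[0]), which only makes sense given MapColumnVector's layout: offsets and lengths slice per-row entry runs out of the shared keys and values child vectors. Below is a minimal, self-contained sketch (hive-storage-api classes only) that builds a two-row map column and performs the kind of linear key scan that getMapValueIndex, defined elsewhere in this class hierarchy, is assumed to do; the scan loop and the toy values are illustration, not Hive code.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public class MapIndexSketch {
  public static void main(String[] args) {
    // One map per row: offsets/lengths address slices of the keys/values child vectors.
    MapColumnVector mapV = new MapColumnVector(2, new LongColumnVector(4), new LongColumnVector(4));
    LongColumnVector keys = (LongColumnVector) mapV.keys;
    LongColumnVector values = (LongColumnVector) mapV.values;
    // Row 0: {10 -> 100, 20 -> 200}; row 1: {30 -> 300}.
    keys.vector[0] = 10; values.vector[0] = 100;
    keys.vector[1] = 20; values.vector[1] = 200;
    keys.vector[2] = 30; values.vector[2] = 300;
    mapV.offsets[0] = 0; mapV.lengths[0] = 2;
    mapV.offsets[1] = 2; mapV.lengths[1] = 1;

    // Manual equivalent of a getMapValueIndex lookup for key 20 in row 0.
    long wanted = 20;
    int row = 0;
    int found = -1;
    for (int i = 0; i < mapV.lengths[row]; i++) {
      if (keys.vector[(int) mapV.offsets[row] + i] == wanted) {
        found = i; // offset of the key within this row's slice
        break;
      }
    }
    // Mirrors outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values).
    System.out.println(found == -1 ? "null" : values.vector[(int) (mapV.offsets[row] + found)]);
  }
}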
Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
The class VectorUDFTimestampFieldDate, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  Preconditions.checkState(((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.DATE);
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
  ColumnVector inputColVec = batch.cols[this.colNum];
  /* Every line below this is identical for evaluateLong & evaluateString. */
  final int n = inputColVec.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
  boolean[] outputIsNull = outputColVector.isNull;
  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outputColVector.isRepeating = false;
  LongColumnVector longColVector = (LongColumnVector) inputColVec;
  if (inputColVec.isRepeating) {
    if (inputColVec.noNulls || !inputColVec.isNull[0]) {
      outputColVector.isNull[0] = false;
      outputColVector.vector[0] = getDateField(longColVector.vector[0]);
    } else {
      outputColVector.isNull[0] = true;
      outputColVector.noNulls = false;
    }
    outputColVector.isRepeating = true;
    return;
  }
  if (inputColVec.noNulls) {
    if (batch.selectedInUse) {
      if (!outputColVector.noNulls) {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          // Set isNull before the call in case it changes its mind.
          outputIsNull[i] = false;
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      } else {
        for (int j = 0; j != n; j++) {
          final int i = sel[j];
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    } else {
      if (!outputColVector.noNulls) {
        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outputColVector.noNulls = true;
      }
      for (int i = 0; i != n; i++) {
        outputColVector.vector[i] = getDateField(longColVector.vector[i]);
      }
    }
  } else /* there are nulls in the inputColVector */ {
    // Carefully handle NULLs...
    outputColVector.noNulls = false;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outputColVector.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outputColVector.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outputColVector.vector[i] = getDateField(longColVector.vector[i]);
        }
      }
    }
  }
}
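The branch structure above, repeating fast path first, then the noNulls and selectedInUse variants, is Hive's standard vectorized null-handling pattern. As a hedged distillation (a generic kernel of my own, not a Hive API), the same logic can be written once against an arbitrary long-to-long function:

import java.util.Arrays;
import java.util.function.LongUnaryOperator;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class NullHandlingSketch {
  /** Apply f over in -> out, honoring isRepeating / noNulls / selectedInUse. */
  static void apply(VectorizedRowBatch batch, LongColumnVector in, LongColumnVector out,
      LongUnaryOperator f) {
    if (batch.size == 0) {
      return;
    }
    out.isRepeating = false;
    if (in.isRepeating) {
      // Repeating input: slot 0 stands for every row, so compute it once.
      if (in.noNulls || !in.isNull[0]) {
        out.isNull[0] = false;
        out.vector[0] = f.applyAsLong(in.vector[0]);
      } else {
        out.isNull[0] = true;
        out.noNulls = false;
      }
      out.isRepeating = true;
      return;
    }
    final int n = batch.size;
    final int[] sel = batch.selected;
    if (in.noNulls) {
      if (batch.selectedInUse) {
        for (int j = 0; j < n; j++) {
          final int i = sel[j];
          out.isNull[i] = false;
          out.vector[i] = f.applyAsLong(in.vector[i]);
        }
      } else {
        Arrays.fill(out.isNull, 0, n, false);
        out.noNulls = true;
        for (int i = 0; i < n; i++) {
          out.vector[i] = f.applyAsLong(in.vector[i]);
        }
      }
    } else {
      // Nulls present: propagate isNull and compute only for non-null rows.
      out.noNulls = false;
      if (batch.selectedInUse) {
        for (int j = 0; j < n; j++) {
          final int i = sel[j];
          out.isNull[i] = in.isNull[i];
          if (!in.isNull[i]) {
            out.vector[i] = f.applyAsLong(in.vector[i]);
          }
        }
      } else {
        for (int i = 0; i < n; i++) {
          out.isNull[i] = in.isNull[i];
          if (!in.isNull[i]) {
            out.vector[i] = f.applyAsLong(in.vector[i]);
          }
        }
      }
    }
  }

  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    LongColumnVector in = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    batch.cols[0] = in;
    batch.cols[1] = out;
    batch.size = 2;
    in.vector[0] = 10;
    in.vector[1] = 20;
    in.isNull[1] = true;
    in.noNulls = false;
    apply(batch, in, out, v -> v * 2);
    System.out.println(out.vector[0] + ", null=" + out.isNull[1]); // 20, null=true
  }
}

Note that the repeating path touches only slot 0, which is what makes constant columns nearly free in this model.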
Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
The class VectorUDFTimestampFieldTimestamp, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
  Preconditions.checkState(((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory() == PrimitiveCategory.TIMESTAMP);
  if (childExpressions != null) {
    super.evaluateChildren(batch);
  }
  LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum];
  ColumnVector inputColVec = batch.cols[this.colNum];
  /* Every line below this is identical for evaluateLong & evaluateString. */
  final int n = inputColVec.isRepeating ? 1 : batch.size;
  int[] sel = batch.selected;
  final boolean selectedInUse = !inputColVec.isRepeating && batch.selectedInUse;
  if (batch.size == 0) {
    /* n != batch.size when isRepeating */
    return;
  }
  // We do not need to do a column reset since we are carefully changing the output.
  outV.isRepeating = false;
  TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec;
  if (inputColVec.isRepeating) {
    if (inputColVec.noNulls || !inputColVec.isNull[0]) {
      outV.isNull[0] = false;
      outV.vector[0] = getTimestampField(timestampColVector, 0);
    } else {
      outV.isNull[0] = true;
      outV.noNulls = false;
    }
    outV.isRepeating = true;
    return;
  }
  if (inputColVec.noNulls) {
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = false;
        outV.vector[i] = getTimestampField(timestampColVector, i);
      }
    } else {
      Arrays.fill(outV.isNull, 0, n, false);
      for (int i = 0; i < n; i++) {
        outV.vector[i] = getTimestampField(timestampColVector, i);
      }
    }
  } else /* there are nulls in the inputColVector */ {
    // Carefully handle NULLs...
    outV.noNulls = false;
    if (selectedInUse) {
      for (int j = 0; j < n; j++) {
        int i = sel[j];
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getTimestampField(timestampColVector, i);
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        outV.isNull[i] = inputColVec.isNull[i];
        if (!inputColVec.isNull[i]) {
          outV.vector[i] = getTimestampField(timestampColVector, i);
        }
      }
    }
  }
}
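Unlike the date variant, getTimestampField takes the whole TimestampColumnVector plus an index, because that vector stores each timestamp as a split pair: epoch milliseconds in time[] and a sub-millisecond part in nanos[]. The real method is abstract and overridden per field (year, month, and so on); the following is a hypothetical year extractor using java.time instead of Hive's Calendar-based code, shown only to illustrate the access pattern:

import java.time.Instant;
import java.time.ZoneOffset;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;

public class TimestampFieldSketch {
  // Hypothetical getTimestampField implementation; the UTC zone and the
  // choice of the year field are assumptions for this illustration.
  static long getTimestampField(TimestampColumnVector timestampColVector, int elementNum) {
    long millis = timestampColVector.time[elementNum]; // epoch milliseconds
    int nanos = timestampColVector.nanos[elementNum];  // sub-millisecond part, unused here
    return Instant.ofEpochMilli(millis).atZone(ZoneOffset.UTC).getYear();
  }

  public static void main(String[] args) {
    TimestampColumnVector tcv = new TimestampColumnVector(1);
    tcv.time[0] = 1_000_000_000_000L; // 2001-09-09T01:46:40Z
    tcv.nanos[0] = 0;
    System.out.println(getTimestampField(tcv, 0)); // prints 2001
  }
}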
Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
The class VectorUDAFCount, method aggregateInput.
@Override
public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) throws HiveException {
  inputExpression.evaluate(batch);
  ColumnVector inputVector = batch.cols[this.inputExpression.getOutputColumnNum()];
  int batchSize = batch.size;
  if (batchSize == 0) {
    return;
  }
  Aggregation myagg = (Aggregation) agg;
  if (inputVector.isRepeating) {
    // A repeating non-null value counts once per row in the batch.
    if (inputVector.noNulls || !inputVector.isNull[0]) {
      myagg.count += batchSize;
    }
    return;
  }
  if (inputVector.noNulls) {
    myagg.count += batchSize;
    return;
  } else if (!batch.selectedInUse) {
    iterateNoSelectionHasNulls(myagg, batchSize, inputVector.isNull);
  } else {
    iterateSelectionHasNulls(myagg, batchSize, inputVector.isNull, batch.selected);
  }
}
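The two iterate helpers referenced above are private methods of VectorUDAFCount and are not shown in the excerpt. Judging from their names and arguments, they count non-null rows with and without a selection vector; here is a hedged reconstruction, with Count standing in for Hive's Aggregation buffer:

public class CountHelpersSketch {
  static final class Count { long count; }

  // No selection vector: scan isNull[0..batchSize) directly.
  static void iterateNoSelectionHasNulls(Count myagg, int batchSize, boolean[] isNull) {
    for (int i = 0; i < batchSize; i++) {
      if (!isNull[i]) {
        myagg.count++;
      }
    }
  }

  // Selection in use: isNull is indexed through the selected array.
  static void iterateSelectionHasNulls(Count myagg, int batchSize, boolean[] isNull,
      int[] selected) {
    for (int j = 0; j < batchSize; j++) {
      if (!isNull[selected[j]]) {
        myagg.count++;
      }
    }
  }

  public static void main(String[] args) {
    Count c = new Count();
    iterateNoSelectionHasNulls(c, 3, new boolean[] { false, true, false });
    System.out.println(c.count); // 2
  }
}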
Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector in project hive by apache.
The class VectorMapJoinGenerateResultOperator, method generateHashMapResultLargeMultiValue.
/**
 * Generate optimized results for a large N x M cross product using repeated vectorized row
 * batch optimization.
 *
 * @param batch
 *          The big table batch.
 * @param hashMapResult
 *          The hash map results for the matching key.
 * @param allMatchs
 *          The all-match selected array that contains (physical) batch indices.
 * @param allMatchesIndex
 *          The index of the match key.
 * @param duplicateCount
 *          Number of equal key rows.
 */
private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch,
    VectorMapJoinHashMapResult hashMapResult, int[] allMatchs, int allMatchesIndex,
    int duplicateCount) throws HiveException, IOException {
  // Kick out previous overflow batch results.
  if (overflowBatch.size > 0) {
    forwardOverflow();
  }
  ByteSegmentRef byteSegmentRef = hashMapResult.first();
  while (byteSegmentRef != null) {
    // Fill up as much of the overflow batch as possible with small table values.
    while (byteSegmentRef != null) {
      if (smallTableVectorDeserializeRow != null) {
        doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult);
      }
      overflowBatch.size++;
      if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) {
        break;
      }
      byteSegmentRef = hashMapResult.next();
    }
    for (int i = 0; i < duplicateCount; i++) {
      int batchIndex = allMatchs[allMatchesIndex + i];
      if (bigTableRetainedVectorCopy != null) {
        // The one big table row's values repeat.
        bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0);
        for (int column : bigTableRetainedMapping.getOutputColumns()) {
          overflowBatch.cols[column].isRepeating = true;
        }
      }
      // Crucial here that we don't reset the overflow batch, or we will lose the small table
      // values we put in above.
      forwardOverflowNoReset();
      // Hand-reset the big table columns.
      for (int column : bigTableRetainedMapping.getOutputColumns()) {
        ColumnVector colVector = overflowBatch.cols[column];
        colVector.reset();
      }
    }
    byteSegmentRef = hashMapResult.next();
    if (byteSegmentRef == null) {
      break;
    }
    // Get ready for another round of small table values.
    overflowBatch.reset();
  }
  // Clear away any residue from our optimizations.
  overflowBatch.reset();
}
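The trick that makes this N x M expansion cheap is ColumnVector.isRepeating: a repeating column presents vector[0] as the value of every row, so one big-table row copied by reference into row 0 can pair with a full batch of small-table rows without M physical copies. A toy sketch of that property (the values and two-column layout are invented for illustration, not the operator's actual schema):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class RepeatingBatchSketch {
  public static void main(String[] args) {
    VectorizedRowBatch batch = new VectorizedRowBatch(2);
    LongColumnVector bigTableCol = new LongColumnVector();
    LongColumnVector smallTableCol = new LongColumnVector();
    batch.cols[0] = bigTableCol;
    batch.cols[1] = smallTableCol;
    batch.size = 3;
    // Three distinct small-table values...
    smallTableCol.vector[0] = 7;
    smallTableCol.vector[1] = 8;
    smallTableCol.vector[2] = 9;
    // ...and a single big-table value marked repeating.
    bigTableCol.vector[0] = 42;
    bigTableCol.isRepeating = true;
    for (int i = 0; i < batch.size; i++) {
      // Readers of a repeating column always index slot 0.
      int bigIdx = bigTableCol.isRepeating ? 0 : i;
      System.out.println(bigTableCol.vector[bigIdx] + " x " + smallTableCol.vector[i]);
    }
  }
}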