Example usage of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project:
class VectorPTFEvaluatorDoubleAvg, method evaluateGroupBatch.
/**
 * Accumulates one VectorizedRowBatch of the current PTF group into the running
 * AVG(double) state: every non-null double from the input column is added to the
 * instance field {@code sum} and counted in {@code nonNullGroupCount}. On the last
 * batch of the group, {@code avg = sum / nonNullGroupCount} is computed when at
 * least one non-null value was seen ({@code isGroupResultNull} tracks that).
 *
 * Bug fix vs. original: the original returned early when the batch was empty or
 * when a non-repeating batch contained only NULLs, skipping the
 * {@code isLastGroupBatch} finalization. A group whose final batch was empty or
 * all-NULL therefore never had its average computed even though earlier batches
 * had accumulated non-null values. Accumulation and finalization are now split
 * so finalization always runs on the last group batch.
 */
public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
evaluateInputExpr(batch);
// Sum all non-null double column values for avg; maintain isGroupResultNull; after last row of
// last group batch compute the group avg when sum is non-null.
// We do not filter when PTF is in reducer.
Preconditions.checkState(!batch.selectedInUse);
final int size = batch.size;
if (size > 0) {
accumulateBatch((DoubleColumnVector) batch.cols[inputColumnNum], size);
}
if (isLastGroupBatch) {
if (!isGroupResultNull) {
// At least one non-null value was accumulated, so nonNullGroupCount > 0.
avg = sum / nonNullGroupCount;
}
}
}

// Folds one non-empty batch's non-null values into sum / nonNullGroupCount.
private void accumulateBatch(DoubleColumnVector doubleColVector, int size) {
if (doubleColVector.isRepeating) {
if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
// We have a repeated value. The sum increases by value * size.
final double repeatedSum = doubleColVector.vector[0] * size;
if (isGroupResultNull) {
// First aggregation calculation for group.
sum = repeatedSum;
isGroupResultNull = false;
} else {
sum += repeatedSum;
}
nonNullGroupCount += size;
}
// A repeating NULL contributes nothing.
} else if (doubleColVector.noNulls) {
// No nulls: sum the whole vector into a local first, then fold into the group sum.
double[] vector = doubleColVector.vector;
double varSum = vector[0];
for (int i = 1; i < size; i++) {
varSum += vector[i];
}
nonNullGroupCount += size;
if (isGroupResultNull) {
// First aggregation calculation for group.
sum = varSum;
isGroupResultNull = false;
} else {
sum += varSum;
}
} else {
// Mixed nulls: skip leading NULLs, then add each non-null entry.
boolean[] batchIsNull = doubleColVector.isNull;
int i = 0;
while (batchIsNull[i]) {
if (++i >= size) {
// Entire batch is NULL; nothing to accumulate (caller still finalizes).
return;
}
}
double[] vector = doubleColVector.vector;
double varSum = vector[i++];
nonNullGroupCount++;
for (; i < size; i++) {
if (!batchIsNull[i]) {
varSum += vector[i];
nonNullGroupCount++;
}
}
if (isGroupResultNull) {
// First aggregation calculation for group.
sum = varSum;
isGroupResultNull = false;
} else {
sum += varSum;
}
}
}
Example usage of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project:
class VectorUDAFSumTimestamp, method assignRowColumn.
@Override
public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum, AggregationBuffer agg) throws HiveException {
// Write this aggregation buffer's running sum into the given output row slot,
// propagating NULL when the buffer never received a value.
final Aggregation myagg = (Aggregation) agg;
final DoubleColumnVector out = (DoubleColumnVector) batch.cols[columnNum];
if (myagg.isNull) {
// NULL result: mark the row and clear the column-wide noNulls flag.
out.isNull[batchIndex] = true;
out.noNulls = false;
return;
}
out.isNull[batchIndex] = false;
out.vector[batchIndex] = myagg.sum;
}
Example usage of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project:
class IfExprDoubleColumnDoubleColumn, method evaluate.
/**
 * Vectorized IF(arg1, arg2, arg3) over double columns: per row, the output takes
 * arg2's value/null flag when the long predicate arg1 is non-null and equal to 1,
 * otherwise arg3's. A NULL predicate therefore selects the arg3 (else) branch.
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
super.evaluateChildren(batch);
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
DoubleColumnVector arg2ColVector = (DoubleColumnVector) batch.cols[arg2Column];
DoubleColumnVector arg3ColVector = (DoubleColumnVector) batch.cols[arg3Column];
DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector1 = arg1ColVector.vector;
double[] vector2 = arg2ColVector.vector;
double[] vector3 = arg3ColVector.vector;
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
if (n == 0) {
return;
}
// We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
/* All the code paths below propagate nulls even if neither arg2 nor arg3
 * have nulls. This is to reduce the number of code paths and shorten the
 * code, at the expense of maybe doing unnecessary work if neither input
 * has nulls. This could be improved in the future by expanding the number
 * of code paths.
 */
if (arg1ColVector.isRepeating) {
// Repeating predicate: the whole batch takes one branch, so the output is
// simply a copy of that branch's column. A repeating NULL predicate
// (!noNulls && isNull[0]) fails the condition and copies arg3.
if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
} else {
arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
}
return;
}
// extend any repeating values and noNulls indicator in the inputs
// (flatten materializes per-row values/null flags so the branchless
// per-row copies below are valid; undone by unFlatten at the end).
arg2ColVector.flatten(batch.selectedInUse, sel, n);
arg3ColVector.flatten(batch.selectedInUse, sel, n);
if (arg1ColVector.noNulls) {
// Carefully handle NULLs...
/*
 * For better performance on LONG/DOUBLE we don't want the conditional
 * statements inside the for loop.
 */
outputColVector.noNulls = false;
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
// Copy both the value and the null flag from the chosen branch.
outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
}
}
} else /* there are nulls in the inputColVector */
{
// Carefully handle NULLs...
/*
 * For better performance on LONG/DOUBLE we don't want the conditional
 * statements inside the for loop.
 */
outputColVector.noNulls = false;
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
// NULL predicate rows take the arg3 branch (the ternary's else side).
outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? vector2[i] : vector3[i]);
outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? vector2[i] : vector3[i]);
outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
}
}
}
// restore repeating and no nulls indicators
arg2ColVector.unFlatten();
arg3ColVector.unFlatten();
}
Example usage of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project:
class LongColDivideLongScalar, method evaluate.
/**
 * Vectorized division of a long column by the long scalar {@code value}
 * (an instance field), producing a double output column. A zero divisor
 * turns the whole output batch into NULLs rather than throwing.
 */
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
super.evaluateChildren(batch);
}
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
int[] sel = batch.selected;
boolean[] inputIsNull = inputColVector.isNull;
boolean[] outputIsNull = outputColVector.isNull;
int n = batch.size;
long[] vector = inputColVector.vector;
double[] outputVector = outputColVector.vector;
// return immediately if batch is empty
if (n == 0) {
return;
}
// We do not need to do a column reset since we are carefully changing the output.
outputColVector.isRepeating = false;
if (value == 0) {
// Denominator is zero, convert the batch to nulls
outputColVector.noNulls = false;
outputColVector.isRepeating = true;
outputIsNull[0] = true;
NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
return;
} else if (inputColVector.isRepeating) {
// Repeating input yields a repeating output: compute entry 0 only.
if (inputColVector.noNulls || !inputIsNull[0]) {
outputIsNull[0] = false;
outputVector[0] = vector[0] / (double) value;
} else {
outputIsNull[0] = true;
outputColVector.noNulls = false;
}
outputColVector.isRepeating = true;
NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
return;
}
if (inputColVector.noNulls) {
if (batch.selectedInUse) {
if (!outputColVector.noNulls) {
// Output column may have stale null flags; clear them per selected row.
for (int j = 0; j != n; j++) {
final int i = sel[j];
// Set isNull before call in case it changes it mind.
outputIsNull[i] = false;
outputVector[i] = vector[i] / (double) value;
}
} else {
for (int j = 0; j != n; j++) {
final int i = sel[j];
outputVector[i] = vector[i] / (double) value;
}
}
} else {
if (!outputColVector.noNulls) {
// Assume it is almost always a performance win to fill all of isNull so we can
// safely reset noNulls.
Arrays.fill(outputIsNull, false);
outputColVector.noNulls = true;
}
for (int i = 0; i != n; i++) {
outputVector[i] = vector[i] / (double) value;
}
}
} else /* there are nulls */
{
// Carefully handle NULLs...
/*
 * For better performance on LONG/DOUBLE we don't want the conditional
 * statements inside the for loop.
 */
outputColVector.noNulls = false;
if (batch.selectedInUse) {
for (int j = 0; j != n; j++) {
int i = sel[j];
// Divide unconditionally (divisor is non-zero here) and copy the null flag.
outputVector[i] = vector[i] / (double) value;
outputIsNull[i] = inputIsNull[i];
}
} else {
for (int i = 0; i != n; i++) {
outputVector[i] = vector[i] / (double) value;
}
System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
}
}
/* Set double data vector array entries for NULL elements to the correct value.
 * Unlike other col-scalar operations, this one doesn't benefit from carrying
 * over NaN values from the input array.
 */
NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
Example usage of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in the Apache Hive project:
class MathFuncDoubleToDouble, method evaluate.
@Override
public void evaluate(VectorizedRowBatch batch) {
// Applies the scalar math function func() element-wise to a double input
// column, writing results (and propagated null flags) to the output column.
if (childExpressions != null) {
this.evaluateChildren(batch);
}

final DoubleColumnVector in = (DoubleColumnVector) batch.cols[colNum];
final DoubleColumnVector out = (DoubleColumnVector) batch.cols[outputColumnNum];
final int count = batch.size;
// Nothing to do for an empty batch.
if (count == 0) {
return;
}

final int[] selected = batch.selected;
final boolean[] inNull = in.isNull;
final boolean[] outNull = out.isNull;
final double[] inVec = in.vector;
final double[] outVec = out.vector;

// Entries are written individually below; no full column reset is needed.
out.isRepeating = false;

if (in.isRepeating) {
// Repeating input yields a repeating output: compute entry 0 only.
if (in.noNulls || !inNull[0]) {
outNull[0] = false;
outVec[0] = func(inVec[0]);
} else {
outNull[0] = true;
out.noNulls = false;
}
out.isRepeating = true;
cleanup(out, selected, batch.selectedInUse, count);
return;
}

if (in.noNulls) {
if (batch.selectedInUse) {
if (out.noNulls) {
for (int j = 0; j != count; j++) {
final int i = selected[j];
outVec[i] = func(inVec[i]);
}
} else {
// Output may carry stale null flags; clear them per selected row.
for (int j = 0; j != count; j++) {
final int i = selected[j];
outNull[i] = false;
outVec[i] = func(inVec[i]);
}
}
} else {
if (!out.noNulls) {
// Cheaper to wipe the whole isNull array than to track entries,
// and it lets us safely restore noNulls.
Arrays.fill(outNull, false);
out.noNulls = true;
}
for (int i = 0; i != count; i++) {
outVec[i] = func(inVec[i]);
}
}
} else {
// Input has nulls: mirror its null mask into the output.
out.noNulls = false;
if (batch.selectedInUse) {
for (int j = 0; j != count; j++) {
final int i = selected[j];
outNull[i] = inNull[i];
outVec[i] = func(inVec[i]);
}
} else {
System.arraycopy(inNull, 0, outNull, 0, count);
for (int i = 0; i != count; i++) {
outVec[i] = func(inVec[i]);
}
}
}
cleanup(out, selected, batch.selectedInUse, count);
}
Aggregations