Search in sources :

Example 81 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

the class VectorPTFEvaluatorDoubleAvg method evaluateGroupBatch.

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Sum all non-null double column values for avg; maintain isGroupResultNull; after last row of
    // last group batch compute the group avg when sum is non-null.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
    if (doubleColVector.isRepeating) {
        if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
            // We have a repeated value.  The sum increases by value * batch.size.
            if (isGroupResultNull) {
                // First aggregation calculation for group.
                sum = doubleColVector.vector[0] * batch.size;
                isGroupResultNull = false;
            } else {
                sum += doubleColVector.vector[0] * batch.size;
            }
            nonNullGroupCount += size;
        }
    } else if (doubleColVector.noNulls) {
        double[] vector = doubleColVector.vector;
        double varSum = vector[0];
        for (int i = 1; i < size; i++) {
            varSum += vector[i];
        }
        nonNullGroupCount += size;
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum = varSum;
            isGroupResultNull = false;
        } else {
            sum += varSum;
        }
    } else {
        boolean[] batchIsNull = doubleColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        double[] vector = doubleColVector.vector;
        double varSum = vector[i++];
        nonNullGroupCount++;
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                varSum += vector[i];
                nonNullGroupCount++;
            }
        }
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum = varSum;
            isGroupResultNull = false;
        } else {
            sum += varSum;
        }
    }
    if (isLastGroupBatch) {
        if (!isGroupResultNull) {
            avg = sum / nonNullGroupCount;
        }
    }
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 82 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

the class VectorUDAFSumTimestamp method assignRowColumn.

@Override
public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int columnNum, AggregationBuffer agg) throws HiveException {
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[columnNum];
    Aggregation myagg = (Aggregation) agg;
    if (myagg.isNull) {
        outputColVector.noNulls = false;
        outputColVector.isNull[batchIndex] = true;
        return;
    }
    outputColVector.isNull[batchIndex] = false;
    outputColVector.vector[batchIndex] = myagg.sum;
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 83 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

the class IfExprDoubleColumnDoubleColumn method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
    DoubleColumnVector arg2ColVector = (DoubleColumnVector) batch.cols[arg2Column];
    DoubleColumnVector arg3ColVector = (DoubleColumnVector) batch.cols[arg3Column];
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    long[] vector1 = arg1ColVector.vector;
    double[] vector2 = arg2ColVector.vector;
    double[] vector3 = arg3ColVector.vector;
    double[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    /* All the code paths below propagate nulls even if neither arg2 nor arg3
     * have nulls. This is to reduce the number of code paths and shorten the
     * code, at the expense of maybe doing unnecessary work if neither input
     * has nulls. This could be improved in the future by expanding the number
     * of code paths.
     */
    if (arg1ColVector.isRepeating) {
        if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
            arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        } else {
            arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
        }
        return;
    }
    // extend any repeating values and noNulls indicator in the inputs
    arg2ColVector.flatten(batch.selectedInUse, sel, n);
    arg3ColVector.flatten(batch.selectedInUse, sel, n);
    if (arg1ColVector.noNulls) {
        // Carefully handle NULLs...
        /*
       * For better performance on LONG/DOUBLE we don't want the conditional
       * statements inside the for loop.
       */
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        } else {
            for (int i = 0; i != n; i++) {
                outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
                outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs...
        /*
       * For better performance on LONG/DOUBLE we don't want the conditional
       * statements inside the for loop.
       */
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? vector2[i] : vector3[i]);
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        } else {
            for (int i = 0; i != n; i++) {
                outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? vector2[i] : vector3[i]);
                outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
            }
        }
    }
    // restore repeating and no nulls indicators
    arg2ColVector.unFlatten();
    arg3ColVector.unFlatten();
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 84 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

the class LongColDivideLongScalar method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    long[] vector = inputColVector.vector;
    double[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (value == 0) {
        // Denominator is zero, convert the batch to nulls
        outputColVector.noNulls = false;
        outputColVector.isRepeating = true;
        outputIsNull[0] = true;
        NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
        return;
    } else if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            outputIsNull[0] = false;
            outputVector[0] = vector[0] / (double) value;
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n);
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before call in case it changes it mind.
                    outputIsNull[i] = false;
                    outputVector[i] = vector[i] / (double) value;
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    outputVector[i] = vector[i] / (double) value;
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                outputVector[i] = vector[i] / (double) value;
            }
        }
    } else /* there are nulls */
    {
        // Carefully handle NULLs...
        /*
       * For better performance on LONG/DOUBLE we don't want the conditional
       * statements inside the for loop.
       */
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputVector[i] = vector[i] / (double) value;
                outputIsNull[i] = inputIsNull[i];
            }
        } else {
            for (int i = 0; i != n; i++) {
                outputVector[i] = vector[i] / (double) value;
            }
            System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
        }
    }
    /* Set double data vector array entries for NULL elements to the correct value.
     * Unlike other col-scalar operations, this one doesn't benefit from carrying
     * over NaN values from the input array.
     */
    NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 85 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

the class MathFuncDoubleToDouble method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        this.evaluateChildren(batch);
    }
    DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    double[] vector = inputColVector.vector;
    double[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            outputIsNull[0] = false;
            outputVector[0] = func(vector[0]);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        cleanup(outputColVector, sel, batch.selectedInUse, n);
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before call in case it changes it mind.
                    outputIsNull[i] = false;
                    outputVector[i] = func(vector[i]);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    outputVector[i] = func(vector[i]);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                outputVector[i] = func(vector[i]);
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputIsNull[i] = inputIsNull[i];
                outputVector[i] = func(vector[i]);
            }
        } else {
            System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
            for (int i = 0; i != n; i++) {
                outputVector[i] = func(vector[i]);
            }
        }
    }
    cleanup(outputColVector, sel, batch.selectedInUse, n);
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)104 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)59 Test (org.junit.Test)37 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)33 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)18 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)17 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)13 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)10 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)9 Configuration (org.apache.hadoop.conf.Configuration)7 Random (java.util.Random)5 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)5 Timestamp (java.sql.Timestamp)4 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)4 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)4 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2