
Example 36 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class TestVectorFilterExpressions, method testFilterDoubleNotBetween.

@Test
public void testFilterDoubleNotBetween() {
    // Spot check only. null & repeating behavior are checked elsewhere for the same template.
    int seed = 17;
    VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(5, 2, seed);
    vrb.cols[0] = new DoubleColumnVector();
    DoubleColumnVector dcv = (DoubleColumnVector) vrb.cols[0];
    // Basic case
    dcv.vector[0] = 5;
    dcv.vector[1] = 20;
    dcv.vector[2] = 17;
    dcv.vector[3] = 15;
    dcv.vector[4] = 10;
    VectorExpression expr = new FilterDoubleColumnNotBetween(0, 10, 20);
    expr.evaluate(vrb);
    assertEquals(1, vrb.size);
    assertTrue(vrb.selectedInUse);
    assertEquals(0, vrb.selected[0]);
}
Also used: VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), FilterDoubleColumnNotBetween (org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), Test (org.junit.Test)
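
As a quick aside (not part of the Hive test), the assertions above read more easily once you know how a consumer walks a filtered batch: when selectedInUse is true, selected[0..size) holds the indices of the rows that survived the filter. A minimal hedged sketch of that read loop, using only the VectorizedRowBatch fields the test already asserts on:

// Sketch: reading the rows that survive a filter expression.
// For the batch in the test above this prints a single value, 5.0.
static void printSelectedDoubles(VectorizedRowBatch batch, int colIdx) {
    DoubleColumnVector col = (DoubleColumnVector) batch.cols[colIdx];
    for (int j = 0; j < batch.size; j++) {
        int i = batch.selectedInUse ? batch.selected[j] : j;
        System.out.println(col.vector[i]);
    }
}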

Example 37 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class LongColDivideLongColumn, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1];
    LongColumnVector inputColVector2 = (LongColumnVector) batch.cols[colNum2];
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    int n = batch.size;
    long[] vector1 = inputColVector1.vector;
    long[] vector2 = inputColVector2.vector;
    double[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    /*
     * Propagate null values for a two-input operator and set isRepeating and noNulls appropriately.
     */
    NullUtil.propagateNullsColCol(inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse);
    /* Disregard nulls for processing. In other words,
     * the arithmetic operation is performed even if one or
     * more inputs are null. This is to improve speed by avoiding
     * conditional checks in the inner loop.
     */
    boolean hasDivBy0 = false;
    if (inputColVector1.isRepeating && inputColVector2.isRepeating) {
        long denom = vector2[0];
        outputVector[0] = vector1[0] / (double) denom;
        hasDivBy0 = hasDivBy0 || (denom == 0);
    } else if (inputColVector1.isRepeating) {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                long denom = vector2[i];
                outputVector[i] = vector1[0] / (double) denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
            }
        } else {
            for (int i = 0; i != n; i++) {
                long denom = vector2[i];
                outputVector[i] = vector1[0] / (double) denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
            }
        }
    } else if (inputColVector2.isRepeating) {
        if (vector2[0] == 0) {
            outputColVector.noNulls = false;
            outputColVector.isRepeating = true;
            outputColVector.isNull[0] = true;
        } else if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputVector[i] = vector1[i] / (double) vector2[0];
            }
        } else {
            for (int i = 0; i != n; i++) {
                outputVector[i] = vector1[i] / (double) vector2[0];
            }
        }
    } else {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                long denom = vector2[i];
                outputVector[i] = vector1[i] / (double) denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
            }
        } else {
            for (int i = 0; i != n; i++) {
                long denom = vector2[i];
                outputVector[i] = vector1[i] / (double) denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
            }
        }
    }
    /* For the case when the output can have null values, follow
     * the convention that the data values must be 1 for long and
     * NaN for double. This is to prevent possible later zero-divide errors
     * in complex arithmetic expressions like col2 / (col1 - 1)
     * in the case when some col1 entries are null.
     */
    if (!hasDivBy0) {
        NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
    } else {
        NullUtil.setNullAndDivBy0DataEntriesDouble(outputColVector, batch.selectedInUse, sel, n, inputColVector2);
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
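
A minimal usage sketch for the expression above (not from the Hive sources). It assumes the three-argument (colNum1, colNum2, outputColumnNum) constructor and builds the batch by hand; the zero denominator in row 1 exercises the div-by-0 path described in the comments, so that output row ends up NULL with NaN in its data slot.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;

public class LongColDivideLongColumnSketch {
    public static void main(String[] args) throws Exception {
        // Column layout assumed for this sketch: 0 = numerator, 1 = denominator, 2 = output.
        VectorizedRowBatch batch = new VectorizedRowBatch(3, 3);
        batch.cols[0] = new LongColumnVector(3);
        batch.cols[1] = new LongColumnVector(3);
        batch.cols[2] = new DoubleColumnVector(3);
        batch.size = 3;

        LongColumnVector num = (LongColumnVector) batch.cols[0];
        LongColumnVector den = (LongColumnVector) batch.cols[1];
        num.vector[0] = 10; den.vector[0] = 4;  // 2.5
        num.vector[1] = 7;  den.vector[1] = 0;  // divide by zero -> NULL output row
        num.vector[2] = 9;  den.vector[2] = 3;  // 3.0

        new LongColDivideLongColumn(0, 1, 2).evaluate(batch);

        DoubleColumnVector out = (DoubleColumnVector) batch.cols[2];
        // Expected: 2.5, then true (row 1 is NULL and its data slot is NaN), then 3.0.
        System.out.println(out.vector[0] + " " + out.isNull[1] + " " + out.vector[2]);
    }
}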

Example 38 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class LongScalarDivideLongColumn, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    long[] vector = inputColVector.vector;
    double[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    boolean hasDivBy0 = false;
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            outputIsNull[0] = false;
            long denom = vector[0];
            outputVector[0] = value / denom;
            hasDivBy0 = hasDivBy0 || (denom == 0);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
    } else if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    long denom = vector[i];
                    outputVector[i] = value / denom;
                    hasDivBy0 = hasDivBy0 || (denom == 0);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    long denom = vector[i];
                    outputVector[i] = value / denom;
                    hasDivBy0 = hasDivBy0 || (denom == 0);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                long denom = vector[i];
                outputVector[i] = value / denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
            }
        }
    } else /* there are nulls */
    {
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                long denom = vector[i];
                outputVector[i] = value / denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
                outputIsNull[i] = inputIsNull[i];
            }
        } else {
            System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
            for (int i = 0; i != n; i++) {
                long denom = vector[i];
                outputVector[i] = value / denom;
                hasDivBy0 = hasDivBy0 || (denom == 0);
            }
        }
    }
    /* Set double data vector array entries for NULL elements to the correct value.
     * Unlike other col-scalar operations, this one doesn't benefit from carrying
     * over NaN values from the input array.
     */
    if (!hasDivBy0) {
        NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n);
    } else {
        NullUtil.setNullAndDivBy0DataEntriesDouble(outputColVector, batch.selectedInUse, sel, n, inputColVector);
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 39 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class MathFuncLongToDouble, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        this.evaluateChildren(batch);
    }
    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    long[] vector = inputColVector.vector;
    double[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            outputIsNull[0] = false;
            outputVector[0] = func(vector[0]);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        cleanup(outputColVector, sel, batch.selectedInUse, n);
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    outputVector[i] = func(vector[i]);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    outputVector[i] = func(vector[i]);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                outputVector[i] = func(vector[i]);
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                outputIsNull[i] = inputIsNull[i];
                outputVector[i] = func(vector[i]);
            }
        } else {
            System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
            for (int i = 0; i != n; i++) {
                outputVector[i] = func(vector[i]);
            }
        }
    }
    cleanup(outputColVector, sel, batch.selectedInUse, n);
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
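
MathFuncLongToDouble is a template: the branching above is shared, and concrete subclasses only supply func(long). The simplified standalone sketch below distills that per-element mapping with null propagation into one method, with the function passed in explicitly; it illustrates the pattern, not the class's exact branch structure, and the helper and class names are made up for this page.

import java.util.function.LongToDoubleFunction;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class LongToDoubleMapSketch {
    // Apply f to every row of inCol, writing results and null flags to outCol.
    static void map(VectorizedRowBatch batch, int inCol, int outCol, LongToDoubleFunction f) {
        LongColumnVector in = (LongColumnVector) batch.cols[inCol];
        DoubleColumnVector out = (DoubleColumnVector) batch.cols[outCol];
        int n = batch.size;
        if (n == 0) {
            return;
        }
        if (in.isRepeating) {
            // A repeating input yields a repeating output; only element 0 matters.
            out.isRepeating = true;
            out.isNull[0] = !in.noNulls && in.isNull[0];
            out.noNulls = out.noNulls && !out.isNull[0];
            if (!out.isNull[0]) {
                out.vector[0] = f.applyAsDouble(in.vector[0]);
            }
            return;
        }
        out.isRepeating = false;
        out.noNulls = out.noNulls && in.noNulls;
        for (int j = 0; j < n; j++) {
            int i = batch.selectedInUse ? batch.selected[j] : j;
            out.isNull[i] = !in.noNulls && in.isNull[i];
            // Like the template, compute even for NULL rows to avoid a per-row null check.
            out.vector[i] = f.applyAsDouble(in.vector[i]);
        }
    }

    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(2, 4);
        batch.cols[0] = new LongColumnVector(4);
        batch.cols[1] = new DoubleColumnVector(4);
        batch.size = 4;
        long[] v = ((LongColumnVector) batch.cols[0]).vector;
        v[0] = 1; v[1] = 4; v[2] = 9; v[3] = 16;
        map(batch, 0, 1, Math::sqrt);  // stands in for a concrete func(long) such as sqrt
        System.out.println(java.util.Arrays.toString(((DoubleColumnVector) batch.cols[1]).vector));
    }
}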

Example 40 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class CastDoubleToTimestamp, method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) {
    if (childExpressions != null) {
        this.evaluateChildren(batch);
    }
    DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum];
    TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    double[] vector = inputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputIsNull[0]) {
            outputIsNull[0] = false;
            setDouble(outputColVector, vector, 0);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    setDouble(outputColVector, vector, i);
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    setDouble(outputColVector, vector, i);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                setDouble(outputColVector, vector, i);
            }
        }
    } else /* there are NULLs in the inputColVector */
    {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!inputIsNull[i]) {
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    setDouble(outputColVector, vector, i);
                } else {
                    outputIsNull[i] = true;
                    outputColVector.noNulls = false;
                }
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (!inputIsNull[i]) {
                    // Set isNull before the call in case it changes its mind.
                    outputIsNull[i] = false;
                    setDouble(outputColVector, vector, i);
                } else {
                    outputIsNull[i] = true;
                    outputColVector.noNulls = false;
                }
            }
        }
    }
}
Also used: TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
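
A minimal usage sketch for the cast above (not from the Hive sources). It assumes the two-argument (colNum, outputColumnNum) constructor and the usual Hive CAST(double AS TIMESTAMP) semantic of seconds since the epoch with a fractional subsecond part; the private setDouble(...) helper itself is not shown on this page.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDoubleToTimestamp;

public class CastDoubleToTimestampSketch {
    public static void main(String[] args) throws Exception {
        VectorizedRowBatch batch = new VectorizedRowBatch(2, 2);
        batch.cols[0] = new DoubleColumnVector(2);
        batch.cols[1] = new TimestampColumnVector(2);
        batch.size = 2;

        DoubleColumnVector in = (DoubleColumnVector) batch.cols[0];
        in.vector[0] = 0.0;   // the epoch
        in.vector[1] = 1.5;   // 1.5 seconds after the epoch

        new CastDoubleToTimestamp(0, 1).evaluate(batch);

        TimestampColumnVector out = (TimestampColumnVector) batch.cols[1];
        // Roughly 1970-01-01 00:00:01.5; the printed string depends on the JVM time zone.
        System.out.println(out.asScratchTimestamp(1));
    }
}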

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 101 usages
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 58 usages
Test (org.junit.Test): 37 usages
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 31 usages
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 17 usages
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 16 usages
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 11 usages
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 9 usages
VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader): 9 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
Random (java.util.Random): 5 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 5 usages
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 4 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 4 usages
Timestamp (java.sql.Timestamp): 3 usages
StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector): 3 usages
IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector): 2 usages
ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector): 2 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 2 usages
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 2 usages