Search in sources :

Example 21 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class TestVectorArithmeticExpressions, the method testDecimalColAddDecimalColumn:

@Test
public void testDecimalColAddDecimalColumn() throws HiveException {
    VectorizedRowBatch batch = getVectorizedRowBatch3DecimalCols();
    VectorExpression expr = new DecimalColAddDecimalColumn(0, 1, 2);
    DecimalColumnVector resultCol = (DecimalColumnVector) batch.cols[2];

    // Plain addition with no nulls anywhere.
    expr.evaluate(batch);
    assertTrue(resultCol.vector[0].getHiveDecimal().equals(HiveDecimal.create("2.20")));
    assertTrue(resultCol.vector[1].getHiveDecimal().equals(HiveDecimal.create("-2.30")));
    assertTrue(resultCol.vector[2].getHiveDecimal().equals(HiveDecimal.create("1.00")));

    // A null in the left operand must propagate to the result.
    batch = getVectorizedRowBatch3DecimalCols();
    DecimalColumnVector leftCol = (DecimalColumnVector) batch.cols[0];
    leftCol.noNulls = false;
    leftCol.isNull[0] = true;
    resultCol = (DecimalColumnVector) batch.cols[2];
    expr.evaluate(batch);
    assertTrue(!resultCol.noNulls && resultCol.isNull[0]);
    // By design the data slot under a null entry is not 0 but the minimum
    // non-zero value at this scale, i.e. 0.01.
    assertTrue(resultCol.vector[0].getHiveDecimal().equals(HiveDecimal.create("0.01")));

    // Arithmetic overflow must produce a NULL result.
    batch = getVectorizedRowBatch3DecimalCols();
    leftCol = (DecimalColumnVector) batch.cols[0];
    // Max representable value; the addition at row 0 therefore overflows.
    leftCol.vector[0].set(HiveDecimal.create("9999999999999999.99"));
    resultCol = (DecimalColumnVector) batch.cols[2];
    expr.evaluate(batch);
    assertTrue(!resultCol.noNulls && resultCol.isNull[0]);
    // Null output slots again carry the designated non-zero filler value.
    assertTrue(resultCol.vector[0].getHiveDecimal().equals(HiveDecimal.create("0.01")));

    // Repeating left input: every row must see the same left operand.
    batch = getVectorizedRowBatch3DecimalCols();
    leftCol = (DecimalColumnVector) batch.cols[0];
    leftCol.isRepeating = true;
    resultCol = (DecimalColumnVector) batch.cols[2];
    expr.evaluate(batch);
    assertTrue(resultCol.vector[0].getHiveDecimal().equals(HiveDecimal.create("2.20")));
    assertTrue(resultCol.vector[1].getHiveDecimal().equals(HiveDecimal.create("2.20")));
    assertTrue(resultCol.vector[2].getHiveDecimal().equals(HiveDecimal.create("2.20")));

    // Both inputs repeating: the result must be flagged repeating as well.
    DecimalColumnVector rightCol = (DecimalColumnVector) batch.cols[1];
    rightCol.isRepeating = true;
    expr.evaluate(batch);
    assertTrue(resultCol.isRepeating);
    assertTrue(resultCol.vector[0].getHiveDecimal().equals(HiveDecimal.create("2.20")));

    // Repeating right input only.
    batch = getVectorizedRowBatch3DecimalCols();
    rightCol = (DecimalColumnVector) batch.cols[1];
    rightCol.isRepeating = true;
    rightCol.vector[0].set(HiveDecimal.create("2.00"));
    resultCol = (DecimalColumnVector) batch.cols[2];
    expr.evaluate(batch);
    assertTrue(resultCol.vector[2].getHiveDecimal().equals(HiveDecimal.create("2.00")));
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) TestVectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DecimalColAddDecimalColumn(org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DecimalColAddDecimalColumn) Test(org.junit.Test)

Example 22 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFGroupBatches, the method fillGroupResults:

private void fillGroupResults(VectorizedRowBatch batch, boolean isLastGroupBatch) throws HiveException {
    /*
     * Do careful maintenance of the outputColVector.noNulls flag.
     */
    int evaluatorPos = -1;
    final int firstRowIndex = partitionResults.currentRow;
    for (VectorPTFEvaluatorBase evaluator : evaluators) {
        evaluatorPos++;
        if (evaluator.streamsResult()) {
            // Streaming evaluators write their output into the batch themselves.
            evaluator.evaluateGroupBatch(batch);
            if (isLastGroupBatch) {
                evaluator.doLastBatchWork();
            }
        } else {
            final int outputColumnNum = evaluator.getOutputColumnNum();
            final ColumnVector outputColVector = batch.cols[outputColumnNum];
            // Rewind the row cursor so every evaluator walks the same rows.
            partitionResults.currentRow = firstRowIndex;
            for (int row = 0; row < batch.size; row++) {
                final Object result = partitionResults.getResultForCurrentRow(evaluatorPos);
                if (result == null) {
                    outputColVector.noNulls = false;
                    outputColVector.isNull[row] = true;
                } else {
                    try {
                        switch(evaluator.getResultColumnVectorType()) {
                            case LONG:
                                ((LongColumnVector) outputColVector).vector[row] = (long) result;
                                break;
                            case DOUBLE:
                                ((DoubleColumnVector) outputColVector).vector[row] = (double) result;
                                break;
                            case DECIMAL:
                                ((DecimalColumnVector) outputColVector).set(row, (HiveDecimalWritable) result);
                                break;
                            default:
                                throw new RuntimeException("Unexpected column vector type " + evaluator.getResultColumnVectorType());
                        }
                    } catch (Exception e) {
                        throw new RuntimeException(String.format("error while setting value from evaluator: %s", evaluator.getClass()), e);
                    }
                }
                partitionResults.nextRow();
            }
        }
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 23 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFEvaluatorStreamingDecimalMin, the method evaluateGroupBatch:

@Override
public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException {
    evaluateInputExpr(batch);
    // Stream the running minimum of all non-null decimal values; maintain isNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumnNum];
    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputColVector.isNull[0]) {
            // One repeated non-null value: fold it into the running minimum once;
            // every row then shares the same result.
            HiveDecimalWritable repeated = inputColVector.vector[0];
            if (isNull || repeated.compareTo(min) < 0) {
                min.set(repeated);
                isNull = false;
            }
            outputColVector.set(0, min);
        } else if (isNull) {
            // All rows null and no minimum seen yet: output is null.
            outputColVector.isNull[0] = true;
            outputColVector.noNulls = false;
        } else {
            // All rows null: carry the previous minimum forward.
            outputColVector.set(0, min);
        }
        outputColVector.isRepeating = true;
    } else if (inputColVector.noNulls) {
        HiveDecimalWritable[] inputVector = inputColVector.vector;
        for (int row = 0; row < size; row++) {
            final HiveDecimalWritable value = inputVector[row];
            if (isNull || value.compareTo(min) < 0) {
                min.set(value);
                isNull = false;
            }
            outputColVector.set(row, min);
        }
    } else {
        boolean[] inputIsNull = inputColVector.isNull;
        int row = 0;
        // Leading null rows: before any value is seen there may be no minimum yet.
        while (inputIsNull[row]) {
            if (isNull) {
                outputColVector.isNull[row] = true;
                outputColVector.noNulls = false;
            } else {
                // Continue previous MIN.
                outputColVector.set(row, min);
            }
            if (++row >= size) {
                return;
            }
        }
        // From the first non-null row on, a running minimum always exists, so
        // null rows simply repeat it.
        HiveDecimalWritable[] inputVector = inputColVector.vector;
        for (; row < size; row++) {
            if (!inputIsNull[row]) {
                final HiveDecimalWritable value = inputVector[row];
                if (isNull || value.compareTo(min) < 0) {
                    min.set(value);
                    isNull = false;
                }
            }
            outputColVector.set(row, min);
        }
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)

Example 24 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFEvaluatorStreamingDecimalMax, the method evaluateGroupBatch:

@Override
public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException {
    evaluateInputExpr(batch);
    // Stream the running maximum of all non-null decimal values; maintain isNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[inputColumnNum];
    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
    if (inputColVector.isRepeating) {
        if (inputColVector.noNulls || !inputColVector.isNull[0]) {
            // One repeated non-null value: fold it into the running maximum once;
            // every row then shares the same result.
            HiveDecimalWritable repeated = inputColVector.vector[0];
            if (isNull || repeated.compareTo(max) > 0) {
                max.set(repeated);
                isNull = false;
            }
            outputColVector.set(0, max);
        } else if (isNull) {
            // All rows null and no maximum seen yet: output is null.
            outputColVector.isNull[0] = true;
            outputColVector.noNulls = false;
        } else {
            // All rows null: carry the previous maximum forward.
            outputColVector.set(0, max);
        }
        outputColVector.isRepeating = true;
    } else if (inputColVector.noNulls) {
        HiveDecimalWritable[] inputVector = inputColVector.vector;
        for (int row = 0; row < size; row++) {
            final HiveDecimalWritable value = inputVector[row];
            if (isNull || value.compareTo(max) > 0) {
                max.set(value);
                isNull = false;
            }
            outputColVector.set(row, max);
        }
    } else {
        boolean[] inputIsNull = inputColVector.isNull;
        int row = 0;
        // Leading null rows: before any value is seen there may be no maximum yet.
        while (inputIsNull[row]) {
            if (isNull) {
                outputColVector.isNull[row] = true;
                outputColVector.noNulls = false;
            } else {
                // Continue previous MAX.
                outputColVector.set(row, max);
            }
            if (++row >= size) {
                return;
            }
        }
        // From the first non-null row on, a running maximum always exists, so
        // null rows simply repeat it.
        HiveDecimalWritable[] inputVector = inputColVector.vector;
        for (; row < size; row++) {
            if (!inputIsNull[row]) {
                final HiveDecimalWritable value = inputVector[row];
                if (isNull || value.compareTo(max) > 0) {
                    max.set(value);
                    isNull = false;
                }
            }
            outputColVector.set(row, max);
        }
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)

Example 25 with DecimalColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFEvaluatorStreamingDecimalSum, the method evaluateGroupBatch:

@Override
public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException {
    evaluateInputExpr(batch);
    // Stream the running sum of all non-null decimal column values; maintain
    // isNull across batches so each row's output is the sum-so-far.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
    DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum];
    if (decimalColVector.isRepeating) {
        if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
            // We have a repeated value; each row still advances the running sum,
            // so per-row outputs differ and the output is NOT repeating.
            isNull = false;
            HiveDecimalWritable repeatedValue = decimalColVector.vector[0];
            for (int i = 0; i < size; i++) {
                sum.mutateAdd(repeatedValue);
                // Output row i SUM.
                outputColVector.set(i, sum);
            }
        } else {
            if (isNull) {
                // No sum started yet: the whole batch outputs null.
                outputColVector.isNull[0] = true;
                outputColVector.noNulls = false;
            } else {
                // Continue previous SUM.
                outputColVector.set(0, sum);
            }
            outputColVector.isRepeating = true;
        }
    } else if (decimalColVector.noNulls) {
        isNull = false;
        HiveDecimalWritable[] vector = decimalColVector.vector;
        for (int i = 0; i < size; i++) {
            sum.mutateAdd(vector[i]);
            // Output row i sum.
            outputColVector.set(i, sum);
        }
    } else {
        boolean[] batchIsNull = decimalColVector.isNull;
        int i = 0;
        // Leading null rows: nothing to add; output null (no sum yet) or the
        // previous sum.
        while (batchIsNull[i]) {
            if (isNull) {
                outputColVector.isNull[i] = true;
                outputColVector.noNulls = false;
            } else {
                // Continue previous SUM.
                outputColVector.set(i, sum);
            }
            if (++i >= size) {
                return;
            }
        }
        isNull = false;
        HiveDecimalWritable[] vector = decimalColVector.vector;
        // BUG FIX: the original did "sum.mutateAdd(vector[i++]); set(i, sum);",
        // incrementing i before the write. That left the first non-null row's
        // output slot unwritten and stored its sum one slot too far (at index
        // size when the row was last). Write row i's sum to row i, then advance
        // (same "set(i++, ...)" form the Min/Max evaluators use).
        sum.mutateAdd(vector[i]);
        // Output row i sum.
        outputColVector.set(i++, sum);
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                sum.mutateAdd(vector[i]);
                outputColVector.set(i, sum);
            } else {
                // Continue previous SUM.
                outputColVector.set(i, sum);
            }
        }
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)

Aggregations

DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)108 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)38 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)28 Test (org.junit.Test)28 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)27 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)25 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)25 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)23 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)18 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)16 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)14 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)7 Timestamp (java.sql.Timestamp)5 Random (java.util.Random)4 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)3 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)3 IOException (java.io.IOException)2 DateColumnVector (org.apache.hadoop.hive.ql.exec.vector.DateColumnVector)2 Decimal64ColumnVector (org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector)2