Search in sources :

Example 76 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in the Apache Hive project.

The evaluateTimestamp method of the VectorUDFDateAddColCol class:

/**
 * Converts the timestamp at {@code index} to an epoch-day count and shifts it
 * by {@code numDays}.
 *
 * @param columnVector input vector; must be a TimestampColumnVector
 * @param index        row index within the vector
 * @param numDays      day offset to apply
 * @return the adjusted date value, expressed in days since the epoch
 */
protected long evaluateTimestamp(ColumnVector columnVector, int index, long numDays) {
    TimestampColumnVector timestampVector = (TimestampColumnVector) columnVector;
    // Reduce the row's epoch-millis timestamp to a whole-day count.
    long epochDays = DateWritableV2.millisToDays(timestampVector.getTime(index));
    // isPositive selects between adding (date_add) and subtracting (date_sub) the offset.
    return isPositive ? epochDays + numDays : epochDays - numDays;
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)

Example 77 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in the Apache Hive project.

The evaluateTimestamp method of the VectorUDFDateDiffColScalar class:

/**
 * Returns the difference, in whole days, between the timestamp at
 * {@code index} and the scalar base date.
 *
 * @param columnVector input vector; must be a TimestampColumnVector
 * @param index        row index within the vector
 * @return day count of the row's date minus {@code baseDate}
 */
protected int evaluateTimestamp(ColumnVector columnVector, int index) {
    // Load the row's epoch-millis value into the reusable scratch Date.
    final long epochMillis = ((TimestampColumnVector) columnVector).getTime(index);
    date.setTime(epochMillis);
    // Difference in days relative to the scalar operand.
    return DateWritableV2.dateToDays(date) - baseDate;
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)

Example 78 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in the Apache Hive project.

The evaluate method of the TimestampToStringUnaryUDF class:

// Formats every selected timestamp row into the output BytesColumnVector by
// delegating to func(), propagating the input column's null flags. Covers the
// standard vectorization cases: repeating input, no-nulls input with/without a
// selection vector, and nullable input with/without a selection vector.
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumnNum[0]];
    int[] sel = batch.selected;
    int n = batch.size;
    BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum];
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    // Prepare the output vector's byte buffer before any string values are written.
    outputColVector.initBuffer();
    if (n == 0) {
        // Nothing to do
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        // Repeating input: compute row 0 once and mark the output repeating too.
        if (inputColVector.noNulls || !inputIsNull[0]) {
            // Set isNull before call in case it changes it mind.
            outputIsNull[0] = false;
            func(outputColVector, inputColVector, 0);
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before call in case it changes it mind.
                    outputIsNull[i] = false;
                    func(outputColVector, inputColVector, i);
                }
            } else {
                // Output already marked noNulls: skip the per-row isNull writes.
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                func(outputColVector, inputColVector, i);
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                // Mirror the input null flag; only format the non-null rows.
                outputColVector.isNull[i] = inputColVector.isNull[i];
                if (!inputColVector.isNull[i]) {
                    func(outputColVector, inputColVector, i);
                }
            }
        } else {
            // Bulk-copy the null mask, then format only the non-null rows.
            System.arraycopy(inputColVector.isNull, 0, outputColVector.isNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inputColVector.isNull[i]) {
                    func(outputColVector, inputColVector, i);
                }
            }
        }
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)

Example 79 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in the Apache Hive project.

The evaluate method of the TimestampColumnInList class:

// Evaluates an IN-list membership test over a timestamp column: for each
// selected row, writes 1 to the output LongColumnVector if the row's timestamp
// is in the IN-list set, 0 otherwise, while propagating the input null flags.
@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    if (inSet == null) {
        // Lazily build the lookup set from the IN-list values on first use.
        inSet = new HashSet<Timestamp>(inListValues.length);
        for (Timestamp val : inListValues) {
            inSet.add(val);
        }
    }
    TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputColumnNum[0]];
    LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum];
    int[] sel = batch.selected;
    boolean[] inputIsNull = inputColVector.isNull;
    boolean[] outputIsNull = outputColVector.isNull;
    int n = batch.size;
    long[] outputVector = outputColVector.vector;
    // return immediately if batch is empty
    if (n == 0) {
        return;
    }
    // We do not need to do a column reset since we are carefully changing the output.
    outputColVector.isRepeating = false;
    if (inputColVector.isRepeating) {
        // Repeating input: test row 0 once and mark the output repeating too.
        if (inputColVector.noNulls || !inputIsNull[0]) {
            // Set isNull before call in case it changes it mind.
            outputIsNull[0] = false;
            outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0;
        } else {
            outputIsNull[0] = true;
            outputColVector.noNulls = false;
        }
        outputColVector.isRepeating = true;
        return;
    }
    if (inputColVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outputColVector.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    // Set isNull before call in case it changes it mind.
                    outputIsNull[i] = false;
                    outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
                }
            } else {
                // Output already marked noNulls: skip the per-row isNull writes.
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
                }
            }
        } else {
            if (!outputColVector.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outputColVector.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
            }
        }
    } else /* there are nulls in the inputColVector */
    {
        // Carefully handle NULLs...
        outputColVector.noNulls = false;
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                // Mirror the input null flag; only test the non-null rows.
                outputIsNull[i] = inputIsNull[i];
                if (!inputIsNull[i]) {
                    outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
                }
            }
        } else {
            // Bulk-copy the null mask, then test only the non-null rows.
            System.arraycopy(inputIsNull, 0, outputIsNull, 0, n);
            for (int i = 0; i != n; i++) {
                if (!inputIsNull[i]) {
                    outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0;
                }
            }
        }
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) Timestamp(java.sql.Timestamp) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)

Example 80 with TimestampColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector in the Apache Hive project.

The readPrimitive method of the Deserializer class:

// Copies one primitive-typed Arrow FieldVector into the corresponding Hive
// ColumnVector, value by value, translating Arrow's per-value null check
// (arrowVector.isNull(i)) into Hive's isNull[] flags. Dispatches on the Arrow
// minor type; throws IllegalArgumentException for unsupported types.
private void readPrimitive(FieldVector arrowVector, ColumnVector hiveVector) {
    final Types.MinorType minorType = arrowVector.getMinorType();
    final int size = arrowVector.getValueCount();
    switch(minorType) {
        case BIT:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((BitVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case TINYINT:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((TinyIntVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case SMALLINT:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((SmallIntVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case INT:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((IntVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case BIGINT:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((BigIntVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case FLOAT4:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((DoubleColumnVector) hiveVector).vector[i] = ((Float4Vector) arrowVector).get(i);
                    }
                }
            }
            break;
        case FLOAT8:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((DoubleColumnVector) hiveVector).vector[i] = ((Float8Vector) arrowVector).get(i);
                    }
                }
            }
            break;
        case VARCHAR:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((BytesColumnVector) hiveVector).setVal(i, ((VarCharVector) arrowVector).get(i));
                    }
                }
            }
            break;
        case DATEDAY:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((DateDayVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case TIMESTAMPMILLI:
        case TIMESTAMPMILLITZ:
        case TIMESTAMPMICRO:
        case TIMESTAMPMICROTZ:
        case TIMESTAMPNANO:
        case TIMESTAMPNANOTZ:
            {
                // All Arrow timestamp encodings store a single scalar at some unit
                // (millis/micros/nanos); split it into whole seconds + sub-second nanos.
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        // Time = second + sub-second
                        final long time = ((TimeStampVector) arrowVector).get(i);
                        long second;
                        int subSecondInNanos;
                        switch(minorType) {
                            case TIMESTAMPMILLI:
                            case TIMESTAMPMILLITZ:
                                {
                                    subSecondInNanos = (int) ((time % MILLIS_PER_SECOND) * NS_PER_MILLIS);
                                    second = time / MILLIS_PER_SECOND;
                                }
                                break;
                            case TIMESTAMPMICROTZ:
                            case TIMESTAMPMICRO:
                                {
                                    subSecondInNanos = (int) ((time % MICROS_PER_SECOND) * NS_PER_MICROS);
                                    second = time / MICROS_PER_SECOND;
                                }
                                break;
                            case TIMESTAMPNANOTZ:
                            case TIMESTAMPNANO:
                                {
                                    subSecondInNanos = (int) (time % NS_PER_SECOND);
                                    second = time / NS_PER_SECOND;
                                }
                                break;
                            default:
                                throw new IllegalArgumentException();
                        }
                        final TimestampColumnVector timestampColumnVector = (TimestampColumnVector) hiveVector;
                        // A nanosecond value should not be negative
                        if (subSecondInNanos < 0) {
                            // Pre-epoch timestamps: Java division truncates toward zero, so the
                            // remainder can be negative.
                            // So add one second to the negative nanosecond value to make it positive
                            subSecondInNanos += NS_PER_SECOND;
                            // Subtract one second from the second value because we added one second
                            second -= 1;
                        }
                        // NOTE(review): time[i] is set to whole seconds scaled to millis, with the
                        // entire sub-second fraction carried only in nanos[i] — confirm this matches
                        // TimestampColumnVector's time/nanos convention used by the readers.
                        timestampColumnVector.time[i] = second * MILLIS_PER_SECOND;
                        timestampColumnVector.nanos[i] = subSecondInNanos;
                    }
                }
            }
            break;
        case VARBINARY:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((BytesColumnVector) hiveVector).setVal(i, ((VarBinaryVector) arrowVector).get(i));
                    }
                }
            }
            break;
        case DECIMAL:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((DecimalColumnVector) hiveVector).set(i, HiveDecimal.create(((DecimalVector) arrowVector).getObject(i)));
                    }
                }
            }
            break;
        case INTERVALYEAR:
            {
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        ((LongColumnVector) hiveVector).vector[i] = ((IntervalYearVector) arrowVector).get(i);
                    }
                }
            }
            break;
        case INTERVALDAY:
            {
                // Reusable holders/scratch objects for the per-row conversion.
                final IntervalDayVector intervalDayVector = (IntervalDayVector) arrowVector;
                final NullableIntervalDayHolder intervalDayHolder = new NullableIntervalDayHolder();
                final HiveIntervalDayTime intervalDayTime = new HiveIntervalDayTime();
                for (int i = 0; i < size; i++) {
                    if (arrowVector.isNull(i)) {
                        VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
                    } else {
                        hiveVector.isNull[i] = false;
                        intervalDayVector.get(i, intervalDayHolder);
                        // Fold days + whole seconds of the millis field into seconds; the
                        // remaining sub-second millis become the nano component.
                        final long seconds = intervalDayHolder.days * SECOND_PER_DAY + intervalDayHolder.milliseconds / MILLIS_PER_SECOND;
                        final int nanos = (intervalDayHolder.milliseconds % 1_000) * NS_PER_MILLIS;
                        intervalDayTime.set(seconds, nanos);
                        ((IntervalDayTimeColumnVector) hiveVector).set(i, intervalDayTime);
                    }
                }
            }
            break;
        default:
            throw new IllegalArgumentException();
    }
}
Also used : Types(org.apache.arrow.vector.types.Types) BitVector(org.apache.arrow.vector.BitVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) NullableIntervalDayHolder(org.apache.arrow.vector.holders.NullableIntervalDayHolder) IntervalDayVector(org.apache.arrow.vector.IntervalDayVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float4Vector(org.apache.arrow.vector.Float4Vector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) BigIntVector(org.apache.arrow.vector.BigIntVector) IntervalYearVector(org.apache.arrow.vector.IntervalYearVector) DateDayVector(org.apache.arrow.vector.DateDayVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime)

Aggregations

TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)85 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)31 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)26 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)21 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)18 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)17 Timestamp (java.sql.Timestamp)16 Test (org.junit.Test)11 Random (java.util.Random)10 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)9 TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2)7 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)6 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)5 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)4 Timestamp (org.apache.hadoop.hive.common.type.Timestamp)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 HiveIntervalDayTime (org.apache.hadoop.hive.common.type.HiveIntervalDayTime)2 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)2 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)2 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)2