
Example 1 with DecimalColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorizedPrimitiveColumnReader, the method decodeDictionaryIds:

/**
   * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
   */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
    System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
    if (column.noNulls) {
        column.noNulls = dictionaryIds.noNulls;
    }
    column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
    switch(descriptor.getType()) {
        case INT32:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
            }
            break;
        case INT64:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
            }
            break;
        case FLOAT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
            }
            break;
        case DOUBLE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
            }
            break;
        case INT96:
            final Calendar calendar;
            if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
                // Local time should be used if no timezone is specified
                calendar = Calendar.getInstance();
            } else {
                calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
            }
            for (int i = rowId; i < rowId + num; ++i) {
                ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
                buf.order(ByteOrder.LITTLE_ENDIAN);
                long timeOfDayNanos = buf.getLong();
                int julianDay = buf.getInt();
                NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
                Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
                ((TimestampColumnVector) column).set(i, ts);
            }
            break;
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY:
            if (column instanceof BytesColumnVector) {
                for (int i = rowId; i < rowId + num; ++i) {
                    ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
                }
            } else {
                DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
                decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
                decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
                for (int i = rowId; i < rowId + num; ++i) {
                    decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
                }
            }
            break;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
    }
}
Also used: NanoTime (org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime), TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), Calendar (java.util.Calendar), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), ByteBuffer (java.nio.ByteBuffer), Timestamp (java.sql.Timestamp), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
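The INT96 branch above delegates the Julian-day arithmetic to NanoTimeUtils and a conversion calendar. As a reference for what that decoding amounts to, here is a minimal standalone sketch (hypothetical class and method names, UTC only, ignoring the calendar adjustment the reader applies). Parquet INT96 stores 8 little-endian bytes of nanos-of-day followed by 4 bytes of Julian day, and Julian day 2440588 corresponds to the Unix epoch.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.sql.Timestamp;

public class Int96TimestampSketch {

    // Julian day number of the Unix epoch, 1970-01-01.
    private static final long JULIAN_DAY_OF_EPOCH = 2440588L;
    private static final long MILLIS_PER_DAY = 86_400_000L;

    // Decodes a 12-byte Parquet INT96 value: 8 bytes nanos-of-day, then 4 bytes Julian day.
    static Timestamp decodeInt96(byte[] int96) {
        ByteBuffer buf = ByteBuffer.wrap(int96).order(ByteOrder.LITTLE_ENDIAN);
        long timeOfDayNanos = buf.getLong();
        int julianDay = buf.getInt();
        long epochMillis = (julianDay - JULIAN_DAY_OF_EPOCH) * MILLIS_PER_DAY
                + timeOfDayNanos / 1_000_000L;
        Timestamp ts = new Timestamp(epochMillis);
        // Restore full sub-second precision from the nanos-of-day field.
        ts.setNanos((int) (timeOfDayNanos % 1_000_000_000L));
        return ts;
    }

    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN);
        buf.putLong(0L);     // midnight
        buf.putInt(2440589); // 1970-01-02
        System.out.println(decodeInt96(buf.array())); // midnight 1970-01-02 UTC, printed in the JVM's zone
    }
}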

Example 2 with DecimalColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFEvaluatorDecimalAvg, the method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Sum all non-null decimal column values for avg; maintain isGroupResultNull; after last row of
    // last group batch compute the group avg when sum is non-null.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
    if (decimalColVector.isRepeating) {
        if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
            // We have a repeated value.  The sum increases by value * batch.size.
            temp.setFromLong(batch.size);
            if (isGroupResultNull) {
                // First aggregation calculation for group.
                sum.set(decimalColVector.vector[0]);
                sum.mutateMultiply(temp);
                isGroupResultNull = false;
            } else {
                temp.mutateMultiply(decimalColVector.vector[0]);
                sum.mutateAdd(temp);
            }
            nonNullGroupCount += size;
        }
    } else if (decimalColVector.noNulls) {
        HiveDecimalWritable[] vector = decimalColVector.vector;
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum.set(vector[0]);
            isGroupResultNull = false;
        } else {
            sum.mutateAdd(vector[0]);
        }
        for (int i = 1; i < size; i++) {
            sum.mutateAdd(vector[i]);
        }
        nonNullGroupCount += size;
    } else {
        boolean[] batchIsNull = decimalColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        HiveDecimalWritable[] vector = decimalColVector.vector;
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum.set(vector[i++]);
            isGroupResultNull = false;
        } else {
            sum.mutateAdd(vector[i++]);
        }
        nonNullGroupCount++;
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                sum.mutateAdd(vector[i]);
                nonNullGroupCount++;
            }
        }
    }
    if (isLastGroupBatch) {
        if (!isGroupResultNull) {
            avg.set(sum);
            temp.setFromLong(nonNullGroupCount);
            avg.mutateDivide(temp);
        }
    }
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)
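For a sense of how the writable-mutating arithmetic above behaves outside the evaluator, here is a minimal standalone sketch (hypothetical class name) that fills a small DecimalColumnVector and averages it with the same setFromLong / mutateAdd / mutateDivide calls:

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

public class DecimalAvgSketch {
    public static void main(String[] args) {
        // A 3-row decimal(10, 2) column holding 1.50, 2.50, 4.00.
        DecimalColumnVector col = new DecimalColumnVector(3, 10, 2);
        col.set(0, HiveDecimal.create("1.50"));
        col.set(1, HiveDecimal.create("2.50"));
        col.set(2, HiveDecimal.create("4.00"));

        // Accumulate in place, as the evaluator does, to avoid per-row allocation.
        HiveDecimalWritable sum = new HiveDecimalWritable();
        sum.setFromLong(0);
        for (int i = 0; i < 3; i++) {
            sum.mutateAdd(col.vector[i]);
        }
        HiveDecimalWritable count = new HiveDecimalWritable();
        count.setFromLong(3);
        sum.mutateDivide(count);
        System.out.println(sum.getHiveDecimal()); // 8.00 / 3, roughly 2.6667
    }
}

The mutate* methods change the receiver rather than returning a new writable, which is why the evaluator can reuse sum and temp across batches.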

Example 3 with DecimalColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFEvaluatorDecimalLastValue, the method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Last row of last batch determines isGroupResultNull and decimal lastValue.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    if (!isLastGroupBatch) {
        return;
    }
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
    if (decimalColVector.isRepeating) {
        if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
            lastValue.set(decimalColVector.vector[0]);
            isGroupResultNull = false;
        } else {
            isGroupResultNull = true;
        }
    } else if (decimalColVector.noNulls) {
        lastValue.set(decimalColVector.vector[size - 1]);
        isGroupResultNull = false;
    } else {
        final int lastBatchIndex = size - 1;
        if (!decimalColVector.isNull[lastBatchIndex]) {
            lastValue.set(decimalColVector.vector[lastBatchIndex]);
            isGroupResultNull = false;
        } else {
            isGroupResultNull = true;
        }
    }
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)
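The branching above reduces to one question: which index holds the batch's last value, and is it null? A hypothetical standalone helper expressing the same logic:

import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

final class LastValueSketch {
    // Returns the batch's last value, or null when that position is NULL.
    // A repeating vector keeps its single value (or its NULL flag) at index 0.
    static HiveDecimalWritable lastValueOrNull(DecimalColumnVector col, int size) {
        int idx = col.isRepeating ? 0 : size - 1;
        return (col.noNulls || !col.isNull[idx]) ? col.vector[idx] : null;
    }
}

Note the returned writable still belongs to the batch; the evaluator copies it into lastValue with set(...) precisely because batches are recycled.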

Example 4 with DecimalColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project hive by apache.

From the class VectorPTFEvaluatorDecimalMax, the method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Determine maximum of all non-null decimal column values; maintain isGroupResultNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]);
    if (decimalColVector.isRepeating) {
        if (decimalColVector.noNulls || !decimalColVector.isNull[0]) {
            if (isGroupResultNull) {
                max.set(decimalColVector.vector[0]);
                isGroupResultNull = false;
            } else {
                HiveDecimalWritable repeatedMax = decimalColVector.vector[0];
                if (repeatedMax.compareTo(max) == 1) {
                    max.set(repeatedMax);
                }
            }
        }
    } else if (decimalColVector.noNulls) {
        HiveDecimalWritable[] vector = decimalColVector.vector;
        if (isGroupResultNull) {
            max.set(vector[0]);
            isGroupResultNull = false;
        } else {
            final HiveDecimalWritable dec = vector[0];
            if (dec.compareTo(max) == 1) {
                max.set(dec);
            }
        }
        for (int i = 1; i < size; i++) {
            final HiveDecimalWritable dec = vector[i];
            if (dec.compareTo(max) == 1) {
                max.set(dec);
            }
        }
    } else {
        boolean[] batchIsNull = decimalColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        HiveDecimalWritable[] vector = decimalColVector.vector;
        if (isGroupResultNull) {
            max.set(vector[i++]);
            isGroupResultNull = false;
        } else {
            final HiveDecimalWritable dec = vector[i++];
            if (dec.compareTo(max) == 1) {
                max.set(dec);
            }
        }
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                final HiveDecimalWritable dec = vector[i];
                if (dec.compareTo(max) == 1) {
                    max.set(dec);
                }
            }
        }
    }
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)
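One caveat when adapting this pattern: compareTo(max) == 1 relies on the implementation returning exactly -1, 0, or 1. The general Comparable contract only guarantees the sign of the result, so the portable form of the check is a sign test, as in this minimal sketch (hypothetical class name):

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;

public class DecimalMaxSketch {
    public static void main(String[] args) {
        HiveDecimalWritable max = new HiveDecimalWritable();
        max.set(HiveDecimal.create("1.00"));
        HiveDecimalWritable dec = new HiveDecimalWritable();
        dec.set(HiveDecimal.create("2.50"));
        // Sign test: only the sign of compareTo is guaranteed by the contract.
        if (dec.compareTo(max) > 0) {
            max.set(dec);
        }
        System.out.println(max.getHiveDecimal()); // 2.5
    }
}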

Example 5 with DecimalColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector in project flink by apache.

From the class AbstractOrcColumnVector, the method createDecimalVector:

private static DecimalColumnVector createDecimalVector(int batchSize, int precision, int scale, Object value) {
    DecimalColumnVector dv = new DecimalColumnVector(batchSize, precision, scale);
    if (value == null) {
        dv.noNulls = false;
        dv.isNull[0] = true;
        dv.isRepeating = true;
    } else {
        dv.set(0, value instanceof HiveDecimal ? (HiveDecimal) value : HiveDecimal.create((BigDecimal) value));
        dv.isRepeating = true;
        dv.isNull[0] = false;
    }
    return dv;
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)
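Since createDecimalVector is private to Flink's AbstractOrcColumnVector, reusing the pattern means building the vector directly. A minimal sketch of the non-null branch (hypothetical class name):

import java.math.BigDecimal;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;

public class ConstantDecimalVectorSketch {
    public static void main(String[] args) {
        // A batch-wide constant decimal(10, 2) column.
        DecimalColumnVector dv = new DecimalColumnVector(1024, 10, 2);
        dv.set(0, HiveDecimal.create(new BigDecimal("3.14")));
        dv.isRepeating = true; // every row reads the value stored at index 0
        System.out.println(dv.vector[0].getHiveDecimal()); // 3.14
    }
}

Setting isRepeating after writing index 0 is what lets a 1024-row batch cost a single writable; readers must check isRepeating before indexing past 0.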

Aggregations

DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 108 usages
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 38 usages
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 28 usages
Test (org.junit.Test): 28 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 27 usages
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 25 usages
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 25 usages
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 23 usages
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 18 usages
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 16 usages
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 14 usages
IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector): 7 usages
Timestamp (java.sql.Timestamp): 5 usages
Random (java.util.Random): 4 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 4 usages
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 3 usages
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 3 usages
IOException (java.io.IOException): 2 usages
DateColumnVector (org.apache.hadoop.hive.ql.exec.vector.DateColumnVector): 2 usages
Decimal64ColumnVector (org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector): 2 usages