
Example 1 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class VectorizedPrimitiveColumnReader, method decodeDictionaryIds:

/**
   * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`.
   */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, LongColumnVector dictionaryIds) {
    System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
    if (column.noNulls) {
        column.noNulls = dictionaryIds.noNulls;
    }
    column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
    switch(descriptor.getType()) {
        case INT32:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToInt((int) dictionaryIds.vector[i]);
            }
            break;
        case INT64:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.decodeToLong((int) dictionaryIds.vector[i]);
            }
            break;
        case FLOAT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToFloat((int) dictionaryIds.vector[i]);
            }
            break;
        case DOUBLE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.decodeToDouble((int) dictionaryIds.vector[i]);
            }
            break;
        case INT96:
            final Calendar calendar;
            if (Strings.isNullOrEmpty(this.conversionTimeZone)) {
                // Local time should be used if no timezone is specified
                calendar = Calendar.getInstance();
            } else {
                calendar = Calendar.getInstance(TimeZone.getTimeZone(this.conversionTimeZone));
            }
            for (int i = rowId; i < rowId + num; ++i) {
                ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer();
                buf.order(ByteOrder.LITTLE_ENDIAN);
                long timeOfDayNanos = buf.getLong();
                int julianDay = buf.getInt();
                NanoTime nt = new NanoTime(julianDay, timeOfDayNanos);
                Timestamp ts = NanoTimeUtils.getTimestamp(nt, calendar);
                ((TimestampColumnVector) column).set(i, ts);
            }
            break;
        case BINARY:
        case FIXED_LEN_BYTE_ARRAY:
            if (column instanceof BytesColumnVector) {
                for (int i = rowId; i < rowId + num; ++i) {
                    ((BytesColumnVector) column).setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe());
                }
            } else {
                DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
                decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
                decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
                for (int i = rowId; i < rowId + num; ++i) {
                    decimalColumnVector.vector[i].set(dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe(), decimalColumnVector.scale);
                }
            }
            break;
        default:
            throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType());
    }
}
Also used: NanoTime (org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime), TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), Calendar (java.util.Calendar), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), ByteBuffer (java.nio.ByteBuffer), Timestamp (java.sql.Timestamp), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
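
The INT96 branch above unpacks each dictionary-decoded 12-byte value as little-endian: the first 8 bytes are nanoseconds within the day and the last 4 bytes are a Julian day number, which NanoTimeUtils then converts using an optional conversion time zone. Below is a minimal standalone sketch of that conversion, assuming UTC and the standard Parquet INT96 layout; the class and helper names are illustrative, not part of Hive.

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.sql.Timestamp;
import java.util.concurrent.TimeUnit;

public class Int96DecodeSketch {

    // Julian day number of the Unix epoch (1970-01-01), the same offset
    // Hive's NanoTimeUtils conversion is built around.
    private static final long JULIAN_DAY_OF_EPOCH = 2440588L;

    /** Decodes one 12-byte Parquet INT96 value into a Timestamp, treating it as UTC. */
    static Timestamp decodeInt96(byte[] int96) {
        ByteBuffer buf = ByteBuffer.wrap(int96).order(ByteOrder.LITTLE_ENDIAN);
        long timeOfDayNanos = buf.getLong(); // first 8 bytes: nanos within the day
        int julianDay = buf.getInt();        // last 4 bytes: Julian day number
        long epochSeconds = TimeUnit.DAYS.toSeconds(julianDay - JULIAN_DAY_OF_EPOCH)
                + timeOfDayNanos / 1_000_000_000L;
        Timestamp ts = new Timestamp(epochSeconds * 1000L);
        ts.setNanos((int) (timeOfDayNanos % 1_000_000_000L));
        return ts;
    }

    public static void main(String[] args) {
        // timeOfDayNanos = 0, julianDay = 2440588 -> the epoch instant
        // 1970-01-01T00:00:00Z (printed in the JVM's default zone).
        ByteBuffer buf = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN);
        buf.putLong(0L).putInt(2440588);
        System.out.println(decodeInt96(buf.array()));
    }
}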

Example 2 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class VectorPTFEvaluatorDoubleMin, method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Determine minimum of all non-null double column values; maintain isGroupResultNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
    if (doubleColVector.isRepeating) {
        if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
            if (isGroupResultNull) {
                min = doubleColVector.vector[0];
                isGroupResultNull = false;
            } else {
                final double repeatedMin = doubleColVector.vector[0];
                if (repeatedMin < min) {
                    min = repeatedMin;
                }
            }
        }
    } else if (doubleColVector.noNulls) {
        double[] vector = doubleColVector.vector;
        double varMin = vector[0];
        for (int i = 1; i < size; i++) {
            final double d = vector[i];
            if (d < varMin) {
                varMin = d;
            }
        }
        if (isGroupResultNull) {
            min = varMin;
            isGroupResultNull = false;
        } else if (varMin < min) {
            min = varMin;
        }
    } else {
        boolean[] batchIsNull = doubleColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        double[] vector = doubleColVector.vector;
        double varMin = vector[i++];
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                final double d = vector[i];
                if (d < varMin) {
                    varMin = d;
                }
            }
        }
        if (isGroupResultNull) {
            min = varMin;
            isGroupResultNull = false;
        } else if (varMin < min) {
            min = varMin;
        }
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
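
The three branches (isRepeating, noNulls, per-row isNull) are the standard way to consume a Hive column vector, and the same shape recurs in every evaluator below. Here is a compact harness that exercises that branching against a hand-built batch; minOfColumn is a hypothetical helper mirroring the evaluator's scan, not a Hive API.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class DoubleMinSketch {

    /** Null-skipping min over one column, mirroring VectorPTFEvaluatorDoubleMin's branches. */
    static Double minOfColumn(VectorizedRowBatch batch, int colNum) {
        DoubleColumnVector col = (DoubleColumnVector) batch.cols[colNum];
        final int size = batch.size;
        if (size == 0) {
            return null;
        }
        if (col.isRepeating) {
            // One physical value stands for the whole batch.
            return (col.noNulls || !col.isNull[0]) ? col.vector[0] : null;
        }
        Double min = null;
        for (int i = 0; i < size; i++) {
            if (col.noNulls || !col.isNull[i]) {
                final double d = col.vector[i];
                if (min == null || d < min) {
                    min = d;
                }
            }
        }
        return min;
    }

    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        DoubleColumnVector col = new DoubleColumnVector();
        batch.cols[0] = col;
        batch.size = 4;
        col.noNulls = false;
        col.vector[0] = 3.0;
        col.isNull[1] = true;   // null row: skipped by the scan
        col.vector[2] = 1.5;
        col.vector[3] = 2.0;
        System.out.println(minOfColumn(batch, 0)); // 1.5
    }
}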

Example 3 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class VectorPTFEvaluatorDoubleSum, method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Sum all non-null double column values; maintain isGroupResultNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
    if (doubleColVector.isRepeating) {
        if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
            if (isGroupResultNull) {
                // First aggregation calculation for group.
                sum = doubleColVector.vector[0] * batch.size;
                isGroupResultNull = false;
            } else {
                sum += doubleColVector.vector[0] * batch.size;
            }
        }
    } else if (doubleColVector.noNulls) {
        double[] vector = doubleColVector.vector;
        double varSum = vector[0];
        for (int i = 1; i < size; i++) {
            varSum += vector[i];
        }
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum = varSum;
            isGroupResultNull = false;
        } else {
            sum += varSum;
        }
    } else {
        boolean[] batchIsNull = doubleColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        double[] vector = doubleColVector.vector;
        double varSum = vector[i++];
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                varSum += vector[i];
            }
        }
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum = varSum;
            isGroupResultNull = false;
        } else {
            sum += varSum;
        }
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
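
One detail worth noting: in the isRepeating branch the batch's contribution is computed as vector[0] * batch.size, with no loop at all. A toy check of that equivalence follows (standalone sketch; the value is chosen to be exactly representable so the floating-point results match bit for bit).

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;

public class RepeatingSumSketch {
    public static void main(String[] args) {
        DoubleColumnVector col = new DoubleColumnVector();
        col.isRepeating = true;
        col.vector[0] = 2.5;   // only slot 0 is meaningful when isRepeating is set
        final int size = 1024;

        // Loop-free shortcut, as in VectorPTFEvaluatorDoubleSum's repeating branch.
        double shortcut = col.vector[0] * size;

        // The per-row scan that the shortcut replaces.
        double scanned = 0.0;
        for (int i = 0; i < size; i++) {
            scanned += col.vector[0];
        }

        // true here; in general, multiplication and repeated addition
        // can differ in the last ulp for non-representable values.
        System.out.println(shortcut == scanned);
    }
}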

Example 4 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class VectorPTFEvaluatorDoubleFirstValue, method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // First row determines isGroupResultNull and double firstValue; stream fill result as repeated.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    if (!haveFirstValue) {
        final int size = batch.size;
        if (size == 0) {
            return;
        }
        DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
        if (doubleColVector.isRepeating) {
            if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
                firstValue = doubleColVector.vector[0];
                isGroupResultNull = false;
            }
        } else if (doubleColVector.noNulls) {
            firstValue = doubleColVector.vector[0];
            isGroupResultNull = false;
        } else {
            if (!doubleColVector.isNull[0]) {
                firstValue = doubleColVector.vector[0];
                isGroupResultNull = false;
            }
        }
        haveFirstValue = true;
    }
    /*
     * Do careful maintenance of the outputColVector.noNulls flag.
     */
    // First value is repeated for all batches.
    DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum];
    outputColVector.isRepeating = true;
    if (isGroupResultNull) {
        outputColVector.noNulls = false;
        outputColVector.isNull[0] = true;
    } else {
        outputColVector.isNull[0] = false;
        outputColVector.vector[0] = firstValue;
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
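
On the output side, the evaluator writes a single physical slot and marks the vector repeating, so every logical row of the batch shares that value. The sketch below shows how a consumer honors those conventions when reading a row back; readRow is a hypothetical helper, not a Hive API.

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;

public class RepeatingReadSketch {

    /** Reads logical row r, honoring the isRepeating and isNull conventions. */
    static Double readRow(DoubleColumnVector col, int r) {
        int idx = col.isRepeating ? 0 : r;
        if (!col.noNulls && col.isNull[idx]) {
            return null;
        }
        return col.vector[idx];
    }

    public static void main(String[] args) {
        DoubleColumnVector out = new DoubleColumnVector();
        out.isRepeating = true;
        out.isNull[0] = false;
        out.vector[0] = 42.0;  // the group's first value, as the evaluator writes it
        System.out.println(readRow(out, 0));    // 42.0
        System.out.println(readRow(out, 999));  // 42.0 -- same value for every row
    }
}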

Example 5 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From class VectorPTFEvaluatorDoubleLastValue, method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Last row of last batch determines isGroupResultNull and double lastValue.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    if (!isLastGroupBatch) {
        return;
    }
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
    if (doubleColVector.isRepeating) {
        if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
            lastValue = doubleColVector.vector[0];
            isGroupResultNull = false;
        } else {
            isGroupResultNull = true;
        }
    } else if (doubleColVector.noNulls) {
        lastValue = doubleColVector.vector[size - 1];
        isGroupResultNull = false;
    } else {
        final int lastBatchIndex = size - 1;
        if (!doubleColVector.isNull[lastBatchIndex]) {
            lastValue = doubleColVector.vector[lastBatchIndex];
            isGroupResultNull = false;
        } else {
            isGroupResultNull = true;
        }
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
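
The distinguishing move here is the isLastGroupBatch gate: every batch except the group's final one returns immediately, so only the final batch's last row is ever examined. A simplified, self-contained simulation of that gating (hypothetical names, plain arrays standing in for row batches):

public class LastValueGateSketch {

    static Double lastValue = null;

    /** Simplified version of the gating in VectorPTFEvaluatorDoubleLastValue. */
    static void evaluate(double[] rows, boolean isLastGroupBatch) {
        if (!isLastGroupBatch || rows.length == 0) {
            return;   // intermediate batches are ignored entirely
        }
        lastValue = rows[rows.length - 1];
    }

    public static void main(String[] args) {
        evaluate(new double[]{1, 2, 3}, false);  // skipped
        evaluate(new double[]{4, 5, 6}, false);  // skipped
        evaluate(new double[]{7, 8, 9}, true);   // last batch of the group
        System.out.println(lastValue);           // 9.0
    }
}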

Aggregations

Usage counts in the indexed sources:

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 123
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 60
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector): 39
Test (org.junit.Test): 38
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 25
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 23
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector): 19
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 14
VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader): 9
Configuration (org.apache.hadoop.conf.Configuration): 7
IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector): 7
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 6
Random (java.util.Random): 5
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 5
Timestamp (java.sql.Timestamp): 4
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch): 4
StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3
IOException (java.io.IOException): 2
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 2