Search in sources :

Example 76 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

In the class ColumnVectorGenUtil, the method generateDoubleColumnVector:

/**
 * Builds a DoubleColumnVector of the given size filled with random non-zero
 * doubles, optionally repeating a single value and/or inserting nulls.
 *
 * @param nulls     when true, null entries are interspersed (every entry when
 *                  repeating, otherwise at a random frequency)
 * @param repeating when true the vector is marked repeating and every
 *                  non-null slot holds the same value
 * @param size      number of entries to generate
 * @param rand      randomness source
 * @return the populated vector
 */
public static DoubleColumnVector generateDoubleColumnVector(boolean nulls, boolean repeating, int size, Random rand) {
    DoubleColumnVector result = new DoubleColumnVector(size);
    result.noNulls = !nulls;
    result.isRepeating = repeating;
    // Draw the value to repeat; zero is excluded so generated data is
    // distinguishable from default-initialized slots.
    double repeatedValue = rand.nextDouble();
    while (repeatedValue == 0) {
        repeatedValue = rand.nextDouble();
    }
    int nullFrequency = generateNullFrequency(rand);
    int i = 0;
    while (i < size) {
        if (nulls && (repeating || i % nullFrequency == 0)) {
            result.isNull[i] = true;
            result.vector[i] = DOUBLE_VECTOR_NULL_VALUE;
        } else {
            double candidate = repeating ? repeatedValue : rand.nextDouble();
            if (candidate == 0) {
                // Re-draw this slot: zero is reserved (see above).
                continue;
            }
            result.isNull[i] = false;
            result.vector[i] = candidate;
        }
        i++;
    }
    return result;
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 77 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

In the class VectorizedListColumnReader, the method getChildData:

/**
 * Get the child ColumnVector of ListColumnVector
 */
/**
 * Copies the [start, start + length) slice of a ListColumnVector's child into
 * a freshly allocated ColumnVector of the matching concrete type.
 *
 * @param lcv   list vector whose {@code child} holds the flattened element data
 * @param index row index within the list vector
 * @return a new ColumnVector holding the list's elements at {@code index}, or
 *         {@code null} when the list entry itself is null or the child is not
 *         one of the supported vector types
 * @throws RuntimeException if the element range does not fit in an int, or the
 *         copy fails (out-of-bounds etc.)
 */
private ColumnVector getChildData(ListColumnVector lcv, int index) {
    if (lcv.offsets[index] > Integer.MAX_VALUE || lcv.lengths[index] > Integer.MAX_VALUE) {
        throw new RuntimeException("The element number in list is out of scope.");
    }
    if (lcv.isNull[index]) {
        return null;
    }
    int start = (int) lcv.offsets[index];
    int length = (int) lcv.lengths[index];
    ColumnVector child = lcv.child;
    ColumnVector resultCV = null;
    try {
        if (child instanceof LongColumnVector) {
            resultCV = new LongColumnVector(length);
            System.arraycopy(((LongColumnVector) child).vector, start, ((LongColumnVector) resultCV).vector, 0, length);
        } else if (child instanceof DoubleColumnVector) {
            resultCV = new DoubleColumnVector(length);
            System.arraycopy(((DoubleColumnVector) child).vector, start, ((DoubleColumnVector) resultCV).vector, 0, length);
        } else if (child instanceof BytesColumnVector) {
            BytesColumnVector bytesChild = (BytesColumnVector) child;
            BytesColumnVector bytesResult = new BytesColumnVector(length);
            // BUG FIX: a BytesColumnVector entry is the triple
            // (vector[i], start[i], length[i]). Copying only the buffer
            // references left start/length at their defaults, so the copied
            // entries decoded the wrong byte ranges.
            System.arraycopy(bytesChild.vector, start, bytesResult.vector, 0, length);
            System.arraycopy(bytesChild.start, start, bytesResult.start, 0, length);
            System.arraycopy(bytesChild.length, start, bytesResult.length, 0, length);
            resultCV = bytesResult;
        } else if (child instanceof DecimalColumnVector) {
            DecimalColumnVector decimalChild = (DecimalColumnVector) child;
            resultCV = new DecimalColumnVector(length, decimalChild.precision, decimalChild.scale);
            System.arraycopy(decimalChild.vector, start, ((DecimalColumnVector) resultCV).vector, 0, length);
        }
    } catch (Exception e) {
        // Uniform diagnostic context for every child type (the original
        // wrapped only the LongColumnVector copy).
        throw new RuntimeException("Fail to copy at index:" + index + ", start:" + start + ",length:" + length + ", offset len:" + lcv.offsets.length + ", len len:" + lcv.lengths.length, e);
    }
    // NOTE(review): per-element isNull flags of the child are not copied,
    // matching the original behavior — confirm callers do not rely on them.
    return resultCV;
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IOException(java.io.IOException) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 78 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

In the class VectorPTFGroupBatches, the method fillGroupResults:

/**
 * Writes each non-streaming evaluator's group result into element 0 of its
 * output column, marking the column repeating (and null when the group
 * result is null).
 *
 * @param batch the output batch whose columns receive the group results
 */
private void fillGroupResults(VectorizedRowBatch batch) {
    for (VectorPTFEvaluatorBase evaluator : evaluators) {
        final int outColNum = evaluator.getOutputColumnNum();
        if (evaluator.streamsResult()) {
            // Streaming evaluators emit their results row by row elsewhere.
            continue;
        }
        ColumnVector outCol = batch.cols[outColNum];
        outCol.isRepeating = true;
        boolean groupResultIsNull = evaluator.isGroupResultNull();
        outCol.isNull[0] = groupResultIsNull;
        if (groupResultIsNull) {
            outCol.noNulls = false;
            continue;
        }
        switch (evaluator.getResultColumnVectorType()) {
            case LONG:
                ((LongColumnVector) outCol).vector[0] = evaluator.getLongGroupResult();
                break;
            case DOUBLE:
                ((DoubleColumnVector) outCol).vector[0] = evaluator.getDoubleGroupResult();
                break;
            case DECIMAL:
                ((DecimalColumnVector) outCol).vector[0].set(evaluator.getDecimalGroupResult());
                break;
            default:
                throw new RuntimeException("Unexpected column vector type " + evaluator.getResultColumnVectorType());
        }
    }
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 79 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

In the class VectorPTFOperator, the method isPartitionChanged:

/**
 * Compares element 0 of each partition-key column in the batch against the
 * remembered current-partition values.
 *
 * @param batch incoming batch whose partition columns are repeated
 * @return true as soon as any partition column differs from the remembered
 *         value; false when all match (NULL compares equal to NULL)
 */
private boolean isPartitionChanged(VectorizedRowBatch batch) {
    for (int col = 0; col < partitionColumnMap.length; col++) {
        ColumnVector vec = batch.cols[partitionColumnMap[col]];
        // Vector reduce key (i.e. partition) columns are repeated, so
        // element 0 is representative of the whole batch.
        final boolean vecIsNull = !vec.noNulls && vec.isNull[0];
        if (vecIsNull != currentPartitionIsNull[col]) {
            return true;
        }
        if (vecIsNull) {
            // Both null: treated as equal for partition-change purposes.
            continue;
        }
        switch (partitionColumnVectorTypes[col]) {
            case LONG:
                if (((LongColumnVector) vec).vector[0] != currentPartitionLongs[col]) {
                    return true;
                }
                break;
            case DOUBLE:
                if (((DoubleColumnVector) vec).vector[0] != currentPartitionDoubles[col]) {
                    return true;
                }
                break;
            case BYTES:
                {
                    BytesColumnVector bytesVec = (BytesColumnVector) vec;
                    boolean sameBytes = StringExpr.equal(bytesVec.vector[0], bytesVec.start[0], bytesVec.length[0], currentPartitionByteArrays[col], 0, currentPartitionByteLengths[col]);
                    if (!sameBytes) {
                        return true;
                    }
                }
                break;
            case DECIMAL:
                if (!currentPartitionDecimals[col].equals(((DecimalColumnVector) vec).vector[0])) {
                    return true;
                }
                break;
            case TIMESTAMP:
                if (((TimestampColumnVector) vec).compareTo(0, currentPartitionTimestamps[col]) != 0) {
                    return true;
                }
                break;
            case INTERVAL_DAY_TIME:
                if (((IntervalDayTimeColumnVector) vec).compareTo(0, currentPartitionIntervalDayTimes[col]) != 0) {
                    return true;
                }
                break;
            default:
                throw new RuntimeException("Unexpected column vector type " + partitionColumnVectorTypes[col]);
        }
    }
    return false;
}
Also used : DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Example 80 with DoubleColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

In the class VectorPTFOperator, the method setCurrentPartition:

/**
 * Snapshots element 0 of each partition-key column of the batch into the
 * currentPartition* fields, so later batches can be compared against it.
 * Reuses previously allocated buffers/objects where possible to avoid
 * per-partition garbage.
 *
 * @param batch incoming batch whose partition columns are repeated
 */
private void setCurrentPartition(VectorizedRowBatch batch) {
    final int count = partitionColumnMap.length;
    for (int i = 0; i < count; i++) {
        ColumnVector colVector = batch.cols[partitionColumnMap[i]];
        // Partition columns are repeated -- so we test element 0.
        final boolean isNull = !colVector.noNulls && colVector.isNull[0];
        currentPartitionIsNull[i] = isNull;
        if (isNull) {
            // A null key has no value to remember; the stale entry in the
            // per-type arrays is ignored while currentPartitionIsNull[i] holds.
            continue;
        }
        switch(partitionColumnVectorTypes[i]) {
            case LONG:
                currentPartitionLongs[i] = ((LongColumnVector) colVector).vector[0];
                break;
            case DOUBLE:
                currentPartitionDoubles[i] = ((DoubleColumnVector) colVector).vector[0];
                break;
            case BYTES:
                {
                    BytesColumnVector byteColVector = (BytesColumnVector) colVector;
                    byte[] bytes = byteColVector.vector[0];
                    final int start = byteColVector.start[0];
                    final int length = byteColVector.length[0];
                    // Reuse the existing buffer when it is large enough;
                    // otherwise allocate a fresh copy of the key bytes.
                    if (currentPartitionByteArrays[i] == null || currentPartitionByteLengths[i] < length) {
                        currentPartitionByteArrays[i] = Arrays.copyOfRange(bytes, start, start + length);
                    } else {
                        System.arraycopy(bytes, start, currentPartitionByteArrays[i], 0, length);
                    }
                    // The logical length may be shorter than the buffer.
                    currentPartitionByteLengths[i] = length;
                }
                break;
            case DECIMAL:
                // Lazily allocate the writable, then overwrite it in place.
                if (currentPartitionDecimals[i] == null) {
                    currentPartitionDecimals[i] = new HiveDecimalWritable();
                }
                currentPartitionDecimals[i].set(((DecimalColumnVector) colVector).vector[0]);
                break;
            case TIMESTAMP:
                // Lazily allocate, then update the mutable Timestamp in place.
                if (currentPartitionTimestamps[i] == null) {
                    currentPartitionTimestamps[i] = new Timestamp(0);
                }
                ((TimestampColumnVector) colVector).timestampUpdate(currentPartitionTimestamps[i], 0);
                break;
            case INTERVAL_DAY_TIME:
                // Lazily allocate, then update the interval value in place.
                if (currentPartitionIntervalDayTimes[i] == null) {
                    currentPartitionIntervalDayTimes[i] = new HiveIntervalDayTime();
                }
                ((IntervalDayTimeColumnVector) colVector).intervalDayTimeUpdate(currentPartitionIntervalDayTimes[i], 0);
                break;
            default:
                throw new RuntimeException("Unexpected column vector type " + partitionColumnVectorTypes[i]);
        }
    }
}
Also used : TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) Timestamp(java.sql.Timestamp) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) TimestampColumnVector(org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) IntervalDayTimeColumnVector(org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime)

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)104 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)59 Test (org.junit.Test)37 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)33 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)18 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)17 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)13 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)10 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)9 Configuration (org.apache.hadoop.conf.Configuration)7 Random (java.util.Random)5 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)5 Timestamp (java.sql.Timestamp)4 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)4 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)4 StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector)3 HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal)2 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2