Example 26 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From the class TestVectorUDFAdaptor, method getBatchStrDblLongWithStrOut:

private VectorizedRowBatch getBatchStrDblLongWithStrOut() {
    // blue and red are byte[] fields of the original test class (the UTF-8
    // bytes of "blue" and "red"); declared locally here so the snippet is
    // self-contained.
    byte[] blue = "blue".getBytes(StandardCharsets.UTF_8);
    byte[] red = "red".getBytes(StandardCharsets.UTF_8);
    VectorizedRowBatch b = new VectorizedRowBatch(4);
    BytesColumnVector strCol = new BytesColumnVector();
    LongColumnVector longCol = new LongColumnVector();
    DoubleColumnVector dblCol = new DoubleColumnVector();
    BytesColumnVector outCol = new BytesColumnVector();
    b.cols[0] = strCol;
    b.cols[1] = longCol;
    b.cols[2] = dblCol;
    b.cols[3] = outCol;
    strCol.initBuffer();
    strCol.setVal(0, blue, 0, blue.length);
    strCol.setVal(1, red, 0, red.length);
    longCol.vector[0] = 0;
    longCol.vector[1] = 1;
    dblCol.vector[0] = 0.0;
    dblCol.vector[1] = 1.0;
    // set one null value for possible later use
    longCol.isNull[1] = true;
    // but have no nulls initially
    longCol.noNulls = true;
    strCol.noNulls = true;
    dblCol.noNulls = true;
    outCol.initBuffer();
    b.size = 2;
    return b;
}
Also used: VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
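For orientation, here is a minimal sketch of how a batch built this way might be read back. The consumer loop is hypothetical and not part of the Hive test; it only assumes the getBatchStrDblLongWithStrOut() method shown above:

// Hypothetical consumer, not part of the Hive test: reads the input string
// column back out of the batch built above.
VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
BytesColumnVector strCol = (BytesColumnVector) b.cols[0];
for (int i = 0; i < b.size; i++) {
    // BytesColumnVector keeps byte[] references plus per-row start/length.
    String s = new String(strCol.vector[i], strCol.start[i], strCol.length[i],
        StandardCharsets.UTF_8);
    System.out.println(s);
}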

Example 27 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From the class VectorPTFEvaluatorDoubleMin, method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Determine minimum of all non-null double column values; maintain isGroupResultNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
    if (doubleColVector.isRepeating) {
        if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
            if (isGroupResultNull) {
                min = doubleColVector.vector[0];
                isGroupResultNull = false;
            } else {
                final double repeatedMin = doubleColVector.vector[0];
                if (repeatedMin < min) {
                    min = repeatedMin;
                }
            }
        }
    } else if (doubleColVector.noNulls) {
        double[] vector = doubleColVector.vector;
        double varMin = vector[0];
        for (int i = 1; i < size; i++) {
            final double d = vector[i];
            if (d < varMin) {
                varMin = d;
            }
        }
        if (isGroupResultNull) {
            min = varMin;
            isGroupResultNull = false;
        } else if (varMin < min) {
            min = varMin;
        }
    } else {
        boolean[] batchIsNull = doubleColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        double[] vector = doubleColVector.vector;
        double varMin = vector[i++];
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                final double d = vector[i];
                if (d < varMin) {
                    varMin = d;
                }
            }
        }
        if (isGroupResultNull) {
            min = varMin;
            isGroupResultNull = false;
        } else if (varMin < min) {
            min = varMin;
        }
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
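The leading while-loop in the final branch is the evaluators' standard idiom for skipping an all-null prefix: if every row in the batch is null, the method returns early and the group result stays null. Factored out purely for illustration (Hive has no such helper), the scan looks like this:

// Illustrative helper, not part of Hive: returns the index of the first
// non-null row, or -1 if every row in [0, size) is null.
static int firstNonNullRow(boolean[] isNull, int size) {
    for (int i = 0; i < size; i++) {
        if (!isNull[i]) {
            return i;
        }
    }
    return -1;
}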

Example 28 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From the class VectorPTFEvaluatorDoubleSum, method evaluateGroupBatch:

public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) {
    evaluateInputExpr(batch);
    // Sum all non-null double column values; maintain isGroupResultNull.
    // We do not filter when PTF is in reducer.
    Preconditions.checkState(!batch.selectedInUse);
    final int size = batch.size;
    if (size == 0) {
        return;
    }
    DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]);
    if (doubleColVector.isRepeating) {
        if (doubleColVector.noNulls || !doubleColVector.isNull[0]) {
            if (isGroupResultNull) {
                // First aggregation calculation for group.
                sum = doubleColVector.vector[0] * batch.size;
                isGroupResultNull = false;
            } else {
                sum += doubleColVector.vector[0] * batch.size;
            }
        }
    } else if (doubleColVector.noNulls) {
        double[] vector = doubleColVector.vector;
        double varSum = vector[0];
        for (int i = 1; i < size; i++) {
            varSum += vector[i];
        }
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum = varSum;
            isGroupResultNull = false;
        } else {
            sum += varSum;
        }
    } else {
        boolean[] batchIsNull = doubleColVector.isNull;
        int i = 0;
        while (batchIsNull[i]) {
            if (++i >= size) {
                return;
            }
        }
        double[] vector = doubleColVector.vector;
        double varSum = vector[i++];
        for (; i < size; i++) {
            if (!batchIsNull[i]) {
                varSum += vector[i];
            }
        }
        if (isGroupResultNull) {
            // First aggregation calculation for group.
            sum = varSum;
            isGroupResultNull = false;
        } else {
            sum += varSum;
        }
    }
}
Also used: DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)
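In the isRepeating branch, a repeating vector logically holds vector[0] in every row, so summing the whole batch collapses into a single multiplication by batch.size. A toy check of that equivalence (exact for this value; in general, repeated floating-point addition and one multiplication can round differently):

// Toy illustration, not Hive code: summing a repeating column row by row
// matches multiplying the single stored value by the row count.
DoubleColumnVector col = new DoubleColumnVector();
col.isRepeating = true;
col.vector[0] = 2.5;
int n = 4;
double rowByRow = 0;
for (int i = 0; i < n; i++) {
    rowByRow += col.vector[0]; // every logical row reads vector[0]
}
assert rowByRow == col.vector[0] * n;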

Example 29 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From the class VectorizedListColumnReader, method fillColumnVector:

private void fillColumnVector(PrimitiveObjectInspector.PrimitiveCategory category, ListColumnVector lcv, List valueList, int elementNum) {
    int total = valueList.size();
    setChildrenInfo(lcv, total, elementNum);
    switch(category) {
        case BOOLEAN:
            lcv.child = new LongColumnVector(total);
            for (int i = 0; i < valueList.size(); i++) {
                ((LongColumnVector) lcv.child).vector[i] = ((List<Integer>) valueList).get(i);
            }
            break;
        case INT:
        case BYTE:
        case SHORT:
        case DATE:
        case INTERVAL_YEAR_MONTH:
        case LONG:
            lcv.child = new LongColumnVector(total);
            for (int i = 0; i < valueList.size(); i++) {
                ((LongColumnVector) lcv.child).vector[i] = ((List<Long>) valueList).get(i);
            }
            break;
        case DOUBLE:
            lcv.child = new DoubleColumnVector(total);
            for (int i = 0; i < valueList.size(); i++) {
                ((DoubleColumnVector) lcv.child).vector[i] = ((List<Double>) valueList).get(i);
            }
            break;
        case BINARY:
        case STRING:
        case CHAR:
        case VARCHAR:
            lcv.child = new BytesColumnVector(total);
            lcv.child.init();
            for (int i = 0; i < valueList.size(); i++) {
                byte[] src = ((List<byte[]>) valueList).get(i);
                ((BytesColumnVector) lcv.child).setRef(i, src, 0, src.length);
            }
            break;
        case FLOAT:
            lcv.child = new DoubleColumnVector(total);
            for (int i = 0; i < valueList.size(); i++) {
                ((DoubleColumnVector) lcv.child).vector[i] = ((List<Float>) valueList).get(i);
            }
            break;
        case DECIMAL:
            decimalTypeCheck(type);
            int precision = type.asPrimitiveType().getDecimalMetadata().getPrecision();
            int scale = type.asPrimitiveType().getDecimalMetadata().getScale();
            lcv.child = new DecimalColumnVector(total, precision, scale);
            for (int i = 0; i < valueList.size(); i++) {
                ((DecimalColumnVector) lcv.child).vector[i].set(((List<byte[]>) valueList).get(i), scale);
            }
            break;
        case INTERVAL_DAY_TIME:
        case TIMESTAMP:
        default:
            throw new RuntimeException("Unsupported type in the list: " + type);
    }
}
Also used: DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), ArrayList (java.util.ArrayList), List (java.util.List), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
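The setChildrenInfo call is not shown in the snippet. Conceptually, a ListColumnVector stores the elements of every row's list in one flat child vector, with per-row offsets and lengths pointing into it. A minimal sketch of that layout, assuming two rows [1.0, 2.0] and [3.0] (sketch only, not the reader's actual code):

// Sketch of the ListColumnVector layout, not the reader's actual code:
// rows [1.0, 2.0] and [3.0] share one flat DoubleColumnVector child.
ListColumnVector lcv = new ListColumnVector();
DoubleColumnVector child = new DoubleColumnVector();
child.vector[0] = 1.0;
child.vector[1] = 2.0;
child.vector[2] = 3.0;
lcv.child = child;
lcv.offsets[0] = 0; lcv.lengths[0] = 2; // row 0 -> child[0..1]
lcv.offsets[1] = 2; lcv.lengths[1] = 1; // row 1 -> child[2]
lcv.childCount = 3;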

Example 30 with DoubleColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector in project hive by apache.

From the class VectorizedPrimitiveColumnReader, method decodeDictionaryIds:

/**
 * Reads `num` values into `column`, decoding them from `dictionaryIds` and `dictionary`.
 */
private void decodeDictionaryIds(int rowId, int num, ColumnVector column, TypeInfo columnType, LongColumnVector dictionaryIds) {
    System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num);
    if (column.noNulls) {
        column.noNulls = dictionaryIds.noNulls;
    }
    column.isRepeating = column.isRepeating && dictionaryIds.isRepeating;
    PrimitiveTypeInfo primitiveColumnType = (PrimitiveTypeInfo) columnType;
    switch(primitiveColumnType.getPrimitiveCategory()) {
        case INT:
        case BYTE:
        case SHORT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readInteger((int) dictionaryIds.vector[i]);
                if (!(dictionary.isValid(((LongColumnVector) column).vector[i]))) {
                    setNullValue(column, i);
                    ((LongColumnVector) column).vector[i] = 0;
                }
            }
            break;
        case DATE:
        case INTERVAL_YEAR_MONTH:
        case LONG:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readLong((int) dictionaryIds.vector[i]);
            }
            break;
        case BOOLEAN:
            for (int i = rowId; i < rowId + num; ++i) {
                ((LongColumnVector) column).vector[i] = dictionary.readBoolean((int) dictionaryIds.vector[i]) ? 1 : 0;
            }
            break;
        case DOUBLE:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.readDouble((int) dictionaryIds.vector[i]);
            }
            break;
        case BINARY:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readBytes((int) dictionaryIds.vector[i]));
            }
            break;
        case STRING:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readString((int) dictionaryIds.vector[i]));
            }
            break;
        case VARCHAR:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readVarchar((int) dictionaryIds.vector[i]));
            }
            break;
        case CHAR:
            for (int i = rowId; i < rowId + num; ++i) {
                ((BytesColumnVector) column).setVal(i, dictionary.readChar((int) dictionaryIds.vector[i]));
            }
            break;
        case FLOAT:
            for (int i = rowId; i < rowId + num; ++i) {
                ((DoubleColumnVector) column).vector[i] = dictionary.readFloat((int) dictionaryIds.vector[i]);
            }
            break;
        case DECIMAL:
            decimalTypeCheck(type);
            DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
            decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision();
            decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale();
            for (int i = rowId; i < rowId + num; ++i) {
                decimalColumnVector.vector[i].set(dictionary.readDecimal((int) dictionaryIds.vector[i]), decimalColumnVector.scale);
            }
            break;
        case TIMESTAMP:
            for (int i = rowId; i < rowId + num; ++i) {
                ((TimestampColumnVector) column).set(i, dictionary.readTimestamp((int) dictionaryIds.vector[i]));
            }
            break;
        case INTERVAL_DAY_TIME:
        default:
            throw new UnsupportedOperationException("Unsupported type: " + type);
    }
}
Also used: TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector), DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
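The setNullValue helper used in the INT/BYTE/SHORT branch is not shown above. Presumably it marks the row null and records that the column now contains nulls, along the lines of this sketch (an assumption, not quoted from Hive):

// Assumed shape of setNullValue (an assumption, not quoted from Hive):
// flag the row as null and clear the column's noNulls marker.
private static void setNullValue(ColumnVector c, int rowId) {
    c.isNull[rowId] = true;
    c.noNulls = false;
}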

Aggregations

DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) 101
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 58
Test (org.junit.Test) 37
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) 31
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 17
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) 16
TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector) 11
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 9
VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader) 9
Configuration (org.apache.hadoop.conf.Configuration) 6
Random (java.util.Random) 5
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable) 5
TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch) 4
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 4
Timestamp (java.sql.Timestamp) 3
StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector) 3
IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector) 2
ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) 2
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 2
Output (org.apache.hadoop.hive.serde2.ByteStream.Output) 2