Example 1 with Type

Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type in project hive by apache.

From the class Vectorizer, the method specializeReduceSinkOperator:

private Operator<? extends OperatorDesc> specializeReduceSinkOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, ReduceSinkDesc desc, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    Class<? extends Operator<?>> opClass = null;
    Type[] reduceSinkKeyColumnVectorTypes = vectorReduceSinkInfo.getReduceSinkKeyColumnVectorTypes();
    // By default, we can always use the multi-key class.
    VectorReduceSinkDesc.ReduceSinkKeyType reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.MULTI_KEY;
    // Look for single column optimization.
    if (reduceSinkKeyColumnVectorTypes.length == 1) {
        LOG.info("Vectorizer vectorizeOperator groupby typeName " + vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()[0]);
        Type columnVectorType = reduceSinkKeyColumnVectorTypes[0];
        switch(columnVectorType) {
            case LONG:
                {
                    PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()[0]).getPrimitiveCategory();
                    switch(primitiveCategory) {
                        case BOOLEAN:
                        case BYTE:
                        case SHORT:
                        case INT:
                        case LONG:
                            reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.LONG;
                            break;
                        default:
                            // Other integer types not supported yet.
                            break;
                    }
                }
                break;
            case BYTES:
                reduceSinkKeyType = VectorReduceSinkDesc.ReduceSinkKeyType.STRING;
                break;
            default:
                // Stay with multi-key.
                break;
        }
    }
    switch(reduceSinkKeyType) {
        case LONG:
            opClass = VectorReduceSinkLongOperator.class;
            break;
        case STRING:
            opClass = VectorReduceSinkStringOperator.class;
            break;
        case MULTI_KEY:
            opClass = VectorReduceSinkMultiKeyOperator.class;
            break;
        default:
            throw new HiveException("Unknown reduce sink key type " + reduceSinkKeyType);
    }
    VectorReduceSinkDesc vectorDesc = (VectorReduceSinkDesc) desc.getVectorDesc();
    vectorDesc.setReduceSinkKeyType(reduceSinkKeyType);
    vectorDesc.setVectorReduceSinkInfo(vectorReduceSinkInfo);
    vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
    LOG.info("Vectorizer vectorizeOperator reduce sink class " + vectorOp.getClass().getSimpleName());
    return vectorOp;
}
Also used:
InConstantType (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType)
HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType)
HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType)
Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)
VectorDeserializeType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType)
OperatorType (org.apache.hadoop.hive.ql.plan.api.OperatorType)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
VectorReduceSinkDesc (org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc)
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)
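
To see the single-key specialization in isolation, here is a minimal sketch of the selection logic. The KeyKind enum, the chooseKeyKind method, and the class name are hypothetical stand-ins for VectorReduceSinkDesc.ReduceSinkKeyType and the inline switches above; only ColumnVector.Type and PrimitiveCategory are real Hive types.

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

public class ReduceSinkKeySpecializationSketch {

    // Hypothetical stand-in for VectorReduceSinkDesc.ReduceSinkKeyType.
    enum KeyKind { LONG, STRING, MULTI_KEY }

    // A single LONG-backed integer key or a single BYTES key can use a
    // specialized operator; everything else stays with the multi-key class.
    static KeyKind chooseKeyKind(Type[] keyColumnVectorTypes, PrimitiveCategory firstKeyCategory) {
        if (keyColumnVectorTypes.length != 1) {
            return KeyKind.MULTI_KEY;
        }
        switch (keyColumnVectorTypes[0]) {
            case LONG:
                switch (firstKeyCategory) {
                    case BOOLEAN:
                    case BYTE:
                    case SHORT:
                    case INT:
                    case LONG:
                        return KeyKind.LONG;
                    default:
                        // Other LONG-backed categories are not specialized.
                        return KeyKind.MULTI_KEY;
                }
            case BYTES:
                return KeyKind.STRING;
            default:
                return KeyKind.MULTI_KEY;
        }
    }

    public static void main(String[] args) {
        System.out.println(chooseKeyKind(new Type[] { Type.LONG }, PrimitiveCategory.INT));             // LONG
        System.out.println(chooseKeyKind(new Type[] { Type.BYTES }, PrimitiveCategory.STRING));         // STRING
        System.out.println(chooseKeyKind(new Type[] { Type.LONG, Type.BYTES }, PrimitiveCategory.INT)); // MULTI_KEY
    }
}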

Example 2 with Type

Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type in project hive by apache.

From the class VectorColumnSetInfo, the method addKey:

protected void addKey(String outputType) throws HiveException {
    indexLookup[addIndex] = new KeyLookupHelper();
    String typeName = VectorizationContext.mapTypeNameSynonyms(outputType);
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
    Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
    switch(columnVectorType) {
        case LONG:
            longIndices[longIndicesIndex] = addIndex;
            indexLookup[addIndex].setLong(longIndicesIndex);
            ++longIndicesIndex;
            break;
        case DOUBLE:
            doubleIndices[doubleIndicesIndex] = addIndex;
            indexLookup[addIndex].setDouble(doubleIndicesIndex);
            ++doubleIndicesIndex;
            break;
        case BYTES:
            stringIndices[stringIndicesIndex] = addIndex;
            indexLookup[addIndex].setString(stringIndicesIndex);
            ++stringIndicesIndex;
            break;
        case DECIMAL:
            decimalIndices[decimalIndicesIndex] = addIndex;
            indexLookup[addIndex].setDecimal(decimalIndicesIndex);
            ++decimalIndicesIndex;
            break;
        case TIMESTAMP:
            timestampIndices[timestampIndicesIndex] = addIndex;
            indexLookup[addIndex].setTimestamp(timestampIndicesIndex);
            ++timestampIndicesIndex;
            break;
        case INTERVAL_DAY_TIME:
            intervalDayTimeIndices[intervalDayTimeIndicesIndex] = addIndex;
            indexLookup[addIndex].setIntervalDayTime(intervalDayTimeIndicesIndex);
            ++intervalDayTimeIndicesIndex;
            break;
        default:
            throw new HiveException("Unexpected column vector type " + columnVectorType);
    }
    addIndex++;
}
Also used:
Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
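
The same per-type bookkeeping can be written more compactly with an EnumMap. This is a minimal sketch under that assumption, not Hive code (Hive keeps the parallel primitive-index arrays above to avoid boxing); the class and method names are hypothetical.

import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;

public class KeyIndexPartitionSketch {

    // Groups key positions by their column vector type, mirroring the
    // longIndices/doubleIndices/... arrays that addKey maintains.
    static Map<Type, List<Integer>> partitionByType(Type[] keyColumnVectorTypes) {
        Map<Type, List<Integer>> byType = new EnumMap<>(Type.class);
        for (int i = 0; i < keyColumnVectorTypes.length; i++) {
            byType.computeIfAbsent(keyColumnVectorTypes[i], t -> new ArrayList<>()).add(i);
        }
        return byType;
    }

    public static void main(String[] args) {
        Type[] keys = { Type.LONG, Type.BYTES, Type.LONG, Type.DECIMAL };
        // Prints {LONG=[0, 2], BYTES=[1], DECIMAL=[3]}
        System.out.println(partitionByType(keys));
    }
}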

Example 3 with Type

Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type in project hive by apache.

From the class VectorCopyRow, the method init:

public void init(VectorColumnMapping columnMapping) throws HiveException {
    int count = columnMapping.getCount();
    subRowToBatchCopiersByValue = new CopyRow[count];
    subRowToBatchCopiersByReference = new CopyRow[count];
    for (int i = 0; i < count; i++) {
        int inputColumn = columnMapping.getInputColumns()[i];
        int outputColumn = columnMapping.getOutputColumns()[i];
        TypeInfo typeInfo = columnMapping.getTypeInfos()[i];
        Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
        CopyRow copyRowByValue = null;
        CopyRow copyRowByReference = null;
        switch(columnVectorType) {
            case LONG:
                copyRowByValue = new LongCopyRow(inputColumn, outputColumn);
                break;
            case TIMESTAMP:
                copyRowByValue = new TimestampCopyRow(inputColumn, outputColumn);
                break;
            case INTERVAL_DAY_TIME:
                copyRowByValue = new IntervalDayTimeCopyRow(inputColumn, outputColumn);
                break;
            case DOUBLE:
                copyRowByValue = new DoubleCopyRow(inputColumn, outputColumn);
                break;
            case BYTES:
                copyRowByValue = new BytesCopyRowByValue(inputColumn, outputColumn);
                copyRowByReference = new BytesCopyRowByReference(inputColumn, outputColumn);
                break;
            case DECIMAL:
                copyRowByValue = new DecimalCopyRow(inputColumn, outputColumn);
                break;
            default:
                throw new HiveException("Unexpected column vector type " + columnVectorType);
        }
        subRowToBatchCopiersByValue[i] = copyRowByValue;
        if (copyRowByReference == null) {
            subRowToBatchCopiersByReference[i] = copyRowByValue;
        } else {
            subRowToBatchCopiersByReference[i] = copyRowByReference;
        }
    }
}
Also used:
Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
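
The detail worth noting is that only BYTES columns get a distinct by-reference copier (one that can share the underlying byte array); every other type reuses the by-value copier in the by-reference slot. A minimal sketch of that selection, with hypothetical Copier types standing in for the CopyRow subclasses:

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;

public class CopierSelectionSketch {

    interface Copier {
        // Copies one value from an input column to an output column.
    }

    static class ValueCopier implements Copier { }
    static class ReferenceCopier implements Copier { }

    // Only BYTES has a cheaper by-reference variant; all other types fall
    // back to the by-value copier, exactly as init does above.
    static Copier[] copiersFor(Type columnVectorType) {
        Copier byValue = new ValueCopier();
        Copier byReference =
                (columnVectorType == Type.BYTES) ? new ReferenceCopier() : byValue;
        return new Copier[] { byValue, byReference };
    }

    public static void main(String[] args) {
        System.out.println(copiersFor(Type.BYTES)[1] instanceof ReferenceCopier); // true
        System.out.println(copiersFor(Type.LONG)[1] instanceof ReferenceCopier);  // false
    }
}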

Example 4 with Type

Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type in project hive by apache.

From the class VectorHashKeyWrapperBatch, the method assignRowColumn:

public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int keyIndex, VectorHashKeyWrapper kw) throws HiveException {
    ColumnVector colVector = batch.cols[keyIndex];
    if (kw.isNull(keyIndex)) {
        colVector.noNulls = false;
        colVector.isNull[batchIndex] = true;
        return;
    }
    colVector.isNull[batchIndex] = false;
    ColumnVector.Type columnVectorType = columnVectorTypes[keyIndex];
    int columnTypeSpecificIndex = columnTypeSpecificIndices[keyIndex];
    switch(columnVectorType) {
        case LONG:
        case DECIMAL_64:
            ((LongColumnVector) colVector).vector[batchIndex] = kw.getLongValue(columnTypeSpecificIndex);
            break;
        case DOUBLE:
            ((DoubleColumnVector) colVector).vector[batchIndex] = kw.getDoubleValue(columnTypeSpecificIndex);
            break;
        case BYTES:
            ((BytesColumnVector) colVector).setVal(batchIndex, kw.getBytes(columnTypeSpecificIndex), kw.getByteStart(columnTypeSpecificIndex), kw.getByteLength(columnTypeSpecificIndex));
            break;
        case DECIMAL:
            ((DecimalColumnVector) colVector).vector[batchIndex].set(kw.getDecimal(columnTypeSpecificIndex));
            break;
        case TIMESTAMP:
            ((TimestampColumnVector) colVector).set(batchIndex, kw.getTimestamp(columnTypeSpecificIndex));
            break;
        case INTERVAL_DAY_TIME:
            ((IntervalDayTimeColumnVector) colVector).set(batchIndex, kw.getIntervalDayTime(columnTypeSpecificIndex));
            break;
        default:
            throw new HiveException("Unexpected column vector type " + columnVectorType);
    }
}
Also used:
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)
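
The null handling at the top of assignRowColumn follows the standard ColumnVector convention: clear noNulls as soon as any null is written, and keep isNull accurate per row. A minimal sketch of that convention for a single nullable long column; the class and method names are hypothetical, but the VectorizedRowBatch and LongColumnVector fields are real Hive API.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class NullableAssignSketch {

    // Writes a nullable long value into a batch column, following the same
    // noNulls/isNull convention assignRowColumn uses above.
    static void assignNullableLong(VectorizedRowBatch batch, int batchIndex,
            int columnIndex, Long value) {
        LongColumnVector col = (LongColumnVector) batch.cols[columnIndex];
        if (value == null) {
            col.noNulls = false;            // the column now contains at least one null
            col.isNull[batchIndex] = true;
            return;
        }
        col.isNull[batchIndex] = false;
        col.vector[batchIndex] = value;
    }
}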

Example 5 with Type

Use of org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type in project hive by apache.

From the class VectorHashKeyWrapperBatch, the method getWritableKeyValue:

/**
 * Gets the row-mode writable object value of a key from a key wrapper.
 *
 * @param kw the key wrapper holding the key values
 * @param keyIndex the index of the key column
 * @param keyOutputWriter the writer that converts the raw key value to a writable
 * @return the writable key value, or null if the key is null
 */
public Object getWritableKeyValue(VectorHashKeyWrapper kw, int keyIndex, VectorExpressionWriter keyOutputWriter) throws HiveException {
    if (kw.isNull(keyIndex)) {
        return null;
    }
    ColumnVector.Type columnVectorType = columnVectorTypes[keyIndex];
    int columnTypeSpecificIndex = columnTypeSpecificIndices[keyIndex];
    switch(columnVectorType) {
        case LONG:
            return keyOutputWriter.writeValue(kw.getLongValue(columnTypeSpecificIndex));
        case DOUBLE:
            return keyOutputWriter.writeValue(kw.getDoubleValue(columnTypeSpecificIndex));
        case BYTES:
            return keyOutputWriter.writeValue(kw.getBytes(columnTypeSpecificIndex), kw.getByteStart(columnTypeSpecificIndex), kw.getByteLength(columnTypeSpecificIndex));
        case DECIMAL:
            return keyOutputWriter.writeValue(kw.getDecimal(columnTypeSpecificIndex));
        case DECIMAL_64:
            throw new RuntimeException("Getting writable for DECIMAL_64 not supported");
        case TIMESTAMP:
            return keyOutputWriter.writeValue(kw.getTimestamp(columnTypeSpecificIndex));
        case INTERVAL_DAY_TIME:
            return keyOutputWriter.writeValue(kw.getIntervalDayTime(columnTypeSpecificIndex));
        default:
            throw new HiveException("Unexpected column vector type " + columnVectorType);
    }
}
Also used:
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type)
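
A hedged usage sketch: materializing every key of one wrapper into row-mode writables, roughly as a vectorized group-by operator would when flushing aggregation output. The helper name and its parameters are assumptions for illustration; only getWritableKeyValue and the Hive types involved are real.

// Assumed context: keyWrappersBatch is a populated VectorHashKeyWrapperBatch,
// kw is one of its key wrappers, and keyOutputWriters is a parallel array of
// VectorExpressionWriter instances, one per key column.
Object[] writableKeys(VectorHashKeyWrapperBatch keyWrappersBatch,
        VectorHashKeyWrapper kw,
        VectorExpressionWriter[] keyOutputWriters) throws HiveException {
    Object[] keyValues = new Object[keyOutputWriters.length];
    for (int k = 0; k < keyOutputWriters.length; k++) {
        keyValues[k] = keyWrappersBatch.getWritableKeyValue(kw, k, keyOutputWriters[k]);
    }
    return keyValues;
}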

Aggregations

Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type): 20 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 14 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 10 uses
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 8 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 7 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 7 uses
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 7 uses
DecimalTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo): 6 uses
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector): 5 uses
InConstantType (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType): 5 uses
WindowType (org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType): 5 uses
HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType): 5 uses
HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType): 5 uses
VectorDeserializeType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType): 5 uses
OperatorType (org.apache.hadoop.hive.ql.plan.api.OperatorType): 5 uses
ArrayList (java.util.ArrayList): 4 uses
PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory): 4 uses
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 3 uses
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector): 3 uses
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 3 uses