Search in sources :

Example 56 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class VectorMapJoinGenerateResultOperator method performValueExpressions.

//------------------------------------------------------------------------------------------------
protected void performValueExpressions(VectorizedRowBatch batch, int[] allMatchs, int allMatchCount) {
    /*
     *  For the moment, pretend all matched are selected so we can evaluate the value
     *  expressions.
     *
     *  Since we may use the overflow batch when generating results, we will assign the
     *  selected and real batch size later...
     */
    int[] saveSelected = batch.selected;
    batch.selected = allMatchs;
    boolean saveSelectedInUse = batch.selectedInUse;
    batch.selectedInUse = true;
    batch.size = allMatchCount;
    // Run our value expressions over whole batch.
    for (VectorExpression ve : bigTableValueExpressions) {
        ve.evaluate(batch);
    }
    batch.selected = saveSelected;
    batch.selectedInUse = saveSelectedInUse;
}
Also used : VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 57 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class Vectorizer method canSpecializeReduceSink.

private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
    // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this
    // operator was vectorized, but not native.  And, the conditions.
    VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
    boolean hasTopN = desc.getTopN() >= 0;
    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
    boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
    TableDesc keyTableDesc = desc.getKeySerializeInfo();
    Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
    boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
    TableDesc valueTableDesc = desc.getValueSerializeInfo();
    Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
    boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
    // Remember the condition variables for EXPLAIN regardless.
    vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setHasBuckets(hasBuckets);
    vectorDesc.setHasTopN(hasTopN);
    vectorDesc.setUseUniformHash(useUniformHash);
    vectorDesc.setHasDistinctColumns(hasDistinctColumns);
    vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
    vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
    // Many restrictions.
    if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasBuckets || hasTopN || !useUniformHash || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary) {
        return false;
    }
    // We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
    // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
    List<ExprNodeDesc> keysDescs = desc.getKeyCols();
    VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
    TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
    Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
    ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkKeyExpressions;
    for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
        VectorExpression ve = allKeyExpressions[i];
        reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
        reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
        reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            groupByKeyExpressionsList.add(ve);
        }
    }
    if (groupByKeyExpressionsList.size() == 0) {
        reduceSinkKeyExpressions = null;
    } else {
        reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
    VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
    int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
    TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
    Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
    ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkValueExpressions;
    for (int i = 0; i < valueDescs.size(); ++i) {
        VectorExpression ve = allValueExpressions[i];
        reduceSinkValueColumnMap[i] = ve.getOutputColumn();
        reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
        reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            reduceSinkValueExpressionsList.add(ve);
        }
    }
    if (reduceSinkValueExpressionsList.size() == 0) {
        reduceSinkValueExpressions = null;
    } else {
        reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
    vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
    vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
    vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
    vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
    vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
    return true;
}
Also used : ArrayList(java.util.ArrayList) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) LazyBinarySerDe(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) InConstantType(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) Type(org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type) VectorDeserializeType(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType) OperatorType(org.apache.hadoop.hive.ql.plan.api.OperatorType) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 58 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class Vectorizer method vectorizeFilterOperator.

public static Operator<? extends OperatorDesc> vectorizeFilterOperator(Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext) throws HiveException {
    FilterDesc filterDesc = (FilterDesc) filterOp.getConf();
    VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
    filterDesc.setVectorDesc(vectorFilterDesc);
    ExprNodeDesc predicateExpr = filterDesc.getPredicate();
    VectorExpression vectorPredicateExpr = vContext.getVectorExpression(predicateExpr, VectorExpressionDescriptor.Mode.FILTER);
    vectorFilterDesc.setPredicateExpression(vectorPredicateExpr);
    return OperatorFactory.getVectorOperator(filterOp.getCompilationOpContext(), filterDesc, vContext);
}
Also used : VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 59 with VectorExpression

use of org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression in project hive by apache.

the class Vectorizer method vectorizeSelectOperator.

public static Operator<? extends OperatorDesc> vectorizeSelectOperator(Operator<? extends OperatorDesc> selectOp, VectorizationContext vContext) throws HiveException {
    SelectDesc selectDesc = (SelectDesc) selectOp.getConf();
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    selectDesc.setVectorDesc(vectorSelectDesc);
    List<ExprNodeDesc> colList = selectDesc.getColList();
    int index = 0;
    final int size = colList.size();
    VectorExpression[] vectorSelectExprs = new VectorExpression[size];
    int[] projectedOutputColumns = new int[size];
    for (int i = 0; i < size; i++) {
        ExprNodeDesc expr = colList.get(i);
        VectorExpression ve = vContext.getVectorExpression(expr);
        projectedOutputColumns[i] = ve.getOutputColumn();
        if (ve instanceof IdentityExpression) {
            // Suppress useless evaluation.
            continue;
        }
        vectorSelectExprs[index++] = ve;
    }
    if (index < size) {
        vectorSelectExprs = Arrays.copyOf(vectorSelectExprs, index);
    }
    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
    vectorSelectDesc.setProjectedOutputColumns(projectedOutputColumns);
    return OperatorFactory.getVectorOperator(selectOp.getCompilationOpContext(), selectDesc, vContext);
}
Also used : VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IdentityExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression)

Aggregations

VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)59 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)32 ArrayList (java.util.ArrayList)30 Test (org.junit.Test)27 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)25 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)25 DynamicValueVectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.DynamicValueVectorExpression)24 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)17 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)16 IOException (java.io.IOException)13 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)13 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)12 GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan)10 VectorMapJoinHashTableResult (org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult)6 GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF)5 VectorSerializeRow (org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow)4 BRoundWithNumDigitsDoubleToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.BRoundWithNumDigitsDoubleToDouble)4 FuncLogWithBaseDoubleToDouble (org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble)4 List (java.util.List)3 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)3