Search in sources :

Example 91 with HiveException

use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

the class Vectorizer method specializeMapJoinOperator.

Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, MapJoinDesc desc, VectorMapJoinDesc vectorDesc) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    Class<? extends Operator<?>> opClass = null;
    VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo();
    HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
    HashTableKind hashTableKind = HashTableKind.NONE;
    HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
    VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE;
    if (vectorDesc.getIsFastHashTableEnabled()) {
        hashTableImplementationType = HashTableImplementationType.FAST;
    } else {
        hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
    }
    int joinType = desc.getConds()[0].getType();
    boolean isInnerBigOnly = false;
    if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
        isInnerBigOnly = true;
    }
    // By default, we can always use the multi-key class.
    hashTableKeyType = HashTableKeyType.MULTI_KEY;
    if (!HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
        // Look for single column optimization.
        byte posBigTable = (byte) desc.getPosBigTable();
        Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
        List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
        if (bigTableKeyExprs.size() == 1) {
            TypeInfo typeInfo = bigTableKeyExprs.get(0).getTypeInfo();
            LOG.info("Vectorizer vectorizeOperator map join typeName " + typeInfo.getTypeName());
            switch(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
                case BOOLEAN:
                    hashTableKeyType = HashTableKeyType.BOOLEAN;
                    break;
                case BYTE:
                    hashTableKeyType = HashTableKeyType.BYTE;
                    break;
                case SHORT:
                    hashTableKeyType = HashTableKeyType.SHORT;
                    break;
                case INT:
                    hashTableKeyType = HashTableKeyType.INT;
                    break;
                case LONG:
                    hashTableKeyType = HashTableKeyType.LONG;
                    break;
                case STRING:
                case CHAR:
                case VARCHAR:
                case BINARY:
                    hashTableKeyType = HashTableKeyType.STRING;
                default:
            }
        }
    }
    switch(joinType) {
        case JoinDesc.INNER_JOIN:
            if (!isInnerBigOnly) {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER;
                hashTableKind = HashTableKind.HASH_MAP;
            } else {
                vectorMapJoinVariation = VectorMapJoinVariation.INNER_BIG_ONLY;
                hashTableKind = HashTableKind.HASH_MULTISET;
            }
            break;
        case JoinDesc.LEFT_OUTER_JOIN:
        case JoinDesc.RIGHT_OUTER_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.OUTER;
            hashTableKind = HashTableKind.HASH_MAP;
            break;
        case JoinDesc.LEFT_SEMI_JOIN:
            vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI;
            hashTableKind = HashTableKind.HASH_SET;
            break;
        default:
            throw new HiveException("Unknown join type " + joinType);
    }
    LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
    switch(hashTableKeyType) {
        case BOOLEAN:
        case BYTE:
        case SHORT:
        case INT:
        case LONG:
            switch(vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerLongOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiLongOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterLongOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case STRING:
            switch(vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerStringOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiStringOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterStringOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        case MULTI_KEY:
            switch(vectorMapJoinVariation) {
                case INNER:
                    opClass = VectorMapJoinInnerMultiKeyOperator.class;
                    break;
                case INNER_BIG_ONLY:
                    opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
                    break;
                case LEFT_SEMI:
                    opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
                    break;
                case OUTER:
                    opClass = VectorMapJoinOuterMultiKeyOperator.class;
                    break;
                default:
                    throw new HiveException("Unknown operator variation " + vectorMapJoinVariation);
            }
            break;
        default:
            throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
    }
    boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
    vectorDesc.setHashTableKind(hashTableKind);
    vectorDesc.setHashTableKeyType(hashTableKeyType);
    vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation);
    vectorDesc.setMinMaxEnabled(minMaxEnabled);
    vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo);
    vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext, vectorDesc);
    LOG.info("Vectorizer vectorizeOperator map join class " + vectorOp.getClass().getSimpleName());
    return vectorOp;
}
Also used : VectorMapJoinVariation(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) HashTableKeyType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType) HashTableImplementationType(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType) HashTableKind(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind) UDFToByte(org.apache.hadoop.hive.ql.udf.UDFToByte) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 92 with HiveException

use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

the class Vectorizer method validateAndVectorizeOperator.

public Operator<? extends OperatorDesc> validateAndVectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isReduce, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException, VectorizerCannotVectorizeException {
    Operator<? extends OperatorDesc> vectorOp = null;
    // This "global" allows various validation methods to set the "not vectorized" reason.
    currentOperator = op;
    boolean isNative;
    try {
        switch(op.getType()) {
            case MAPJOIN:
                {
                    if (op instanceof MapJoinOperator) {
                        if (!validateMapJoinOperator((MapJoinOperator) op)) {
                            throw new VectorizerCannotVectorizeException();
                        }
                    } else if (op instanceof SMBMapJoinOperator) {
                        if (!validateSMBMapJoinOperator((SMBMapJoinOperator) op)) {
                            throw new VectorizerCannotVectorizeException();
                        }
                    } else {
                        setOperatorNotSupported(op);
                        throw new VectorizerCannotVectorizeException();
                    }
                    if (op instanceof MapJoinOperator) {
                        MapJoinDesc desc = (MapJoinDesc) op.getConf();
                        VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc();
                        boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinDesc);
                        if (!specialize) {
                            Class<? extends Operator<?>> opClass = null;
                            // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
                            List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
                            boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
                            if (!isOuterAndFiltered) {
                                opClass = VectorMapJoinOperator.class;
                            } else {
                                opClass = VectorMapJoinOuterFilteredOperator.class;
                            }
                            vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), desc, vContext, vectorMapJoinDesc);
                            isNative = false;
                        } else {
                            // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
                            // HiveConf.setBoolVar(physicalContext.getConf(),
                            // HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
                            vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinDesc);
                            isNative = true;
                            if (vectorTaskColumnInfo != null) {
                                VectorMapJoinInfo vectorMapJoinInfo = vectorMapJoinDesc.getVectorMapJoinInfo();
                                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableKeyExpressions())) {
                                    vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                                }
                                if (usesVectorUDFAdaptor(vectorMapJoinDesc.getAllBigTableValueExpressions())) {
                                    vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                                }
                            }
                        }
                    } else {
                        Preconditions.checkState(op instanceof SMBMapJoinOperator);
                        SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
                        VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
                        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext, vectorSMBJoinDesc);
                        isNative = false;
                    }
                }
                break;
            case REDUCESINK:
                {
                    if (!validateReduceSinkOperator((ReduceSinkOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    ReduceSinkDesc reduceDesc = (ReduceSinkDesc) op.getConf();
                    VectorReduceSinkDesc vectorReduceSinkDesc = new VectorReduceSinkDesc();
                    boolean specialize = canSpecializeReduceSink(reduceDesc, isTezOrSpark, vContext, vectorReduceSinkDesc);
                    if (!specialize) {
                        vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), reduceDesc, vContext, vectorReduceSinkDesc);
                        isNative = false;
                    } else {
                        vectorOp = specializeReduceSinkOperator(op, vContext, reduceDesc, vectorReduceSinkDesc);
                        isNative = true;
                        if (vectorTaskColumnInfo != null) {
                            VectorReduceSinkInfo vectorReduceSinkInfo = vectorReduceSinkDesc.getVectorReduceSinkInfo();
                            if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                            if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                }
                break;
            case FILTER:
                {
                    if (!validateFilterOperator((FilterOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    VectorFilterDesc vectorFilterDesc = new VectorFilterDesc();
                    vectorOp = vectorizeFilterOperator(op, vContext, vectorFilterDesc);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
                        if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
                break;
            case SELECT:
                {
                    if (!validateSelectOperator((SelectOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
                    vectorOp = vectorizeSelectOperator(op, vContext, vectorSelectDesc);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
                        if (usesVectorUDFAdaptor(vectorSelectExprs)) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
                break;
            case GROUPBY:
                {
                    // The validateGroupByOperator method will update vectorGroupByDesc.
                    VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
                    if (!validateGroupByOperator((GroupByOperator) op, isReduce, isTezOrSpark, vectorGroupByDesc)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    ImmutablePair<Operator<? extends OperatorDesc>, String> pair = doVectorizeGroupByOperator(op, vContext, vectorGroupByDesc);
                    if (pair.left == null) {
                        setOperatorIssue(pair.right);
                        throw new VectorizerCannotVectorizeException();
                    }
                    vectorOp = pair.left;
                    isNative = false;
                    if (vectorTaskColumnInfo != null) {
                        VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
                        if (usesVectorUDFAdaptor(vecKeyExpressions)) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                        VectorAggregationDesc[] vecAggrDescs = vectorGroupByDesc.getVecAggrDescs();
                        for (VectorAggregationDesc vecAggrDesc : vecAggrDescs) {
                            if (usesVectorUDFAdaptor(vecAggrDesc.getInputExpression())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                }
                break;
            case FILESINK:
                {
                    if (!validateFileSinkOperator((FileSinkOperator) op)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
                    VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext, vectorFileSinkDesc);
                    isNative = false;
                }
                break;
            case LIMIT:
                {
                    // No validation.
                    LimitDesc limitDesc = (LimitDesc) op.getConf();
                    VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext, vectorLimitDesc);
                    isNative = true;
                }
                break;
            case EVENT:
                {
                    // No validation.
                    AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
                    VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext, vectorEventDesc);
                    isNative = true;
                }
                break;
            case PTF:
                {
                    // The validatePTFOperator method will update vectorPTFDesc.
                    VectorPTFDesc vectorPTFDesc = new VectorPTFDesc();
                    if (!validatePTFOperator((PTFOperator) op, vContext, vectorPTFDesc)) {
                        throw new VectorizerCannotVectorizeException();
                    }
                    vectorOp = vectorizePTFOperator(op, vContext, vectorPTFDesc);
                    isNative = true;
                }
                break;
            case HASHTABLESINK:
                {
                    // No validation.
                    SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
                    VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext, vectorSparkHashTableSinkDesc);
                    isNative = true;
                }
                break;
            case SPARKPRUNINGSINK:
                {
                    // No validation.
                    SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
                    VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext, vectorSparkPartitionPruningSinkDesc);
                    // need to maintain the unique ID so that target map works can
                    // read the output
                    ((SparkPartitionPruningSinkOperator) vectorOp).setUniqueId(((SparkPartitionPruningSinkOperator) op).getUniqueId());
                    isNative = true;
                }
                break;
            default:
                setOperatorNotSupported(op);
                throw new VectorizerCannotVectorizeException();
        }
    } catch (HiveException e) {
        setOperatorIssue(e.getMessage());
        throw new VectorizerCannotVectorizeException();
    }
    Preconditions.checkState(vectorOp != null);
    if (vectorTaskColumnInfo != null && !isNative) {
        vectorTaskColumnInfo.setAllNative(false);
    }
    LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
    LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
    return vectorOp;
}
Also used : VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorReduceSinkObjectHashOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkObjectHashOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorPTFOperator(org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator) VectorReduceSinkEmptyKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator) VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) AppMasterEventDesc(org.apache.hadoop.hive.ql.plan.AppMasterEventDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) ArrayList(java.util.ArrayList) List(java.util.List) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) SparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) VectorAggregationDesc(org.apache.hadoop.hive.ql.exec.vector.VectorAggregationDesc) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) VectorPTFDesc(org.apache.hadoop.hive.ql.plan.VectorPTFDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 93 with HiveException

use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

the class Vectorizer method doProcessChild.

private Operator<? extends OperatorDesc> doProcessChild(Operator<? extends OperatorDesc> child, Operator<? extends OperatorDesc> vectorParent, boolean isReduce, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws VectorizerCannotVectorizeException {
    // Use vector parent to get VectorizationContext.
    final VectorizationContext vContext;
    if (vectorParent instanceof VectorizationContextRegion) {
        vContext = ((VectorizationContextRegion) vectorParent).getOutputVectorizationContext();
    } else {
        vContext = ((VectorizationOperator) vectorParent).getInputVectorizationContext();
    }
    OperatorDesc desc = child.getConf();
    Operator<? extends OperatorDesc> vectorChild;
    try {
        vectorChild = validateAndVectorizeOperator(child, vContext, isReduce, isTezOrSpark, vectorTaskColumnInfo);
    } catch (HiveException e) {
        String issue = "exception: " + VectorizationContext.getStackTraceAsSingleLine(e);
        setNodeIssue(issue);
        throw new VectorizerCannotVectorizeException();
    }
    return vectorChild;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) VectorizationContextRegion(org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion)

Example 94 with HiveException

use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

the class PartitionPruner method getAllPartsFromCacheOrServer.

private static PrunedPartitionList getAllPartsFromCacheOrServer(Table tab, String key, boolean unknownPartitions, Map<String, PrunedPartitionList> partsCache) throws SemanticException {
    PrunedPartitionList ppList = partsCache == null ? null : partsCache.get(key);
    if (ppList != null) {
        return ppList;
    }
    Set<Partition> parts;
    try {
        parts = getAllPartitions(tab);
    } catch (HiveException e) {
        throw new SemanticException(e);
    }
    ppList = new PrunedPartitionList(tab, key, parts, null, unknownPartitions);
    if (partsCache != null) {
        partsCache.put(key, ppList);
    }
    return ppList;
}
Also used : Partition(org.apache.hadoop.hive.ql.metadata.Partition) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 95 with HiveException

use of org.apache.hadoop.hive.ql.metadata.HiveException in project hive by apache.

the class ImportSemanticAnalyzer method prepareImport.

/**
 * The same code is used from both the "repl load" as well as "import".
 * Given that "repl load" now supports two modes "repl load dbName [location]" and
 * "repl load [location]" in which case the database name has to be taken from the table metadata
 * by default and then over-ridden if something specified on the command line.
 *
 * hence for import to work correctly we have to pass in the sessionState default Db via the
 * parsedDbName parameter
 */
public static boolean prepareImport(boolean isImportCmd, boolean isLocationSet, boolean isExternalSet, boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName, String overrideDBName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn, EximUtil.SemanticAnalyzerWrapperContext x, UpdatedMetaDataTracker updatedMetadata) throws IOException, MetaException, HiveException, URISyntaxException {
    // initialize load path
    URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
    Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
    FileSystem fs = FileSystem.get(fromURI, x.getConf());
    x.getInputs().add(toReadEntity(fromPath, x.getConf()));
    MetaData rv;
    try {
        rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
    }
    if (rv.getTable() == null) {
        // nothing to do here, silently return.
        return false;
    }
    ReplicationSpec replicationSpec = rv.getReplicationSpec();
    if (replicationSpec.isNoop()) {
        // nothing to do here, silently return.
        x.getLOG().debug("Current update with ID:{} is noop", replicationSpec.getCurrentReplicationState());
        return false;
    }
    if (isImportCmd) {
        replicationSpec.setReplSpecType(ReplicationSpec.Type.IMPORT);
    }
    String dbname = rv.getTable().getDbName();
    if ((overrideDBName != null) && (!overrideDBName.isEmpty())) {
        // If the parsed statement contained a db.tablename specification, prefer that.
        dbname = overrideDBName;
    }
    // Create table associated with the import
    // Executed if relevant, and used to contain all the other details about the table if not.
    ImportTableDesc tblDesc;
    try {
        tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
    } catch (Exception e) {
        throw new HiveException(e);
    }
    boolean isSourceMm = AcidUtils.isInsertOnlyTable(tblDesc.getTblProps());
    if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
        tblDesc.setReplicationSpec(replicationSpec);
        StatsSetupConst.setBasicStatsState(tblDesc.getTblProps(), StatsSetupConst.FALSE);
    }
    if (isExternalSet) {
        if (isSourceMm) {
            throw new SemanticException("Cannot import an MM table as external");
        }
        tblDesc.setExternal(isExternalSet);
    // This condition-check could have been avoided, but to honour the old
    // default of not calling if it wasn't set, we retain that behaviour.
    // TODO:cleanup after verification that the outer if isn't really needed here
    }
    if (isLocationSet) {
        tblDesc.setLocation(parsedLocation);
        x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
    }
    if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
        tblDesc.setTableName(parsedTableName);
    }
    List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
    Iterable<Partition> partitions = rv.getPartitions();
    for (Partition partition : partitions) {
        // TODO: this should ideally not create AddPartitionDesc per partition
        AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
        if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
            StatsSetupConst.setBasicStatsState(partsDesc.getPartition(0).getPartParams(), StatsSetupConst.FALSE);
        }
        partitionDescs.add(partsDesc);
    }
    if (isPartSpecSet) {
        // The import specification asked for only a particular partition to be loaded
        // We load only that, and ignore all the others.
        boolean found = false;
        for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
            AddPartitionDesc addPartitionDesc = partnIter.next();
            if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
                found = true;
            } else {
                partnIter.remove();
            }
        }
        if (!found) {
            throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
        }
    }
    if (tblDesc.getTableName() == null) {
        // or from the export dump.
        throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
    } else {
        x.getConf().set("import.destination.table", tblDesc.getTableName());
        for (AddPartitionDesc addPartitionDesc : partitionDescs) {
            addPartitionDesc.setTableName(tblDesc.getTableName());
        }
    }
    Warehouse wh = new Warehouse(x.getConf());
    Table table = tableIfExists(tblDesc, x.getHive());
    boolean tableExists = false;
    if (table != null) {
        checkTable(table, tblDesc, replicationSpec, x.getConf());
        x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
        tableExists = true;
    }
    // Initialize with 0 for non-ACID and non-MM tables.
    Long writeId = 0L;
    if (((table != null) && AcidUtils.isTransactionalTable(table)) || AcidUtils.isTablePropertyTransactional(tblDesc.getTblProps())) {
        // Explain plan doesn't open a txn and hence no need to allocate write id.
        if (x.getCtx().getExplainConfig() == null) {
            writeId = SessionState.get().getTxnMgr().getTableWriteId(tblDesc.getDatabaseName(), tblDesc.getTableName());
        }
    }
    int stmtId = 0;
    /*
    if (isAcid(writeId)) {
      tblDesc.setInitialMmWriteId(writeId);
    }
    */
    if (!replicationSpec.isInReplicationScope()) {
        createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x, writeId, stmtId, isSourceMm);
    } else {
        createReplImportTasks(tblDesc, partitionDescs, replicationSpec, waitOnPrecursor, table, fromURI, fs, wh, x, writeId, stmtId, isSourceMm, updatedMetadata);
    }
    return tableExists;
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.metastore.api.Partition) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Table(org.apache.hadoop.hive.ql.metadata.Table) ImportTableDesc(org.apache.hadoop.hive.ql.plan.ImportTableDesc) ArrayList(java.util.ArrayList) IOException(java.io.IOException) URI(java.net.URI) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) MetaData(org.apache.hadoop.hive.ql.parse.repl.load.MetaData) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc)

Aggregations

HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)451 IOException (java.io.IOException)172 ArrayList (java.util.ArrayList)81 Path (org.apache.hadoop.fs.Path)68 Table (org.apache.hadoop.hive.ql.metadata.Table)65 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)46 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)45 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)45 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)42 Partition (org.apache.hadoop.hive.ql.metadata.Partition)39 FileSystem (org.apache.hadoop.fs.FileSystem)31 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)29 LinkedHashMap (java.util.LinkedHashMap)28 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)28 InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException)28 FileNotFoundException (java.io.FileNotFoundException)27 URISyntaxException (java.net.URISyntaxException)27 HashMap (java.util.HashMap)26 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)23 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)23