
Example 51 with SemanticException

use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

the class GenMapRedUtils method setMapWork.

/**
 * Initialize MapWork.
 *
 * @param plan
 *          map work to initialize
 * @param parseCtx
 *          current parse context
 * @param inputs
 *          read entities for the map work
 * @param partsList
 *          pruned partition list; if it is null it will be computed on the fly
 * @param tsOp
 *          the top (table scan) operator of the stack
 * @param alias_id
 *          current alias
 * @param conf
 *          current instance of hive conf
 * @param local
 *          whether to add to map-reduce work or to local work
 */
public static void setMapWork(MapWork plan, ParseContext parseCtx, Set<ReadEntity> inputs, PrunedPartitionList partsList, TableScanOperator tsOp, String alias_id, HiveConf conf, boolean local) throws SemanticException {
    ArrayList<Path> partDir = new ArrayList<Path>();
    ArrayList<PartitionDesc> partDesc = new ArrayList<PartitionDesc>();
    boolean isFullAcidTable = false;
    Path tblDir = null;
    plan.setNameToSplitSample(parseCtx.getNameToSplitSample());
    // we also collect table stats while collecting column stats.
    if (parseCtx.getAnalyzeRewrite() != null) {
        plan.setGatheringStats(true);
    }
    if (partsList == null) {
        try {
            partsList = PartitionPruner.prune(tsOp, parseCtx, alias_id);
            isFullAcidTable = tsOp.getConf().isFullAcidTable();
        } catch (SemanticException e) {
            throw e;
        }
    }
    // Generate the map work for this alias_id
    // pass both confirmed and unknown partitions through the map-reduce
    // framework
    Set<Partition> parts = partsList.getPartitions();
    TableDesc tableSpec = Utilities.getTableDesc(tsOp.getConf().getTableMetadata());
    PartitionDesc aliasPartnDesc = null;
    try {
        if (!parts.isEmpty()) {
            aliasPartnDesc = Utilities.getPartitionDesc(parts.iterator().next(), tableSpec);
        }
    } catch (HiveException e) {
        LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw new SemanticException(e.getMessage(), e);
    }
    // The table does not have any partitions
    if (aliasPartnDesc == null) {
        aliasPartnDesc = new PartitionDesc(tableSpec, null);
    }
    Map<String, String> props = tsOp.getConf().getOpProps();
    if (props != null) {
        Properties target = aliasPartnDesc.getProperties();
        target.putAll(props);
    }
    plan.getAliasToPartnInfo().put(alias_id, aliasPartnDesc);
    long sizeNeeded = Integer.MAX_VALUE;
    int fileLimit = -1;
    if (parseCtx.getGlobalLimitCtx().isEnable()) {
        if (isFullAcidTable) {
            LOG.info("Skipping Global Limit optimization for an ACID table");
            parseCtx.getGlobalLimitCtx().disableOpt();
        } else {
            long sizePerRow = HiveConf.getLongVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
            sizeNeeded = (parseCtx.getGlobalLimitCtx().getGlobalOffset() + parseCtx.getGlobalLimitCtx().getGlobalLimit()) * sizePerRow;
            // For the optimization that reduces the number of input files, we cap the
            // number of files allowed. If more than that many files would have to be
            // selected, we skip the optimization, since having too many files as inputs
            // can cause unpredictable latency and is not necessarily cheaper.
            fileLimit = HiveConf.getIntVar(parseCtx.getConf(), HiveConf.ConfVars.HIVELIMITOPTLIMITFILE);
            if (sizePerRow <= 0 || fileLimit <= 0) {
                LOG.info("Skip optimization to reduce input size of 'limit'");
                parseCtx.getGlobalLimitCtx().disableOpt();
            } else if (parts.isEmpty()) {
                LOG.info("Empty input: skip limit optimization");
            } else {
                LOG.info("Try to reduce input size for 'limit' " + "sizeNeeded: " + sizeNeeded + "  file limit : " + fileLimit);
            }
        }
    }
    boolean isFirstPart = true;
    boolean emptyInput = true;
    boolean singlePartition = (parts.size() == 1);
    // Track the dependencies for the view. Consider a query like: select * from V;
    // where V is a view of the form: select * from T
    // The dependencies should include V at depth 0, and T at depth 1 (inferred).
    Map<String, ReadEntity> viewToInput = parseCtx.getViewAliasToInput();
    ReadEntity parentViewInfo = PlanUtils.getParentViewInfo(alias_id, viewToInput);
    // The table itself should also be considered part of the inputs, even if it is a
    // partitioned table, regardless of whether any partition is selected.
    // This read entity is a direct read entity and not an indirect read (an indirect read
    // is when the table is read only because it is a dependency of a view).
    boolean isDirectRead = (parentViewInfo == null);
    TableDesc tblDesc = null;
    boolean initTableDesc = false;
    PlanUtils.addPartitionInputs(parts, inputs, parentViewInfo, isDirectRead);
    for (Partition part : parts) {
        // Later the properties have to come from the partition as opposed
        // to from the table in order to support versioning.
        Path[] paths = null;
        SampleDesc sampleDescr = parseCtx.getOpToSamplePruner().get(tsOp);
        // Lookup list bucketing pruner
        Map<String, ExprNodeDesc> partToPruner = parseCtx.getOpToPartToSkewedPruner().get(tsOp);
        ExprNodeDesc listBucketingPruner = (partToPruner != null) ? partToPruner.get(part.getName()) : null;
        if (sampleDescr != null) {
            assert (listBucketingPruner == null) : "Sampling and list bucketing can't coexist.";
            paths = SamplePruner.prune(part, sampleDescr);
            parseCtx.getGlobalLimitCtx().disableOpt();
        } else if (listBucketingPruner != null) {
            assert (sampleDescr == null) : "Sampling and list bucketing can't coexist.";
            /* Use the list bucketing pruner's path. */
            paths = ListBucketingPruner.prune(parseCtx, part, listBucketingPruner);
        } else {
            // We only try the first partition; if the first partition does not
            // contain enough data for the limit, we fall back to normal mode.
            if (parseCtx.getGlobalLimitCtx().isEnable()) {
                if (isFirstPart) {
                    long sizeLeft = sizeNeeded;
                    ArrayList<Path> retPathList = new ArrayList<Path>();
                    SamplePruner.LimitPruneRetStatus status = SamplePruner.limitPrune(part, sizeLeft, fileLimit, retPathList);
                    if (status.equals(SamplePruner.LimitPruneRetStatus.NoFile)) {
                        continue;
                    } else if (status.equals(SamplePruner.LimitPruneRetStatus.NotQualify)) {
                        LOG.info("Use full input -- first " + fileLimit + " files are more than " + sizeNeeded + " bytes");
                        parseCtx.getGlobalLimitCtx().disableOpt();
                    } else {
                        emptyInput = false;
                        paths = new Path[retPathList.size()];
                        int index = 0;
                        for (Path path : retPathList) {
                            paths[index++] = path;
                        }
                        if (status.equals(SamplePruner.LimitPruneRetStatus.NeedAllFiles) && singlePartition) {
                            // if all files are needed to meet the size limit, we disable the
                            // optimization. This usually happens for an empty table/partition or a
                            // table/partition with only one file. Disabling the optimization
                            // avoids re-running the query when there are not enough rows.
                            parseCtx.getGlobalLimitCtx().disableOpt();
                        }
                    }
                    isFirstPart = false;
                } else {
                    paths = new Path[0];
                }
            }
            if (!parseCtx.getGlobalLimitCtx().isEnable()) {
                paths = part.getPath();
            }
        }
        // is it a partitioned table ?
        if (!part.getTable().isPartitioned()) {
            assert (tblDir == null);
            tblDir = paths[0];
            if (!initTableDesc) {
                tblDesc = Utilities.getTableDesc(part.getTable());
                initTableDesc = true;
            }
        } else if (tblDesc == null) {
            if (!initTableDesc) {
                tblDesc = Utilities.getTableDesc(part.getTable());
                initTableDesc = true;
            }
        }
        if (props != null) {
            Properties target = tblDesc.getProperties();
            target.putAll(props);
        }
        for (Path p : paths) {
            if (p == null) {
                continue;
            }
            String path = p.toString();
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding " + path + " of table " + alias_id);
            }
            partDir.add(p);
            try {
                if (part.getTable().isPartitioned()) {
                    partDesc.add(Utilities.getPartitionDesc(part, tblDesc));
                } else {
                    partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part, false));
                }
            } catch (HiveException e) {
                LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
                throw new SemanticException(e.getMessage(), e);
            }
        }
    }
    if (emptyInput) {
        parseCtx.getGlobalLimitCtx().disableOpt();
    }
    Utilities.addSchemaEvolutionToTableScanOperator(partsList.getSourceTable(), tsOp);
    Iterator<Path> iterPath = partDir.iterator();
    Iterator<PartitionDesc> iterPartnDesc = partDesc.iterator();
    if (!local) {
        while (iterPath.hasNext()) {
            assert iterPartnDesc.hasNext();
            Path path = iterPath.next();
            PartitionDesc prtDesc = iterPartnDesc.next();
            // Add the path to alias mapping
            plan.addPathToAlias(path, alias_id);
            plan.addPathToPartitionInfo(path, prtDesc);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Information added for path " + path);
            }
        }
        assert plan.getAliasToWork().get(alias_id) == null;
        plan.getAliasToWork().put(alias_id, tsOp);
    } else {
        // populate local work if needed
        MapredLocalWork localPlan = plan.getMapRedLocalWork();
        if (localPlan == null) {
            localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
        }
        assert localPlan.getAliasToWork().get(alias_id) == null;
        assert localPlan.getAliasToFetchWork().get(alias_id) == null;
        localPlan.getAliasToWork().put(alias_id, tsOp);
        if (tblDir == null) {
            tblDesc = Utilities.getTableDesc(partsList.getSourceTable());
            localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(partDir, partDesc, tblDesc));
        } else {
            localPlan.getAliasToFetchWork().put(alias_id, new FetchWork(tblDir, tblDesc));
        }
        plan.setMapRedLocalWork(localPlan);
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) Properties(java.util.Properties) LinkedHashMap(java.util.LinkedHashMap) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Path(org.apache.hadoop.fs.Path) Partition(org.apache.hadoop.hive.ql.metadata.Partition) SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
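To make the size estimate in the global-limit branch concrete (the numbers are illustrative; the defaults behind these ConfVars can differ between Hive releases): for a query ending in LIMIT 10 OFFSET 5, with HiveConf.ConfVars.HIVELIMITMAXROWSIZE set to 100000 bytes per row, sizeNeeded = (5 + 10) * 100000 = 1,500,000 bytes. If HIVELIMITOPTLIMITFILE is 10, SamplePruner.limitPrune (Example 52 below) keeps adding files from the first partition until either those 1.5 MB are covered or more than 10 files would be needed; in the latter case the optimization is abandoned and the full input is read.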

Example 52 with SemanticException

use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

the class SamplePruner method limitPrune.

/**
 * Try to select a subset of the files in the partition that covers the size
 * limit while using no more than fileLimit files.
 * @param part partition to prune
 * @param sizeLimit number of bytes the selected files must cover
 * @param fileLimit maximum number of files allowed
 * @param retPathList list of selected Paths, filled in by this method
 * @return the result of the attempt
 * @throws SemanticException
 */
public static LimitPruneRetStatus limitPrune(Partition part, long sizeLimit, int fileLimit, Collection<Path> retPathList) throws SemanticException {
    try {
        FileSystem fs = part.getDataLocation().getFileSystem(Hive.get().getConf());
        String pathPattern = part.getDataLocation().toString() + "/*";
        AddPathReturnStatus ret = addPath(fs, pathPattern, sizeLimit, fileLimit, retPathList);
        if (ret == null) {
            return LimitPruneRetStatus.NotQualify;
        } else if (!ret.hasFile) {
            return LimitPruneRetStatus.NoFile;
        } else if (ret.sizeLeft > 0) {
            return LimitPruneRetStatus.NotQualify;
        } else if (ret.allFile) {
            return LimitPruneRetStatus.NeedAllFiles;
        } else {
            return LimitPruneRetStatus.NeedSomeFiles;
        }
    } catch (Exception e) {
        throw new RuntimeException("Cannot get path", e);
    }
}
Also used : FileSystem(org.apache.hadoop.fs.FileSystem) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) IOException(java.io.IOException)
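A minimal sketch of how a caller can act on the four return statuses, simplified from the branch in GenMapRedUtils.setMapWork above. The helper below is illustrative, not a Hive API; the real caller also disables the global-limit optimization for NotQualify and for NeedAllFiles on a single partition.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.optimizer.SamplePruner;
import org.apache.hadoop.hive.ql.parse.SemanticException;

class LimitPruneCaller {
    // Illustrative only: mirrors the status handling in GenMapRedUtils.setMapWork.
    static Path[] pruneOrFullScan(Partition part, long sizeNeeded, int fileLimit)
            throws SemanticException {
        List<Path> keep = new ArrayList<>();
        switch (SamplePruner.limitPrune(part, sizeNeeded, fileLimit, keep)) {
            case NoFile:
                return new Path[0];                 // empty partition, nothing to scan
            case NeedSomeFiles:
            case NeedAllFiles:
                return keep.toArray(new Path[0]);   // selected files cover sizeNeeded
            case NotQualify:
            default:
                return part.getPath();              // give up: read the full input
        }
    }
}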

Example 53 with SemanticException

use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

the class SparkMapJoinProcessor method convertMapJoin.

/**
 * Convert a regular join to a map-side join.
 *
 * @param conf
 * @param op join operator
 * @param leftSrc
 * @param baseSrc
 * @param mapAliases
 * @param bigTablePos position of the source to be read as part of the
 *                   map-reduce framework. All other sources are cached in memory
 * @param noCheckOuterJoin
 * @param validateMapJoinTree
 */
@Override
public MapJoinOperator convertMapJoin(HiveConf conf, JoinOperator op, boolean leftSrc, String[] baseSrc, List<String> mapAliases, int bigTablePos, boolean noCheckOuterJoin, boolean validateMapJoinTree) throws SemanticException {
    // outer join cannot be performed on a table which is being cached
    JoinCondDesc[] condns = op.getConf().getConds();
    if (!noCheckOuterJoin) {
        if (checkMapJoin(bigTablePos, condns) < 0) {
            throw new SemanticException(ErrorMsg.NO_OUTER_MAPJOIN.getMsg());
        }
    }
    // create the map-join operator
    MapJoinOperator mapJoinOp = convertJoinOpMapJoinOp(conf, op, op.getConf().isLeftInputJoin(), op.getConf().getBaseSrc(), op.getConf().getMapAliases(), bigTablePos, noCheckOuterJoin);
    // 1. remove RS as parent for the big table branch
    // 2. remove old join op from child set of all the RSs
    List<Operator<? extends OperatorDesc>> parentOps = mapJoinOp.getParentOperators();
    for (int i = 0; i < parentOps.size(); i++) {
        Operator<? extends OperatorDesc> parentOp = parentOps.get(i);
        parentOp.getChildOperators().remove(op);
        if (i == bigTablePos) {
            List<Operator<? extends OperatorDesc>> grandParentOps = parentOp.getParentOperators();
            Preconditions.checkArgument(grandParentOps.size() == 1, "AssertionError: expect number of parents to be 1, but was " + grandParentOps.size());
            Operator<? extends OperatorDesc> grandParentOp = grandParentOps.get(0);
            grandParentOp.replaceChild(parentOp, mapJoinOp);
            mapJoinOp.replaceParent(parentOp, grandParentOp);
        }
    }
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
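For readers less familiar with Hive's operator DAG, the toy sketch below shows the effect of the two replaceChild/replaceParent calls in the big-table branch: the ReduceSink parent is spliced out, so its own parent feeds the new map-join operator directly. Node and BigTableSpliceDemo are hypothetical stand-ins, not Hive's Operator API.

import java.util.ArrayList;
import java.util.List;

class Node {
    final String name;
    final List<Node> parents = new ArrayList<>();
    final List<Node> children = new ArrayList<>();

    Node(String name) { this.name = name; }

    void replaceChild(Node oldChild, Node newChild) {
        children.set(children.indexOf(oldChild), newChild);
    }

    void replaceParent(Node oldParent, Node newParent) {
        parents.set(parents.indexOf(oldParent), newParent);
    }

    @Override
    public String toString() { return name; }
}

public class BigTableSpliceDemo {
    public static void main(String[] args) {
        Node tableScan = new Node("TS (big table)");   // the "grandparent" in the example
        Node reduceSink = new Node("RS");              // parent to splice out
        Node mapJoin = new Node("MAPJOIN");            // replaces the old JoinOperator

        tableScan.children.add(reduceSink);
        reduceSink.parents.add(tableScan);
        mapJoin.parents.add(reduceSink);

        // the same two calls the example issues for the big-table branch
        tableScan.replaceChild(reduceSink, mapJoin);
        mapJoin.replaceParent(reduceSink, tableScan);

        System.out.println(tableScan.children);  // [MAPJOIN]
        System.out.println(mapJoin.parents);     // [TS (big table)]
    }
}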

Example 54 with SemanticException

use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

the class HiveIntersectRewriteRule method onMatch.

// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
    final HiveIntersect hiveIntersect = call.rel(0);
    final RelOptCluster cluster = hiveIntersect.getCluster();
    final RexBuilder rexBuilder = cluster.getRexBuilder();
    int numOfBranch = hiveIntersect.getInputs().size();
    Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
    // 1st level GB: create a GB (col0, col1, count(1) as c) for each branch
    for (int index = 0; index < numOfBranch; index++) {
        RelNode input = hiveIntersect.getInputs().get(index);
        final List<RexNode> gbChildProjLst = Lists.newArrayList();
        final List<Integer> groupSetPositions = Lists.newArrayList();
        for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
            gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
            groupSetPositions.add(cInd);
        }
        gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
        // create the project before GB because we need a new project with extra column '1'.
        RelNode gbInputRel = null;
        try {
            gbInputRel = HiveProject.create(input, gbChildProjLst, null);
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
        // groupSetPosition includes all the positions
        final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
        List<AggregateCall> aggregateCalls = Lists.newArrayList();
        RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
        // count(1), 1's position is input.getRowType().getFieldList().size()
        AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
        aggregateCalls.add(aggregateCall);
        HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, null, aggregateCalls);
        bldr.add(aggregateRel);
    }
    // create a union above all the branches
    HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
    // 2nd level GB: create a GB (col0, col1, count(c)) over the union of the branches
    final List<Integer> groupSetPositions = Lists.newArrayList();
    // the index of c
    int cInd = union.getRowType().getFieldList().size() - 1;
    for (int index = 0; index < union.getRowType().getFieldList().size(); index++) {
        if (index != cInd) {
            groupSetPositions.add(index);
        }
    }
    List<AggregateCall> aggregateCalls = Lists.newArrayList();
    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster, TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
    aggregateCalls.add(aggregateCall);
    if (hiveIntersect.all) {
        aggregateCall = HiveCalciteUtil.createSingleArgAggCall("min", cluster, TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
        aggregateCalls.add(aggregateCall);
    }
    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
    HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), union, groupSet, null, aggregateCalls);
    // add a filter count(c) = #branches
    int countInd = cInd;
    List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
    RexInputRef ref = rexBuilder.makeInputRef(aggregateRel, countInd);
    RexLiteral literal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
    childRexNodeLst.add(ref);
    childRexNodeLst.add(literal);
    ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
    calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
    RexNode factoredFilterExpr = null;
    try {
        factoredFilterExpr = rexBuilder.makeCall(SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(), TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), true, false), childRexNodeLst);
    } catch (CalciteSemanticException e) {
        LOG.debug(e.toString());
        throw new RuntimeException(e);
    }
    RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel, factoredFilterExpr);
    if (!hiveIntersect.all) {
        // the schema for intersect distinct is like this
        // R3 on all attributes + count(c) as cnt
        // finally add a project to project out the last column
        Set<Integer> projectOutColumnPositions = new HashSet<>();
        projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
        try {
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
        } catch (CalciteSemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
    } else {
        // the schema for intersect all is like this
        // R3 + count(c) as cnt + min(c) as m
        // we create an input project for the UDTF whose schema is like this
        // min(c) as m + R3
        List<RexNode> originalInputRefs = Lists.transform(filterRel.getRowType().getFieldList(), new Function<RelDataTypeField, RexNode>() {

            @Override
            public RexNode apply(RelDataTypeField input) {
                return new RexInputRef(input.getIndex(), input.getType());
            }
        });
        List<RexNode> copyInputRefs = new ArrayList<>();
        copyInputRefs.add(originalInputRefs.get(originalInputRefs.size() - 1));
        for (int i = 0; i < originalInputRefs.size() - 2; i++) {
            copyInputRefs.add(originalInputRefs.get(i));
        }
        RelNode srcRel = null;
        try {
            srcRel = HiveProject.create(filterRel, copyInputRefs, null);
            HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
            // finally add a project to project out the 1st column
            Set<Integer> projectOutColumnPositions = new HashSet<>();
            projectOutColumnPositions.add(0);
            call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
        } catch (SemanticException e) {
            LOG.debug(e.toString());
            throw new RuntimeException(e);
        }
    }
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) RexLiteral(org.apache.calcite.rex.RexLiteral) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableList(com.google.common.collect.ImmutableList) RexBuilder(org.apache.calcite.rex.RexBuilder) Builder(com.google.common.collect.ImmutableList.Builder) HiveTableFunctionScan(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HiveIntersect(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect) HiveUnion(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion) BigDecimal(java.math.BigDecimal) HiveFilter(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter) AggregateCall(org.apache.calcite.rel.core.AggregateCall) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)
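The Calcite plumbing above can obscure the underlying idea, so here is a minimal, self-contained sketch (plain Java collections, no Calcite or Hive APIs) of the counting scheme for INTERSECT DISTINCT: each branch is deduplicated (the first-level group-by), the branches are merged (the union), and a row survives only if it was seen in every branch (the filter count(c) = number of branches). INTERSECT ALL additionally keeps min(c) and uses a UDTF to replay each surviving row that many times, which the sketch does not cover.

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class IntersectByCounting {

    // INTERSECT DISTINCT of several branches via the same counting trick as the rule:
    // a row qualifies when the number of branches it appears in equals branches.size().
    static <T> Set<T> intersectDistinct(List<List<T>> branches) {
        Map<T, Integer> branchesSeenIn = new HashMap<>();
        for (List<T> branch : branches) {
            // first-level group-by: one entry per distinct row of this branch
            for (T row : new HashSet<>(branch)) {
                branchesSeenIn.merge(row, 1, Integer::sum);   // second-level count(c)
            }
        }
        Set<T> result = new HashSet<>();
        for (Map.Entry<T, Integer> e : branchesSeenIn.entrySet()) {
            if (e.getValue() == branches.size()) {            // filter count(c) = #branches
                result.add(e.getKey());
            }
        }
        return result;
    }

    public static void main(String[] args) {
        List<List<String>> branches = Arrays.asList(
                Arrays.asList("a", "b", "b", "c"),
                Arrays.asList("b", "c", "d"));
        System.out.println(intersectDistinct(branches));      // [b, c] in some order
    }
}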

Example 55 with SemanticException

use of org.apache.hadoop.hive.ql.parse.SemanticException in project hive by apache.

the class ConvertJoinMapJoin method checkAndConvertSMBJoin.

@SuppressWarnings("unchecked")
private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperator joinOp, TezBucketJoinProcCtx tezBucketJoinProcCtx, final long maxSize) throws SemanticException {
    // We cannot convert to a bucket map join, and cannot convert to a map join
    // either based on the size. Check if we can convert to an SMB join.
    if (!(HiveConf.getBoolVar(context.conf, ConfVars.HIVE_AUTO_SORTMERGE_JOIN)) || ((!HiveConf.getBoolVar(context.conf, ConfVars.HIVE_AUTO_SORTMERGE_JOIN_REDUCE)) && joinOp.getOpTraits().getNumReduceSinks() >= 2)) {
        fallbackToReduceSideJoin(joinOp, context, maxSize);
        return null;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(context.parseContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        throw new SemanticException(e.getMessage());
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // This is a full outer join, which can never be a map join
        // of any type. So return false.
        return false;
    }
    int mapJoinConversionPos = bigTableMatcher.getBigTablePosition(context.parseContext, joinOp, joinCandidates);
    if (mapJoinConversionPos < 0) {
        // The big table selector could not pick a big-table position (for example when
        // the join contains aliases from a sub-query), so we just convert to a common
        // merge join operator: the shuffle join in the map-reduce case.
        fallbackToReduceSideJoin(joinOp, context, maxSize);
        return null;
    }
    if (checkConvertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
        convertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx.getNumBuckets(), true);
    } else {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        fallbackToReduceSideJoin(joinOp, context, maxSize);
    }
    return null;
}
Also used : MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) CommonMergeJoinDesc(org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
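The only SemanticException in this example comes from resolving the big-table selector class named in the configuration. The sketch below isolates that idiom; BigTableSelectorLoader is a hypothetical helper, not a Hive class, the import packages for JavaUtils, ReflectionUtils and BigTableSelectorForAutoSMJ are assumed from the Hive/Hadoop codebase, and unlike the example it also passes the ClassNotFoundException along as the cause rather than only its message.

import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.BigTableSelectorForAutoSMJ;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.util.ReflectionUtils;

class BigTableSelectorLoader {
    // Load the configured BigTableSelectorForAutoSMJ implementation, surfacing a bad
    // class name as a compile-time SemanticException instead of a runtime failure.
    static BigTableSelectorForAutoSMJ load(HiveConf conf) throws SemanticException {
        String selector = HiveConf.getVar(conf,
                HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        try {
            Class<? extends BigTableSelectorForAutoSMJ> cls = JavaUtils.loadClass(selector);
            return ReflectionUtils.newInstance(cls, null);
        } catch (ClassNotFoundException e) {
            throw new SemanticException(e.getMessage(), e);
        }
    }
}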

Aggregations

SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 131
ArrayList (java.util.ArrayList): 64
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 36
HashMap (java.util.HashMap): 30
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27
Path (org.apache.hadoop.fs.Path): 22
IOException (java.io.IOException): 20
LinkedHashMap (java.util.LinkedHashMap): 19
List (java.util.List): 18
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 18
Node (org.apache.hadoop.hive.ql.lib.Node): 17
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 17
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 16
DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher): 16
Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher): 16
GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker): 16
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 16
Operator (org.apache.hadoop.hive.ql.exec.Operator): 15
DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker): 15
Table (org.apache.hadoop.hive.ql.metadata.Table): 14