
Example 21 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class DriverTxnHandler, method setValidWriteIds:

private void setValidWriteIds(ValidTxnWriteIdList txnWriteIds) {
    driverContext.getConf().set(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY, txnWriteIds.toString());
    if (driverContext.getPlan().getFetchTask() != null) {
        // This is needed for {@link HiveConf.ConfVars.HIVEFETCHTASKCONVERSION} optimization which initializes JobConf
        // in FetchOperator before recordValidTxns() but this has to be done after locks are acquired to avoid race
        // conditions in ACID. This case is supported only for single source query.
        Operator<?> source = driverContext.getPlan().getFetchTask().getWork().getSource();
        if (source instanceof TableScanOperator) {
            TableScanOperator tsOp = (TableScanOperator) source;
            String fullTableName = AcidUtils.getFullTableName(tsOp.getConf().getDatabaseName(), tsOp.getConf().getTableName());
            ValidWriteIdList writeIdList = txnWriteIds.getTableValidWriteIdList(fullTableName);
            if (tsOp.getConf().isTranscationalTable() && (writeIdList == null)) {
                throw new IllegalStateException(String.format("ACID table: %s is missing from the ValidWriteIdList config: %s", fullTableName, txnWriteIds.toString()));
            }
            if (writeIdList != null) {
                driverContext.getPlan().getFetchTask().setValidWriteIdList(writeIdList.toString());
            }
        }
    }
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList)
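
To see that check in isolation, here is a minimal sketch of the same lookup-and-validate pattern. It is illustrative only: a plain Map stands in for ValidTxnWriteIdList, and the class and value names (WriteIdCheckSketch, the placeholder write-id string) are made up for the example.

import java.util.Map;

// Sketch of the pattern in setValidWriteIds(): resolve the full table name,
// look up its write-id entry, and fail fast when a transactional table has none.
final class WriteIdCheckSketch {
    static String requireWriteIds(Map<String, String> tableToWriteIds,
                                  String fullTableName,
                                  boolean isTransactional) {
        String writeIds = tableToWriteIds.get(fullTableName);
        // Mirrors the IllegalStateException thrown above for ACID tables.
        if (isTransactional && writeIds == null) {
            throw new IllegalStateException(
                "ACID table: " + fullTableName + " is missing from the write id map");
        }
        return writeIds; // may be null for non-transactional tables
    }

    public static void main(String[] args) {
        // Placeholder value; real entries are serialized ValidWriteIdList strings.
        Map<String, String> ids = Map.of("default.acid_tbl", "writeIds-for-default.acid_tbl");
        System.out.println(requireWriteIds(ids, "default.acid_tbl", true));
    }
}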

Example 22 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class SparkPlanGenerator, method generateMapInput:

@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
    JobConf jobConf = cloneJobConf(mapWork);
    Class ifClass = getInputFormat(jobConf, mapWork);
    sc.sc().setCallSite(CallSite.apply(mapWork.getName(), ""));
    JavaPairRDD<WritableComparable, Writable> hadoopRDD;
    if (mapWork.getNumMapTasks() != null) {
        jobConf.setNumMapTasks(mapWork.getNumMapTasks());
        hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class, mapWork.getNumMapTasks());
    } else {
        hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
    }
    boolean toCache = false;
    String tables = mapWork.getAllRootOperators().stream().filter(op -> op instanceof TableScanOperator).map(ts -> ((TableScanDesc) ts.getConf()).getAlias()).collect(Collectors.joining(", "));
    String rddName = mapWork.getName() + " (" + tables + ", " + hadoopRDD.getNumPartitions() + (toCache ? ", cached)" : ")");
    // Caching is disabled for MapInput due to HIVE-8920
    MapInput result = new MapInput(sparkPlan, hadoopRDD, toCache, rddName, mapWork);
    return result;
}
Also used : StatsPublisher(org.apache.hadoop.hive.ql.stats.StatsPublisher) FileSystem(org.apache.hadoop.fs.FileSystem) CallSite(org.apache.spark.util.CallSite) LoggerFactory(org.slf4j.LoggerFactory) StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) WritableComparable(org.apache.hadoop.io.WritableComparable) HashMap(java.util.HashMap) Writable(org.apache.hadoop.io.Writable) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) ExecReducer(org.apache.hadoop.hive.ql.exec.mr.ExecReducer) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) BucketizedHiveInputFormat(org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat) Logger(org.slf4j.Logger) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) MergeFileWork(org.apache.hadoop.hive.ql.io.merge.MergeFileWork) Operator(org.apache.hadoop.hive.ql.exec.Operator) ExecMapper(org.apache.hadoop.hive.ql.exec.mr.ExecMapper) JobConf(org.apache.hadoop.mapred.JobConf) StatsFactory(org.apache.hadoop.hive.ql.stats.StatsFactory) List(java.util.List) SparkEdgeProperty(org.apache.hadoop.hive.ql.plan.SparkEdgeProperty) MergeFileOutputFormat(org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) Preconditions(com.google.common.base.Preconditions) FileOutputFormat(org.apache.hadoop.mapred.FileOutputFormat) MergeFileMapper(org.apache.hadoop.hive.ql.io.merge.MergeFileMapper) ErrorMsg(org.apache.hadoop.hive.ql.ErrorMsg) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
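
The RDD name above is built by streaming over the root operators of the MapWork, keeping only the table scans, and joining their aliases with ", ". A small stand-alone sketch of that idiom follows; Op, TableScan, and FileSink are simplified stand-ins invented for the example, not Hive classes.

import java.util.List;
import java.util.stream.Collectors;

// Filter operators by type, extract an alias from each match, and join the
// aliases into the display name, as generateMapInput() does for its RDD.
final class RddNameSketch {
    interface Op { }
    record TableScan(String alias) implements Op { }
    record FileSink(String path) implements Op { }

    static String rddName(String workName, List<Op> rootOps, int numPartitions, boolean cached) {
        String tables = rootOps.stream()
                .filter(op -> op instanceof TableScan)
                .map(op -> ((TableScan) op).alias())
                .collect(Collectors.joining(", "));
        return workName + " (" + tables + ", " + numPartitions + (cached ? ", cached)" : ")");
    }

    public static void main(String[] args) {
        List<Op> ops = List.of(new TableScan("src"), new TableScan("dim"), new FileSink("/tmp/out"));
        System.out.println(rddName("Map 1", ops, 4, false)); // prints: Map 1 (src, dim, 4)
    }
}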

Example 23 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class ProjectionPusher, method pushProjectionsAndFilters:

private void pushProjectionsAndFilters(final JobConf jobConf, final String splitPath, final String splitPathWithNoSchema) {
    if (mapWork == null) {
        return;
    } else if (mapWork.getPathToAliases() == null) {
        return;
    }
    final Set<String> aliases = new HashSet<String>();
    try {
        List<String> a = HiveFileFormatUtils.getFromPathRecursively(mapWork.getPathToAliases(), new Path(splitPath), null, false, true);
        if (a != null) {
            aliases.addAll(a);
        }
        if (a == null || a.isEmpty()) {
            // TODO: not having aliases for path usually means some bug. Should it give up?
            LOG.warn("Couldn't find aliases for " + splitPath);
        }
    } catch (IllegalArgumentException | IOException e) {
        throw new RuntimeException(e);
    }
    // Collect the needed columns from all the aliases and create ORed filter
    // expression for the table.
    boolean allColumnsNeeded = false;
    boolean noFilters = false;
    Set<Integer> neededColumnIDs = new HashSet<Integer>();
    // To support nested column pruning, we need to track the path from the top to the nested
    // fields
    Set<String> neededNestedColumnPaths = new HashSet<String>();
    List<ExprNodeGenericFuncDesc> filterExprs = new ArrayList<ExprNodeGenericFuncDesc>();
    RowSchema rowSchema = null;
    for (String alias : aliases) {
        final Operator<? extends Serializable> op = mapWork.getAliasToWork().get(alias);
        if (op != null && op instanceof TableScanOperator) {
            final TableScanOperator ts = (TableScanOperator) op;
            if (ts.getNeededColumnIDs() == null) {
                allColumnsNeeded = true;
            } else {
                neededColumnIDs.addAll(ts.getNeededColumnIDs());
                if (ts.getNeededNestedColumnPaths() != null) {
                    neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
                }
            }
            rowSchema = ts.getSchema();
            ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
            // No filter if any TS has no filter expression
            noFilters = filterExpr == null;
            filterExprs.add(filterExpr);
        }
    }
    ExprNodeGenericFuncDesc tableFilterExpr = null;
    if (!noFilters) {
        try {
            for (ExprNodeGenericFuncDesc filterExpr : filterExprs) {
                if (tableFilterExpr == null) {
                    tableFilterExpr = filterExpr;
                } else {
                    tableFilterExpr = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPOr(), Arrays.<ExprNodeDesc>asList(tableFilterExpr, filterExpr));
                }
            }
        } catch (UDFArgumentException ex) {
            LOG.debug("Turn off filtering due to " + ex);
            tableFilterExpr = null;
        }
    }
    // push down projections
    if (!allColumnsNeeded) {
        if (!neededColumnIDs.isEmpty()) {
            ColumnProjectionUtils.appendReadColumns(jobConf, new ArrayList<Integer>(neededColumnIDs));
            ColumnProjectionUtils.appendNestedColumnPaths(jobConf, new ArrayList<String>(neededNestedColumnPaths));
        }
    } else {
        ColumnProjectionUtils.setReadAllColumns(jobConf);
    }
    pushFilters(jobConf, rowSchema, tableFilterExpr);
}
Also used : Path(org.apache.hadoop.fs.Path) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) IOException(java.io.IOException) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) HashSet(java.util.HashSet)
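
The filter handling above follows the intent stated in the comment: if any table scan for the split has no filter expression, nothing is pushed; otherwise the per-alias filters are ORed into one predicate. A minimal sketch of that combination rule, with java.util.function.Predicate standing in for ExprNodeGenericFuncDesc (all names here are illustrative):

import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;

// OR-fold a list of per-alias filters; a single missing filter disables pushdown,
// which corresponds to the noFilters flag in pushProjectionsAndFilters().
final class OrFoldSketch {
    static <T> Predicate<T> combine(List<Predicate<T>> filters) {
        if (filters.stream().anyMatch(f -> f == null)) {
            return null; // no pushdown: at least one scan reads unfiltered
        }
        return filters.stream().reduce(Predicate::or).orElse(null);
    }

    public static void main(String[] args) {
        Predicate<Integer> a = x -> x < 10;
        Predicate<Integer> b = x -> x > 100;
        Predicate<Integer> ored = combine(Arrays.asList(a, b));
        System.out.println(ored.test(5));   // true
        System.out.println(ored.test(50));  // false
    }
}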

Example 24 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class ConvertJoinMapJoin, method removeCycleCreatingSemiJoinOps:

// Remove any semijoin branch associated with hashjoin's parent's operator
// pipeline which can cause a cycle after hashjoin optimization.
private void removeCycleCreatingSemiJoinOps(MapJoinOperator mapjoinOp, Operator<?> parentSelectOpOfBigTable, ParseContext parseContext) throws SemanticException {
    Map<ReduceSinkOperator, TableScanOperator> semiJoinMap = new HashMap<ReduceSinkOperator, TableScanOperator>();
    for (Operator<?> op : parentSelectOpOfBigTable.getChildOperators()) {
        if (!(op instanceof SelectOperator)) {
            continue;
        }
        while (op.getChildOperators().size() > 0) {
            op = op.getChildOperators().get(0);
        }
        // If not ReduceSink Op, skip
        if (!(op instanceof ReduceSinkOperator)) {
            continue;
        }
        ReduceSinkOperator rs = (ReduceSinkOperator) op;
        TableScanOperator ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp();
        if (ts == null) {
            // skip, no semijoin branch
            continue;
        }
        // Found a semijoin branch.
        // There can be more than one semijoin branch coming from the parent
        // GBY Operator of the RS Operator.
        Operator<?> parentGB = op.getParentOperators().get(0);
        for (Operator<?> childRS : parentGB.getChildOperators()) {
            // Get the RS and TS for this branch
            rs = (ReduceSinkOperator) childRS;
            ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp();
            assert ts != null;
            for (Operator<?> parent : mapjoinOp.getParentOperators()) {
                if (!(parent instanceof ReduceSinkOperator)) {
                    continue;
                }
                Set<TableScanOperator> tsOps = OperatorUtils.findOperatorsUpstream(parent, TableScanOperator.class);
                boolean found = false;
                for (TableScanOperator parentTS : tsOps) {
                    // If the parent is same as the ts, then we have a cycle.
                    if (ts == parentTS) {
                        semiJoinMap.put(rs, ts);
                        found = true;
                        break;
                    }
                }
                if (found) {
                    break;
                }
            }
        }
    }
    if (semiJoinMap.size() > 0) {
        for (ReduceSinkOperator rs : semiJoinMap.keySet()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Found semijoin optimization from the big table side of a map join, which will cause a task cycle. " + "Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(semiJoinMap.get(rs)));
            }
            GenTezUtils.removeBranch(rs);
            GenTezUtils.removeSemiJoinOperator(parseContext, rs, semiJoinMap.get(rs));
        }
    }
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) HashMap(java.util.HashMap) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
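
The cycle detection above depends on OperatorUtils.findOperatorsUpstream collecting every TableScanOperator reachable through parent links from a ReduceSink parent of the map join. Conceptually that is a graph search over parent edges; the sketch below shows the idea with a simplified Node type and a caller-supplied predicate, both invented for the example (this is not Hive's Operator API).

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

// Walk parent links from a starting node and collect every ancestor that matches.
final class UpstreamSearchSketch {
    static final class Node {
        final String name;
        final List<Node> parents;
        Node(String name, List<Node> parents) { this.name = name; this.parents = parents; }
    }

    static Set<Node> findUpstream(Node start, Predicate<Node> matches) {
        Set<Node> found = new HashSet<>();
        Set<Node> visited = new HashSet<>();
        Deque<Node> work = new ArrayDeque<>();
        work.push(start);
        while (!work.isEmpty()) {
            Node n = work.pop();
            if (!visited.add(n)) {
                continue; // ancestor already explored (shared parent)
            }
            if (matches.test(n)) {
                found.add(n);
            }
            n.parents.forEach(work::push);
        }
        return found;
    }

    public static void main(String[] args) {
        Node ts = new Node("TS", List.of());
        Node fil = new Node("FIL", List.of(ts));
        Node rs = new Node("RS", List.of(fil));
        System.out.println(findUpstream(rs, n -> n.name.equals("TS")).size()); // prints: 1
    }
}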

Example 25 with TableScanOperator

Use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

The class DynamicPartitionPruningOptimization, method generateSemiJoinOperatorPlan:

// Generates plan for min/max when dynamic partition pruning is ruled out.
private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String keyBaseAlias, String internalColName, String colName, SemiJoinHint sjHint) throws SemanticException {
    // we will put a fork in the plan at the source of the reduce sink
    Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
    // we need the expr that generated the key of the reduce sink
    ExprNodeDesc key = ctx.getKeyCol();
    assert colName != null;
    // Fetch the TableScan Operator.
    Operator<?> op = parentOfRS;
    while (!(op == null || op instanceof TableScanOperator || op instanceof ReduceSinkOperator)) {
        op = op.getParentOperators().get(0);
    }
    Preconditions.checkNotNull(op);
    if (op instanceof TableScanOperator) {
        Table table = ((TableScanOperator) op).getConf().getTableMetadata();
        if (table.isPartitionKey(colName)) {
            // The column is partition column, skip the optimization.
            return false;
        }
    }
    // Check if there already exists a semijoin branch
    GroupByOperator gb = parseContext.getColExprToGBMap().get(key);
    if (gb != null) {
        // Already an existing semijoin branch, reuse it
        createFinalRsForSemiJoinOp(parseContext, ts, gb, key, keyBaseAlias, ctx.parent.getChildren().get(0), sjHint != null);
        // done!
        return true;
    }
    List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(key);
    // group by requires "ArrayList", don't ask.
    ArrayList<String> outputNames = new ArrayList<String>();
    // project the relevant key column
    SelectDesc select = new SelectDesc(keyExprs, outputNames);
    // Create the new RowSchema for the projected column
    ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName);
    // Bail out early if the projected column is not present in the parent's schema.
    if (columnInfo == null) {
        LOG.debug("No ColumnInfo found in {} for {}", parentOfRS.getOperatorId(), internalColName);
        return false;
    }
    columnInfo = new ColumnInfo(columnInfo);
    outputNames.add(internalColName);
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
    signature.add(columnInfo);
    RowSchema rowSchema = new RowSchema(signature);
    // Create the column expr map
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ExprNodeDesc exprNode = new ExprNodeColumnDesc(columnInfo);
    colExprMap.put(internalColName, exprNode);
    // Create the Select Operator
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, rowSchema, colExprMap, parentOfRS);
    // do a group by to aggregate min,max and bloom filter.
    float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    // Add min/max and bloom filter aggregations
    List<ObjectInspector> aggFnOIs = new ArrayList<ObjectInspector>();
    aggFnOIs.add(key.getWritableObjectInspector());
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), "", false));
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    try {
        AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
        bloomFilterEval.setSourceOperator(selectOp);
        if (sjHint != null && sjHint.getNumEntries() > 0) {
            LOG.debug("Setting size for " + keyBaseAlias + " to " + sjHint.getNumEntries() + " based on the hint");
            bloomFilterEval.setHintEntries(sjHint.getNumEntries());
        }
        bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
        bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
        aggs.add(min);
        aggs.add(max);
        aggs.add(bloomFilter);
    } catch (SemanticException e) {
        LOG.error("Error creating min/max aggregations on key", e);
        throw new IllegalStateException("Error creating min/max aggregations on key", e);
    }
    // Create the Group by Operator
    ArrayList<String> gbOutputNames = new ArrayList<String>();
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
    GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggs, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, false);
    ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(0), key.getTypeInfo(), "", false));
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(1), key.getTypeInfo(), "", false));
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(2), key.getTypeInfo(), "", false));
    GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(groupbyColInfos), selectOp);
    groupByOp.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    // Get the column names of the aggregations for reduce sink
    int colPos = 0;
    ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < aggs.size() - 1; i++) {
        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos), "", false);
        rsValueCols.add(colExpr);
        columnExprMap.put(gbOutputNames.get(colPos), colExpr);
        colPos++;
    }
    // Bloom Filter uses binary
    ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos), "", false);
    rsValueCols.add(colExpr);
    columnExprMap.put(gbOutputNames.get(colPos), colExpr);
    colPos++;
    // Create the reduce sink operator
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID, NullOrdering.defaultNullOrder(parseContext.getConf()));
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(groupByOp.getSchema()), groupByOp);
    rsOp.setColumnExprMap(columnExprMap);
    rsOp.getConf().setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.QUICKSTART));
    // Create the final Group By Operator
    ArrayList<AggregationDesc> aggsFinal = new ArrayList<AggregationDesc>();
    try {
        List<ObjectInspector> minFinalFnOIs = new ArrayList<ObjectInspector>();
        List<ObjectInspector> maxFinalFnOIs = new ArrayList<ObjectInspector>();
        List<ObjectInspector> bloomFilterFinalFnOIs = new ArrayList<ObjectInspector>();
        ArrayList<ExprNodeDesc> minFinalParams = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> maxFinalParams = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> bloomFilterFinalParams = new ArrayList<ExprNodeDesc>();
        // Use the expressions from Reduce Sink.
        minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector());
        maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector());
        bloomFilterFinalFnOIs.add(rsValueCols.get(2).getWritableObjectInspector());
        // Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1
        minFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(0).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(0), "", false));
        maxFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(1).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(1), "", false));
        bloomFilterFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(2).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(2), "", false));
        AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, false, false), minFinalParams, false, Mode.FINAL);
        AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, false, false), maxFinalParams, false, Mode.FINAL);
        AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs, false, false), bloomFilterFinalParams, false, Mode.FINAL);
        GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
        bloomFilterEval.setSourceOperator(selectOp);
        if (sjHint != null && sjHint.getNumEntries() > 0) {
            bloomFilterEval.setHintEntries(sjHint.getNumEntries());
        }
        bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
        bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
        aggsFinal.add(min);
        aggsFinal.add(max);
        aggsFinal.add(bloomFilter);
    } catch (SemanticException e) {
        LOG.error("Error creating min/max aggregations on key", e);
        throw new IllegalStateException("Error creating min/max aggregations on key", e);
    }
    GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggsFinal, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, 0, false);
    GroupByOperator groupByOpFinal = (GroupByOperator) OperatorFactory.getAndMakeChild(groupByDescFinal, new RowSchema(rsOp.getSchema()), rsOp);
    groupByOpFinal.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    createFinalRsForSemiJoinOp(parseContext, ts, groupByOpFinal, key, keyBaseAlias, ctx.parent.getChildren().get(0), sjHint != null);
    return true;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) GenericUDAFBloomFilterEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Table(org.apache.hadoop.hive.ql.metadata.Table) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) SemiJoinHint(org.apache.hadoop.hive.ql.parse.SemiJoinHint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc)
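
The plan assembled here is a two-stage aggregation: a map-side group-by emits partial min/max/bloom_filter values (Mode.PARTIAL1), a reduce-side group-by merges them (Mode.FINAL), and the merged result feeds the final reduce sink that drives a runtime filter on the target table scan. The sketch below illustrates only that partial-then-merge shape in plain Java; a HashSet stands in for the bloom filter and every name is illustrative, not part of Hive.

import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Each "mapper" produces a partial (min, max, membership sketch); the "reducer"
// merges the partials into the values used to filter the big-table scan.
final class SemiJoinReductionSketch {
    record Partial(long min, long max, Set<Long> approxMembers) { }

    static Partial partial(List<Long> keys) {            // analogous to Mode.PARTIAL1
        long min = keys.stream().mapToLong(Long::longValue).min().orElse(Long.MAX_VALUE);
        long max = keys.stream().mapToLong(Long::longValue).max().orElse(Long.MIN_VALUE);
        return new Partial(min, max, new HashSet<>(keys));
    }

    static Partial merge(List<Partial> partials) {        // analogous to Mode.FINAL
        long min = Long.MAX_VALUE;
        long max = Long.MIN_VALUE;
        Set<Long> members = new HashSet<>();
        for (Partial p : partials) {
            min = Math.min(min, p.min());
            max = Math.max(max, p.max());
            members.addAll(p.approxMembers());
        }
        return new Partial(min, max, members);
    }

    public static void main(String[] args) {
        Partial a = partial(List.of(3L, 7L));
        Partial b = partial(List.of(10L, 2L));
        Partial merged = merge(List.of(a, b));
        // The big-table scan is then filtered with key BETWEEN merged.min() AND merged.max()
        // plus a bloom-filter membership test.
        System.out.println(merged.min() + " " + merged.max()); // prints: 2 10
    }
}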

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)133 Operator (org.apache.hadoop.hive.ql.exec.Operator)52 ArrayList (java.util.ArrayList)47 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)44 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)36 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)35 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)32 HashMap (java.util.HashMap)30 Path (org.apache.hadoop.fs.Path)30 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)29 Table (org.apache.hadoop.hive.ql.metadata.Table)26 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)25 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)24 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)24 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)23 LinkedHashMap (java.util.LinkedHashMap)22 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)22 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)22 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)22 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)21