
Example 1 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache, in class HiveOpConverter, method genReduceSinkAndBacktrackSelect.

private static SelectOperator genReduceSinkAndBacktrackSelect(Operator<?> input, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf, List<String> keepColNames) throws SemanticException {
    // 1. Generate RS operator
    // 1.1 Prune the tableNames, only count the tableNames that are not empty strings
    // as empty string in table aliases is only allowed for virtual columns.
    String tableAlias = null;
    Set<String> tableNames = input.getSchema().getTableNames();
    for (String tableName : tableNames) {
        if (tableName != null) {
            if (tableName.length() == 0) {
                if (tableAlias == null) {
                    tableAlias = tableName;
                }
            } else {
                if (tableAlias == null || tableAlias.length() == 0) {
                    tableAlias = tableName;
                } else {
                    if (!tableName.equals(tableAlias)) {
                        throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is more than one");
                    }
                }
            }
        }
    }
    if (tableAlias == null) {
        throw new SemanticException("In CBO return path, genReduceSinkAndBacktrackSelect is expecting only one tableAlias but there is none");
    }
    // 1.2 Now generate RS operator
    ReduceSinkOperator rsOp = genReduceSink(input, tableAlias, keys, tag, partitionCols, order, nullOrder, numReducers, acidOperation, hiveConf);
    // 2. Generate backtrack Select operator
    Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSink(keepColNames, rsOp.getConf().getOutputKeyColumnNames(), rsOp.getConf().getOutputValueColumnNames(), rsOp.getValueIndex(), input);
    SelectDesc selectDesc = new SelectDesc(new ArrayList<ExprNodeDesc>(descriptors.values()), new ArrayList<String>(descriptors.keySet()));
    ArrayList<ColumnInfo> cinfoLst = createColInfosSubset(input, keepColNames);
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, new RowSchema(cinfoLst), rsOp);
    selectOp.setColumnExprMap(descriptors);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + selectOp + " with row schema: [" + selectOp.getSchema() + "]");
    }
    return selectOp;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
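
The wiring above recurs in the other examples: build a SelectDesc from a name-to-expression map, create the SelectOperator through OperatorFactory, then register the column expression map so later passes can backtrack expressions. A hypothetical helper (not part of Hive) sketching just that pattern, assuming the same imports as in the Also used list and an existing parent operator, descriptors map and column list:

private static SelectOperator projectColumns(Operator<?> parent,
        Map<String, ExprNodeDesc> descriptors, ArrayList<ColumnInfo> colInfos) {
    // Column list and output names come from the same map, so they stay aligned.
    SelectDesc selectDesc = new SelectDesc(
        new ArrayList<ExprNodeDesc>(descriptors.values()),
        new ArrayList<String>(descriptors.keySet()));
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(
        selectDesc, new RowSchema(colInfos), parent);
    // Register name -> expression so column pruning and backtracking can follow the projection.
    selectOp.setColumnExprMap(descriptors);
    return selectOp;
}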

Example 2 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache, in class VectorSelectOperatorBench, method setup.

@Setup
public void setup(Blackhole bh) throws HiveException {
    HiveConf hconf = new HiveConf();
    List<String> columns = new ArrayList<String>();
    columns.add("a");
    columns.add("b");
    columns.add("c");
    VectorizationContext vc = new VectorizationContext("name", columns);
    selDesc = new SelectDesc(false);
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Long.class, "a", "table", false);
    ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Long.class, "b", "table", false);
    ExprNodeColumnDesc colDesc3 = new ExprNodeColumnDesc(Long.class, "c", "table", false);
    ExprNodeGenericFuncDesc plusDesc = new ExprNodeGenericFuncDesc();
    GenericUDF gudf = new GenericUDFOPPlus();
    plusDesc.setGenericUDF(gudf);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(colDesc1);
    children.add(colDesc2);
    plusDesc.setChildren(children);
    plusDesc.setTypeInfo(TypeInfoFactory.longTypeInfo);
    colList.add(plusDesc);
    colList.add(colDesc3);
    selDesc.setColList(colList);
    List<String> outputColNames = new ArrayList<String>();
    outputColNames.add("_col0");
    outputColNames.add("_col1");
    selDesc.setOutputColumnNames(outputColNames);
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    selDesc.setVectorDesc(vectorSelectDesc);
    List<ExprNodeDesc> selectColList = selDesc.getColList();
    VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()];
    for (int i = 0; i < selectColList.size(); i++) {
        ExprNodeDesc expr = selectColList.get(i);
        VectorExpression ve = vc.getVectorExpression(expr);
        vectorSelectExprs[i] = ve;
    }
    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
    vectorSelectDesc.setProjectedOutputColumns(new int[] { 3, 2 });
    CompilationOpContext opContext = new CompilationOpContext();
    vso = new VectorSelectOperator(opContext, selDesc, vc, vectorSelectDesc);
    // to trigger vectorForward
    child = new ArrayList<>();
    child.add(new BlackholeOperator(opContext, bh));
    child.add(new BlackholeOperator(opContext, bh));
    vso.initialize(hconf, null);
    vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(VectorizedRowBatch.DEFAULT_SIZE, 4, 17);
}
Also used : VectorSelectOperator(org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) BlackholeOperator(org.apache.hive.benchmark.vectorization.BlackholeOperator) GenericUDFOPPlus(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Setup(org.openjdk.jmh.annotations.Setup)
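
The setup above only prepares the operator; a hedged sketch of a JMH benchmark body that would drive it follows. The method name is invented, and it assumes the standard JMH @Benchmark annotation plus Hive's Operator.process(Object, int), which accepts a VectorizedRowBatch for vectorized operators:

@Benchmark
public void selectAndProject() throws HiveException {
    // Push the generated batch through the configured VectorSelectOperator.
    vso.process(vrg, 0);
}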

Example 3 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache, in class DynamicPartitionPruningOptimization, method generateEventOperatorPlan.

private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String column, String columnType, ExprNodeDesc predicate) {
    // we will put a fork in the plan at the source of the reduce sink
    Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
    // we need the expr that generated the key of the reduce sink
    ExprNodeDesc key = ctx.getKeyCol();
    // we also need the expr for the partitioned table
    ExprNodeDesc partKey = ctx.parent.getChildren().get(0);
    LOG.debug("key expr: {}; partition key expr: {}", key, partKey);
    List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(key);
    // group by requires "ArrayList", don't ask.
    ArrayList<String> outputNames = new ArrayList<String>();
    outputNames.add(HiveConf.getColumnInternalName(0));
    ArrayList<ColumnInfo> selectColInfos = new ArrayList<ColumnInfo>();
    selectColInfos.add(new ColumnInfo(outputNames.get(0), key.getTypeInfo(), "", false));
    // project the relevant key column
    SelectDesc select = new SelectDesc(keyExprs, outputNames);
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(selectColInfos), parentOfRS);
    Map<String, ExprNodeDesc> selectColumnExprMap = new HashMap<>();
    selectColumnExprMap.put(outputNames.get(0), key);
    selectOp.setColumnExprMap(selectColumnExprMap);
    // do a group by on the list to dedup
    float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    ArrayList<ExprNodeDesc> groupByExprs = new ArrayList<ExprNodeDesc>();
    ExprNodeDesc groupByExpr = new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), null, false);
    groupByExprs.add(groupByExpr);
    GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, outputNames, groupByExprs, new ArrayList<AggregationDesc>(), false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, true);
    ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
    groupbyColInfos.add(new ColumnInfo(outputNames.get(0), key.getTypeInfo(), "", false));
    GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(groupbyColInfos), selectOp);
    Map<String, ExprNodeDesc> colMap = new HashMap<String, ExprNodeDesc>();
    colMap.put(outputNames.get(0), groupByExpr);
    groupByOp.setColumnExprMap(colMap);
    // finally add the event broadcast operator
    if (HiveConf.getVar(parseContext.getConf(), ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        DynamicPruningEventDesc eventDesc = new DynamicPruningEventDesc();
        eventDesc.setTableScan(ts);
        eventDesc.setGenerator(ctx.generator);
        eventDesc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
        eventDesc.setTargetColumnName(column);
        eventDesc.setTargetColumnType(columnType);
        eventDesc.setPartKey(partKey);
        if (predicate != null) {
            eventDesc.setPredicate(predicate.clone());
        }
        OperatorFactory.getAndMakeChild(eventDesc, groupByOp);
    } else {
        // Must be spark branch
        SparkPartitionPruningSinkDesc desc = new SparkPartitionPruningSinkDesc();
        desc.setTable(PlanUtils.getReduceValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
        desc.addTarget(column, columnType, partKey, null, ts);
        SparkPartitionPruningSinkOperator dppSink = (SparkPartitionPruningSinkOperator) OperatorFactory.getAndMakeChild(desc, groupByOp);
        if (HiveConf.getBoolVar(parseContext.getConf(), ConfVars.HIVE_COMBINE_EQUIVALENT_WORK_OPTIMIZATION)) {
            mayReuseExistingDPPSink(parentOfRS, Arrays.asList(selectOp, groupByOp, dppSink));
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator)
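
Stripped of the configuration lookups, the Tez branch reduces to describing the runtime pruning event and hanging it under the dedup group-by. A hypothetical condensed sketch using only the setters shown above (ts, ctx, keyExprs, partKey, column, columnType and groupByOp stand for the values built earlier in the method):

DynamicPruningEventDesc eventDesc = new DynamicPruningEventDesc();
eventDesc.setTableScan(ts);                 // table scan whose partitions get pruned
eventDesc.setGenerator(ctx.generator);      // reduce sink producing the pruning values
eventDesc.setTable(PlanUtils.getReduceValueTableDesc(
    PlanUtils.getFieldSchemasFromColumnList(keyExprs, "key")));
eventDesc.setTargetColumnName(column);
eventDesc.setTargetColumnType(columnType);
eventDesc.setPartKey(partKey);
OperatorFactory.getAndMakeChild(eventDesc, groupByOp);  // broadcast from the dedup group-by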

Example 4 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache, in class DynamicPartitionPruningOptimization, method generateSemiJoinOperatorPlan.

// Generates plan for min/max when dynamic partition pruning is ruled out.
private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContext parseContext, TableScanOperator ts, String keyBaseAlias, String internalColName, String colName, SemiJoinHint sjHint) throws SemanticException {
    // we will put a fork in the plan at the source of the reduce sink
    Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
    // we need the expr that generated the key of the reduce sink
    ExprNodeDesc key = ctx.getKeyCol();
    assert colName != null;
    // Fetch the TableScan Operator.
    Operator<?> op = parentOfRS;
    while (!(op == null || op instanceof TableScanOperator || op instanceof ReduceSinkOperator)) {
        op = op.getParentOperators().get(0);
    }
    Preconditions.checkNotNull(op);
    if (op instanceof TableScanOperator) {
        Table table = ((TableScanOperator) op).getConf().getTableMetadata();
        if (table.isPartitionKey(colName)) {
            // The column is partition column, skip the optimization.
            return false;
        }
    }
    // Check if there already exists a semijoin branch
    GroupByOperator gb = parseContext.getColExprToGBMap().get(key);
    if (gb != null) {
        // Already an existing semijoin branch, reuse it
        createFinalRsForSemiJoinOp(parseContext, ts, gb, key, keyBaseAlias, ctx.parent.getChildren().get(0), sjHint != null);
        // done!
        return true;
    }
    List<ExprNodeDesc> keyExprs = new ArrayList<ExprNodeDesc>();
    keyExprs.add(key);
    // group by requires "ArrayList", don't ask.
    ArrayList<String> outputNames = new ArrayList<String>();
    // project the relevant key column
    SelectDesc select = new SelectDesc(keyExprs, outputNames);
    // Create the new RowSchema for the projected column
    ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName);
    columnInfo = new ColumnInfo(columnInfo);
    outputNames.add(internalColName);
    ArrayList<ColumnInfo> signature = new ArrayList<ColumnInfo>();
    signature.add(columnInfo);
    RowSchema rowSchema = new RowSchema(signature);
    // Create the column expr map
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ExprNodeDesc exprNode = null;
    if (columnInfo == null) {
        LOG.debug("No ColumnInfo found in {} for {}", parentOfRS.getOperatorId(), internalColName);
        return false;
    }
    exprNode = new ExprNodeColumnDesc(columnInfo);
    colExprMap.put(internalColName, exprNode);
    // Create the Select Operator
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, rowSchema, colExprMap, parentOfRS);
    // do a group by to aggregate min,max and bloom filter.
    float groupByMemoryUsage = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(parseContext.getConf(), HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    float minReductionHashAggr = HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTION);
    float minReductionHashAggrLowerBound = HiveConf.getFloatVar(parseContext.getConf(), ConfVars.HIVEMAPAGGRHASHMINREDUCTIONLOWERBOUND);
    // Add min/max and bloom filter aggregations
    List<ObjectInspector> aggFnOIs = new ArrayList<ObjectInspector>();
    aggFnOIs.add(key.getWritableObjectInspector());
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(new ExprNodeColumnDesc(key.getTypeInfo(), outputNames.get(0), "", false));
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    try {
        AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", aggFnOIs, false, false), params, false, Mode.PARTIAL1);
        GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
        bloomFilterEval.setSourceOperator(selectOp);
        if (sjHint != null && sjHint.getNumEntries() > 0) {
            LOG.debug("Setting size for " + keyBaseAlias + " to " + sjHint.getNumEntries() + " based on the hint");
            bloomFilterEval.setHintEntries(sjHint.getNumEntries());
        }
        bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
        bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
        aggs.add(min);
        aggs.add(max);
        aggs.add(bloomFilter);
    } catch (SemanticException e) {
        LOG.error("Error creating min/max aggregations on key", e);
        throw new IllegalStateException("Error creating min/max aggregations on key", e);
    }
    // Create the Group by Operator
    ArrayList<String> gbOutputNames = new ArrayList<String>();
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(0));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(1));
    gbOutputNames.add(SemanticAnalyzer.getColumnInternalName(2));
    GroupByDesc groupBy = new GroupByDesc(GroupByDesc.Mode.HASH, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggs, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, -1, false);
    ArrayList<ColumnInfo> groupbyColInfos = new ArrayList<ColumnInfo>();
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(0), key.getTypeInfo(), "", false));
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(1), key.getTypeInfo(), "", false));
    groupbyColInfos.add(new ColumnInfo(gbOutputNames.get(2), key.getTypeInfo(), "", false));
    GroupByOperator groupByOp = (GroupByOperator) OperatorFactory.getAndMakeChild(groupBy, new RowSchema(groupbyColInfos), selectOp);
    groupByOp.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    // Get the column names of the aggregations for reduce sink
    int colPos = 0;
    ArrayList<ExprNodeDesc> rsValueCols = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < aggs.size() - 1; i++) {
        ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(key.getTypeInfo(), gbOutputNames.get(colPos), "", false);
        rsValueCols.add(colExpr);
        columnExprMap.put(gbOutputNames.get(colPos), colExpr);
        colPos++;
    }
    // Bloom Filter uses binary
    ExprNodeColumnDesc colExpr = new ExprNodeColumnDesc(TypeInfoFactory.binaryTypeInfo, gbOutputNames.get(colPos), "", false);
    rsValueCols.add(colExpr);
    columnExprMap.put(gbOutputNames.get(colPos), colExpr);
    colPos++;
    // Create the reduce sink operator
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(new ArrayList<ExprNodeDesc>(), rsValueCols, gbOutputNames, false, -1, 0, 1, Operation.NOT_ACID, NullOrdering.defaultNullOrder(parseContext.getConf()));
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(groupByOp.getSchema()), groupByOp);
    rsOp.setColumnExprMap(columnExprMap);
    rsOp.getConf().setReducerTraits(EnumSet.of(ReduceSinkDesc.ReducerTraits.QUICKSTART));
    // Create the final Group By Operator
    ArrayList<AggregationDesc> aggsFinal = new ArrayList<AggregationDesc>();
    try {
        List<ObjectInspector> minFinalFnOIs = new ArrayList<ObjectInspector>();
        List<ObjectInspector> maxFinalFnOIs = new ArrayList<ObjectInspector>();
        List<ObjectInspector> bloomFilterFinalFnOIs = new ArrayList<ObjectInspector>();
        ArrayList<ExprNodeDesc> minFinalParams = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> maxFinalParams = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> bloomFilterFinalParams = new ArrayList<ExprNodeDesc>();
        // Use the expressions from Reduce Sink.
        minFinalFnOIs.add(rsValueCols.get(0).getWritableObjectInspector());
        maxFinalFnOIs.add(rsValueCols.get(1).getWritableObjectInspector());
        bloomFilterFinalFnOIs.add(rsValueCols.get(2).getWritableObjectInspector());
        // Coming from a ReduceSink the aggregations would be in the form VALUE._col0, VALUE._col1
        minFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(0).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(0), "", false));
        maxFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(1).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(1), "", false));
        bloomFilterFinalParams.add(new ExprNodeColumnDesc(rsValueCols.get(2).getTypeInfo(), Utilities.ReduceField.VALUE + "." + gbOutputNames.get(2), "", false));
        AggregationDesc min = new AggregationDesc("min", FunctionRegistry.getGenericUDAFEvaluator("min", minFinalFnOIs, false, false), minFinalParams, false, Mode.FINAL);
        AggregationDesc max = new AggregationDesc("max", FunctionRegistry.getGenericUDAFEvaluator("max", maxFinalFnOIs, false, false), maxFinalParams, false, Mode.FINAL);
        AggregationDesc bloomFilter = new AggregationDesc("bloom_filter", FunctionRegistry.getGenericUDAFEvaluator("bloom_filter", bloomFilterFinalFnOIs, false, false), bloomFilterFinalParams, false, Mode.FINAL);
        GenericUDAFBloomFilterEvaluator bloomFilterEval = (GenericUDAFBloomFilterEvaluator) bloomFilter.getGenericUDAFEvaluator();
        bloomFilterEval.setSourceOperator(selectOp);
        if (sjHint != null && sjHint.getNumEntries() > 0) {
            bloomFilterEval.setHintEntries(sjHint.getNumEntries());
        }
        bloomFilterEval.setMaxEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setMinEntries(parseContext.getConf().getLongVar(ConfVars.TEZ_MIN_BLOOM_FILTER_ENTRIES));
        bloomFilterEval.setFactor(parseContext.getConf().getFloatVar(ConfVars.TEZ_BLOOM_FILTER_FACTOR));
        bloomFilter.setGenericUDAFWritableEvaluator(bloomFilterEval);
        aggsFinal.add(min);
        aggsFinal.add(max);
        aggsFinal.add(bloomFilter);
    } catch (SemanticException e) {
        LOG.error("Error creating min/max aggregations on key", e);
        throw new IllegalStateException("Error creating min/max aggregations on key", e);
    }
    GroupByDesc groupByDescFinal = new GroupByDesc(GroupByDesc.Mode.FINAL, gbOutputNames, new ArrayList<ExprNodeDesc>(), aggsFinal, false, groupByMemoryUsage, memoryThreshold, minReductionHashAggr, minReductionHashAggrLowerBound, null, false, 0, false);
    GroupByOperator groupByOpFinal = (GroupByOperator) OperatorFactory.getAndMakeChild(groupByDescFinal, new RowSchema(rsOp.getSchema()), rsOp);
    groupByOpFinal.setColumnExprMap(new HashMap<String, ExprNodeDesc>());
    createFinalRsForSemiJoinOp(parseContext, ts, groupByOpFinal, key, keyBaseAlias, ctx.parent.getChildren().get(0), sjHint != null);
    return true;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) GenericUDAFBloomFilterEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Table(org.apache.hadoop.hive.ql.metadata.Table) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) SemiJoinHint(org.apache.hadoop.hive.ql.parse.SemiJoinHint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc)
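
Each partial-side aggregation above follows the same pattern: resolve the GenericUDAF evaluator by name through FunctionRegistry, then wrap it in an AggregationDesc over the projected key column. A hypothetical helper (names invented) sketching that pattern for min, using only the calls that appear in the example:

private static AggregationDesc partialMin(ExprNodeDesc keyExpr, String internalColName)
        throws SemanticException {
    List<ObjectInspector> aggFnOIs = new ArrayList<ObjectInspector>();
    aggFnOIs.add(keyExpr.getWritableObjectInspector());
    ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
    params.add(new ExprNodeColumnDesc(keyExpr.getTypeInfo(), internalColName, "", false));
    // PARTIAL1: map-side aggregation whose result is later merged by the FINAL group by.
    return new AggregationDesc("min",
        FunctionRegistry.getGenericUDAFEvaluator("min", aggFnOIs, false, false),
        params, false, Mode.PARTIAL1);
}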

Example 5 with SelectDesc

Use of org.apache.hadoop.hive.ql.plan.SelectDesc in project hive by apache, in class ColumnPrunerProcCtx, method handleFilterUnionChildren.

/**
 * If the input filter operator has direct child(ren) which are union operator,
 * and the filter's column is not the same as union's
 * create select operator between them. The select operator has same number of columns as
 * pruned child operator.
 *
 * @param curOp
 *          The filter operator which need to handle children.
 * @throws SemanticException
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
    if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
        return;
    }
    List<FieldNode> parentPrunList = prunedColLists.get(curOp);
    if (parentPrunList == null || parentPrunList.size() == 0) {
        return;
    }
    List<FieldNode> prunList = null;
    for (Operator<? extends OperatorDesc> child : curOp.getChildOperators()) {
        if (child instanceof UnionOperator) {
            prunList = genColLists(child);
            if (prunList == null || prunList.size() == 0 || parentPrunList.size() == prunList.size()) {
                continue;
            }
            ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
            ArrayList<String> outputColNames = new ArrayList<String>();
            Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
            ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
            for (ColumnInfo colInfo : child.getSchema().getSignature()) {
                if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
                    continue;
                }
                ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
                exprs.add(colDesc);
                outputColNames.add(colInfo.getInternalName());
                ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
                newCol.setAlias(colInfo.getAlias());
                outputRS.add(newCol);
                colExprMap.put(colInfo.getInternalName(), colDesc);
            }
            SelectDesc select = new SelectDesc(exprs, outputColNames, false);
            curOp.removeChild(child);
            SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
            OperatorFactory.makeChild(sel, child);
            sel.setColumnExprMap(colExprMap);
        }
    }
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
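
The key move in this example is the re-parenting: the union is detached from the filter, the new select is attached under the filter, and the union is then re-attached under the select. A minimal hypothetical sketch of that splice, using only the factory calls from the example (select, outputRS and colExprMap are assumed to be built as above):

curOp.removeChild(child);                                    // detach the union from the filter
SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
    select, new RowSchema(outputRS), curOp);                 // filter -> new pruning select
OperatorFactory.makeChild(sel, child);                       // new select -> union
sel.setColumnExprMap(colExprMap);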

Aggregations

SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 55 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 50 usages
ArrayList (java.util.ArrayList): 43 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 32 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 31 usages
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 30 usages
HashMap (java.util.HashMap): 28 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 25 usages
LinkedHashMap (java.util.LinkedHashMap): 20 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 16 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 15 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 13 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 13 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 13 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 13 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 13 usages
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 13 usages
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 13 usages
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 12 usages