Examples with GroupReduceOperatorBase - org.apache.flink.api.common.operators.base.GroupReduceOperatorBase

Example 1 with GroupReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in the Apache Flink project.

From the class AggregateOperator, method translateToDataFlow.

/**
 * Translates this aggregation into a combinable {@link GroupReduceOperatorBase} that runs an
 * {@code AggregatingUdf} over the configured aggregation functions and field positions.
 *
 * <p>Unless an explicit name was set, the operator name is generated as
 * {@code FUNC(field),FUNC(field) at <aggregateLocationName>}.
 *
 * <p>Without a grouping the operator is created with an empty key set. With expression keys the
 * logical key positions are used as grouping keys, the grouping's custom partitioner is applied,
 * and every key field that is not itself an aggregated field is declared as forwarded.
 *
 * @param input the operator that is set as the input of the created operator
 * @return the configured group-reduce operator
 * @throws IllegalStateException if no aggregation functions are registered or their number
 *     differs from the number of registered fields
 * @throws UnsupportedOperationException if the grouping uses key selector functions or another
 *     key type that is not an expression key
 */
@SuppressWarnings("unchecked")
@Override
@Internal
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // sanity check
    if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
        throw new IllegalStateException();
    }
    // construct the aggregation function
    AggregationFunction<Object>[] aggFunctions = new AggregationFunction[this.aggregationFunctions.size()];
    int[] fields = new int[this.fields.size()];
    StringBuilder genName = new StringBuilder();
    for (int i = 0; i < fields.length; i++) {
        aggFunctions[i] = (AggregationFunction<Object>) this.aggregationFunctions.get(i);
        fields[i] = this.fields.get(i);
        genName.append(aggFunctions[i].toString()).append('(').append(fields[i]).append(')').append(',');
    }
    // Drop the trailing comma BEFORE appending the location suffix; trimming afterwards would
    // cut the last character of the location name and keep the comma. The sanity check above
    // guarantees at least one entry, so the builder is never empty here.
    genName.setLength(genName.length() - 1);
    genName.append(" at ").append(aggregateLocationName);
    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(aggFunctions, fields);
    // an explicitly set name wins over the generated one
    String name = getName() != null ? getName() : genName.toString();
    // distinguish between grouped reduce and non-grouped reduce
    if (this.grouping == null) {
        // non grouped aggregation: empty key set
        UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, new int[0], name);
        po.setCombinable(true);
        // set input
        po.setInput(input);
        // set parallelism
        po.setParallelism(this.getParallelism());
        return po;
    }
    if (this.grouping.getKeys() instanceof Keys.ExpressionKeys) {
        // grouped aggregation
        int[] logicalKeyPositions = this.grouping.getKeys().computeLogicalKeyPositions();
        UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, logicalKeyPositions, name);
        po.setCombinable(true);
        po.setInput(input);
        po.setParallelism(this.getParallelism());
        po.setCustomPartitioner(grouping.getCustomPartitioner());
        // declare every key field that is not also an aggregated field as forwarded
        SingleInputSemanticProperties props = new SingleInputSemanticProperties();
        for (int keyField : logicalKeyPositions) {
            boolean keyFieldUsedInAgg = false;
            for (int aggField : fields) {
                if (keyField == aggField) {
                    keyFieldUsedInAgg = true;
                    break;
                }
            }
            if (!keyFieldUsedInAgg) {
                props.addForwardedField(keyField, keyField);
            }
        }
        po.setSemanticProperties(props);
        return po;
    } else if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
        throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
    } else {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }
}

Also used : GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) AggregationFunction(org.apache.flink.api.java.aggregation.AggregationFunction) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Keys(org.apache.flink.api.common.operators.Keys) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties) Internal(org.apache.flink.annotation.Internal)

Example 2 with GroupReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in the Apache Flink project.

From the class ScalaAggregateOperator, method translateToDataFlow.

/**
 * Builds the combinable {@link GroupReduceOperatorBase} that executes this aggregation through an
 * {@code AggregatingUdf} created from the input type, the aggregation functions and their fields.
 *
 * <p>The operator name is the explicitly set name or, if none was set, a generated list of the
 * form {@code FUNC(field),FUNC(field)}. An ungrouped aggregation gets an empty key set; a
 * grouping on expression keys additionally receives forwarded-field properties for all key
 * fields that are not aggregated, and the grouping's custom partitioner.
 *
 * @param input the operator that is set as the input of the created operator
 * @return the configured group-reduce operator
 * @throws IllegalStateException if no aggregation functions are registered or their number
 *     differs from the number of registered fields
 * @throws UnsupportedOperationException if the grouping is not based on expression keys
 */
@SuppressWarnings("unchecked")
@Override
protected org.apache.flink.api.common.operators.base.GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> translateToDataFlow(Operator<IN> input) {
    // the function list and the field list must be non-empty and of equal length
    if (this.aggregationFunctions.isEmpty() || this.aggregationFunctions.size() != this.fields.size()) {
        throw new IllegalStateException();
    }

    // copy functions and field positions into arrays and derive the default name on the way
    AggregationFunction<Object>[] aggFunctions = new AggregationFunction[this.aggregationFunctions.size()];
    int[] aggFields = new int[this.fields.size()];
    StringBuilder generatedName = new StringBuilder();
    for (int idx = 0; idx < aggFields.length; idx++) {
        AggregationFunction<Object> aggFunction = (AggregationFunction<Object>) this.aggregationFunctions.get(idx);
        int fieldPosition = this.fields.get(idx);
        aggFunctions[idx] = aggFunction;
        aggFields[idx] = fieldPosition;
        if (idx > 0) {
            generatedName.append(',');
        }
        generatedName.append(aggFunction.toString()).append('(').append(fieldPosition).append(')');
    }

    @SuppressWarnings("rawtypes") RichGroupReduceFunction<IN, IN> function = new AggregatingUdf(getInputType(), aggFunctions, aggFields);

    String name = getName();
    if (name == null) {
        name = generatedName.toString();
    }

    // only no grouping at all or a grouping on expression keys can be translated
    final boolean grouped = this.grouping != null;
    if (grouped && !(this.grouping.getKeys() instanceof Keys.ExpressionKeys)) {
        if (this.grouping.getKeys() instanceof Keys.SelectorFunctionKeys) {
            throw new UnsupportedOperationException("Aggregate does not support grouping with KeySelector functions, yet.");
        }
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    // an ungrouped aggregation is expressed as a reduce over an empty key set
    int[] keyPositions = grouped ? this.grouping.getKeys().computeLogicalKeyPositions() : new int[0];
    UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GroupReduceFunction<IN, IN>>(function, operatorInfo, keyPositions, name);
    po.setCombinable(true);
    po.setInput(input);
    po.setParallelism(this.getParallelism());
    if (!grouped) {
        return po;
    }

    // key fields that are not aggregated are declared as forwarded
    SingleInputSemanticProperties props = new SingleInputSemanticProperties();
    nextKey:
    for (int keyPosition : keyPositions) {
        for (int aggPosition : aggFields) {
            if (aggPosition == keyPosition) {
                continue nextKey;
            }
        }
        props.addForwardedField(keyPosition, keyPosition);
    }
    po.setSemanticProperties(props);
    po.setCustomPartitioner(grouping.getCustomPartitioner());
    return po;
}

Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) AggregationFunction(org.apache.flink.api.java.aggregation.AggregationFunction) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) Keys(org.apache.flink.api.common.operators.Keys) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) SingleInputSemanticProperties(org.apache.flink.api.common.operators.SingleInputSemanticProperties)

Example 3 with GroupReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in the Apache Flink project.

From the class GroupReduceOperator, method translateToDataFlow.

// --------------------------------------------------------------------------------------------
// Translation
// --------------------------------------------------------------------------------------------
/**
 * Translates this group-reduce into its {@link GroupReduceOperatorBase} representation.
 *
 * <p>If the operator is combinable and the user function is a {@code CombineFunction}, the
 * function field is first replaced by a wrapper exposing it as a group-combine function. The
 * translation then depends on the grouping:
 * <ul>
 *   <li>no grouping: an operator with an empty key set and parallelism 1,</li>
 *   <li>selector function keys: a plan-unwrapping operator, sorted if the grouping is a
 *       {@code SortedGrouping},</li>
 *   <li>expression keys: an operator on the logical key positions, with a group order if the
 *       grouping is a {@code SortedGrouping}.</li>
 * </ul>
 *
 * @param input the operator that is set as the input of the created operator
 * @return the configured group-reduce operator
 * @throws UnsupportedOperationException if the grouping uses a key type other than selector
 *     function keys or expression keys
 */
@Override
@SuppressWarnings("unchecked")
protected GroupReduceOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) {
    String name = getName();
    if (name == null) {
        name = "GroupReduce at " + defaultName;
    }

    // a combinable CombineFunction is wrapped so that it acts as a GroupCombineFunction
    if (combinable && function instanceof CombineFunction<?, ?>) {
        if (function instanceof RichGroupReduceFunction<?, ?>) {
            this.function = new RichCombineToGroupCombineWrapper((RichGroupReduceFunction<?, ?>) function);
        } else {
            this.function = new CombineToGroupCombineWrapper((CombineFunction<?, ?>) function);
        }
    }

    // case 1: reduce without any grouping
    if (grouper == null) {
        UnaryOperatorInformation<IN, OUT> ungroupedInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
        GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> ungrouped = new GroupReduceOperatorBase<>(function, ungroupedInfo, new int[0], name);
        ungrouped.setCombinable(combinable);
        ungrouped.setInput(input);
        // the parallelism for a non grouped reduce can only be 1
        ungrouped.setParallelism(1);
        return ungrouped;
    }

    // case 2: grouping defined by key selector functions
    if (grouper.getKeys() instanceof SelectorFunctionKeys) {
        SelectorFunctionKeys<IN, ?> groupKeys = (SelectorFunctionKeys<IN, ?>) grouper.getKeys();
        if (!(grouper instanceof SortedGrouping)) {
            PlanUnwrappingReduceGroupOperator<IN, OUT, ?> unsorted = translateSelectorFunctionReducer(groupKeys, function, getResultType(), name, input, isCombinable());
            unsorted.setParallelism(this.getParallelism());
            unsorted.setCustomPartitioner(grouper.getCustomPartitioner());
            return unsorted;
        }
        SortedGrouping<IN> sorted = (SortedGrouping<IN>) grouper;
        SelectorFunctionKeys<IN, ?> sortKeys = sorted.getSortSelectionFunctionKey();
        Ordering groupOrder = sorted.getGroupOrdering();
        PlanUnwrappingSortedReduceGroupOperator<IN, OUT, ?, ?> sortedOp = translateSelectorFunctionSortedReducer(groupKeys, sortKeys, groupOrder, function, getResultType(), name, input, isCombinable());
        sortedOp.setParallelism(this.getParallelism());
        sortedOp.setCustomPartitioner(grouper.getCustomPartitioner());
        return sortedOp;
    }

    // anything that is not an expression key at this point cannot be translated
    if (!(grouper.getKeys() instanceof ExpressionKeys)) {
        throw new UnsupportedOperationException("Unrecognized key type.");
    }

    // case 3: grouping defined by expression keys
    int[] keyPositions = grouper.getKeys().computeLogicalKeyPositions();
    UnaryOperatorInformation<IN, OUT> groupedInfo = new UnaryOperatorInformation<>(getInputType(), getResultType());
    GroupReduceOperatorBase<IN, OUT, GroupReduceFunction<IN, OUT>> groupedOp = new GroupReduceOperatorBase<>(function, groupedInfo, keyPositions, name);
    groupedOp.setCombinable(combinable);
    groupedOp.setInput(input);
    groupedOp.setParallelism(getParallelism());
    groupedOp.setCustomPartitioner(grouper.getCustomPartitioner());

    // a sorted grouping contributes the order of elements within each group
    if (grouper instanceof SortedGrouping) {
        SortedGrouping<IN> sortedGrouping = (SortedGrouping<IN>) grouper;
        int[] sortPositions = sortedGrouping.getGroupSortKeyPositions();
        Order[] sortDirections = sortedGrouping.getGroupSortOrders();
        Ordering groupOrdering = new Ordering();
        for (int k = 0; k < sortPositions.length; k++) {
            groupOrdering.appendOrdering(sortPositions[k], null, sortDirections[k]);
        }
        groupedOp.setGroupOrder(groupOrdering);
    }
    return groupedOp;
}

Also used : RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) RichGroupReduceFunction(org.apache.flink.api.common.functions.RichGroupReduceFunction) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) ExpressionKeys(org.apache.flink.api.common.operators.Keys.ExpressionKeys) RichCombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper) CombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.CombineToGroupCombineWrapper) UnaryOperatorInformation(org.apache.flink.api.common.operators.UnaryOperatorInformation) RichCombineToGroupCombineWrapper(org.apache.flink.api.java.operators.translation.RichCombineToGroupCombineWrapper) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Ordering(org.apache.flink.api.common.operators.Ordering)

Example 4 with GroupReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in the Apache Flink project.

From the class UnionPropertyPropagationTest, method testUnion2.

/**
 * Unions several flat-mapped copies of one text source, groups and aggregates the union, and
 * checks the optimized plan: the group-reduce must read from the union via a forward channel,
 * and the union must have exactly the expected number of hash-partitioned flat-map inputs.
 */
@Test
public void testUnion2() {
    final int expectedUnionInputs = 4;

    // program: the same source is flat-mapped several times, all results are unioned,
    // and the unioned data set is grouped and aggregated
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> source = env.readTextFile(IN_FILE);
    DataSet<Tuple2<String, Integer>> unioned = source.flatMap(new DummyFlatMap());
    for (int added = 1; added < expectedUnionInputs; added++) {
        unioned = unioned.union(source.flatMap(new DummyFlatMap()));
    }
    DataSet<Tuple2<String, Integer>> aggregated = unioned.groupBy(0).aggregate(Aggregations.SUM, 1);
    aggregated.writeAsText(OUT_FILE);

    // optimize the program plan
    OptimizedPlan oPlan = compileNoStats(env.createProgramPlan("Test union on new java-api"));

    // generating the job graph must not throw
    new JobGraphGenerator().compileJobGraph(oPlan);

    Visitor<PlanNode> planChecker = new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode node) {
            // output side of the union: it has to sit directly below the group operator
            // and be connected with a forward strategy
            boolean isGroupReduce = node instanceof SingleInputPlanNode && node.getProgramOperator() instanceof GroupReduceOperatorBase;
            if (isGroupReduce) {
                final Channel reduceInput = ((SingleInputPlanNode) node).getInput();
                Assert.assertTrue("Union should just forward the Partitioning", reduceInput.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue("Union Node should be under Group operator", reduceInput.getSource() instanceof NAryUnionPlanNode);
            }

            if (!(node instanceof NAryUnionPlanNode)) {
                return true;
            }

            // input side of the union: every input has to be a flat map that is
            // hash-partitioned, and the number of inputs has to match
            int seenInputs = 0;
            for (Channel unionInput : node.getInputs()) {
                PlanNode producer = unionInput.getSource();
                Assert.assertTrue("Input of Union should be FlatMapOperators", producer.getProgramOperator() instanceof FlatMapOperatorBase);
                Assert.assertTrue("Shipment strategy under union should partition the data", unionInput.getShipStrategy() == ShipStrategyType.PARTITION_HASH);
                seenInputs++;
            }
            Assert.assertTrue("NAryUnion should have " + expectedUnionInputs + " inputs", seenInputs == expectedUnionInputs);
            // do not descend below the union
            return false;
        }

        @Override
        public void postVisit(PlanNode node) {
            // nothing to check after the visit
        }
    };
    oPlan.accept(planChecker);
}

Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Iterator(java.util.Iterator) Test(org.junit.Test)

Example 5 with GroupReduceOperatorBase

Use of org.apache.flink.api.common.operators.base.GroupReduceOperatorBase in the Apache Flink project.

From the class UnionPropertyPropagationTest, method testUnion1.

/**
 * Groups and reduces two sequences separately, unions the results, groups and reduces the union
 * again, and checks in the optimized plan that the first group-reduce node visited receives all
 * of its inputs through forward channels.
 */
@Test
public void testUnion1() {
    // program: two independently reduced sources, unioned and reduced once more
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Long> leftSource = env.generateSequence(0, 1);
    DataSet<Long> rightSource = env.generateSequence(0, 1);
    DataSet<Long> leftReduced = leftSource.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
    DataSet<Long> rightReduced = rightSource.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
    leftReduced.union(rightReduced)
            .groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>())
            .output(new DiscardingOutputFormat<Long>());

    // optimize the program plan
    OptimizedPlan oPlan = compileNoStats(env.createProgramPlan());

    // generating the job graph must not throw
    new JobGraphGenerator().compileJobGraph(oPlan);

    Visitor<PlanNode> planChecker = new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode node) {
            boolean isGroupReduce = node instanceof SingleInputPlanNode && node.getProgramOperator() instanceof GroupReduceOperatorBase;
            if (!isGroupReduce) {
                return true;
            }
            for (Channel reduceInput : node.getInputs()) {
                Assert.assertTrue("Reduce should just forward the input if it is already partitioned", reduceInput.getShipStrategy() == ShipStrategyType.FORWARD);
            }
            // only the latest reduce node is checked, so stop descending here
            return false;
        }

        @Override
        public void postVisit(PlanNode node) {
            // nothing to check after the visit
        }
    };
    oPlan.accept(planChecker);
}

Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Channel(org.apache.flink.optimizer.plan.Channel) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) PlanNode(org.apache.flink.optimizer.plan.PlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) NAryUnionPlanNode(org.apache.flink.optimizer.plan.NAryUnionPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) Test(org.junit.Test)

Aggregations

GroupReduceOperatorBase (org.apache.flink.api.common.operators.base.GroupReduceOperatorBase)9 UnaryOperatorInformation (org.apache.flink.api.common.operators.UnaryOperatorInformation)4 PlanNode (org.apache.flink.optimizer.plan.PlanNode)4 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)4 Plan (org.apache.flink.api.common.Plan)3 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)3 RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction)3 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)3 Channel (org.apache.flink.optimizer.plan.Channel)3 NAryUnionPlanNode (org.apache.flink.optimizer.plan.NAryUnionPlanNode)3 Test (org.junit.Test)3 Keys (org.apache.flink.api.common.operators.Keys)2 Ordering (org.apache.flink.api.common.operators.Ordering)2 SingleInputSemanticProperties (org.apache.flink.api.common.operators.SingleInputSemanticProperties)2 FlatMapOperatorBase (org.apache.flink.api.common.operators.base.FlatMapOperatorBase)2 MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase)2 MapPartitionOperatorBase (org.apache.flink.api.common.operators.base.MapPartitionOperatorBase)2 AggregationFunction (org.apache.flink.api.java.aggregation.AggregationFunction)2 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)2 GroupReduceNode (org.apache.flink.optimizer.dag.GroupReduceNode)2