Search in sources:

Example 1 with CoGroupOperatorBase

use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.

the class CoGroupSortTranslationTest method testSortTuplesAndPojos.

/**
 * Checks that group-sort settings declared on a coGroup of a tuple data set with a POJO
 * data set show up on the translated {@code CoGroupOperatorBase}.
 *
 * <p>The first input is group-sorted on tuple position 0 (descending). The second input is
 * group-sorted on POJO field "c" (ascending) and then on field "a" (descending).
 *
 * @throws Exception any unexpected failure is propagated unchanged so that the test
 *     framework reports the original exception together with its stack trace
 */
@Test
public void testSortTuplesAndPojos() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataSet<TestPoJo> input2 = env.fromElements(new TestPoJo());

    // The coGroup function body is irrelevant here; only the translated plan is inspected.
    input1.coGroup(input2)
            .where(1)
            .equalTo("b")
            .sortFirstGroup(0, Order.DESCENDING)
            .sortSecondGroup("c", Order.ASCENDING)
            .sortSecondGroup("a", Order.DESCENDING)
            .with(new CoGroupFunction<Tuple2<Long, Long>, TestPoJo, Long>() {

                @Override
                public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<TestPoJo> second, Collector<Long> out) {
                }
            })
            .output(new DiscardingOutputFormat<Long>());

    Plan p = env.createProgramPlan();
    GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
    CoGroupOperatorBase<?, ?, ?, ?> coGroup = (CoGroupOperatorBase<?, ?, ?, ?>) sink.getInput();

    assertNotNull(coGroup.getGroupOrderForInputOne());
    assertNotNull(coGroup.getGroupOrderForInputTwo());

    // first input: a single sort field, tuple position 0, descending
    assertEquals(1, coGroup.getGroupOrderForInputOne().getNumberOfFields());
    assertEquals(0, coGroup.getGroupOrderForInputOne().getFieldNumber(0).intValue());
    assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputOne().getOrder(0));

    // second input: two sort fields, in the order the sortSecondGroup calls were made
    // (2 and 0 are presumably the field positions of "c" and "a" in TestPoJo; its
    // definition is not visible here)
    assertEquals(2, coGroup.getGroupOrderForInputTwo().getNumberOfFields());
    assertEquals(2, coGroup.getGroupOrderForInputTwo().getFieldNumber(0).intValue());
    assertEquals(0, coGroup.getGroupOrderForInputTwo().getFieldNumber(1).intValue());
    assertEquals(Order.ASCENDING, coGroup.getGroupOrderForInputTwo().getOrder(0));
    assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputTwo().getOrder(1));
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Example 2 with CoGroupOperatorBase

use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.

the class CoGroupSortTranslationTest method testGroupSortTuples.

/**
 * Checks that group-sort settings declared on a coGroup of two tuple data sets show up on
 * the translated {@code CoGroupOperatorBase}.
 *
 * <p>The first input is group-sorted on tuple position 0 (descending). The second input is
 * group-sorted on position 1 (ascending) and then on position 0 (descending).
 *
 * @throws Exception any unexpected failure is propagated unchanged so that the test
 *     framework reports the original exception together with its stack trace
 */
@Test
public void testGroupSortTuples() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(0L, 0L));
    DataSet<Tuple3<Long, Long, Long>> input2 = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

    // The coGroup function body is irrelevant here; only the translated plan is inspected.
    input1.coGroup(input2)
            .where(1)
            .equalTo(2)
            .sortFirstGroup(0, Order.DESCENDING)
            .sortSecondGroup(1, Order.ASCENDING)
            .sortSecondGroup(0, Order.DESCENDING)
            .with(new CoGroupFunction<Tuple2<Long, Long>, Tuple3<Long, Long, Long>, Long>() {

                @Override
                public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<Tuple3<Long, Long, Long>> second, Collector<Long> out) {
                }
            })
            .output(new DiscardingOutputFormat<Long>());

    Plan p = env.createProgramPlan();
    GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
    CoGroupOperatorBase<?, ?, ?, ?> coGroup = (CoGroupOperatorBase<?, ?, ?, ?>) sink.getInput();

    assertNotNull(coGroup.getGroupOrderForInputOne());
    assertNotNull(coGroup.getGroupOrderForInputTwo());

    // first input: a single sort field, tuple position 0, descending
    assertEquals(1, coGroup.getGroupOrderForInputOne().getNumberOfFields());
    assertEquals(0, coGroup.getGroupOrderForInputOne().getFieldNumber(0).intValue());
    assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputOne().getOrder(0));

    // second input: positions 1 then 0, in the order the sortSecondGroup calls were made
    assertEquals(2, coGroup.getGroupOrderForInputTwo().getNumberOfFields());
    assertEquals(1, coGroup.getGroupOrderForInputTwo().getFieldNumber(0).intValue());
    assertEquals(0, coGroup.getGroupOrderForInputTwo().getFieldNumber(1).intValue());
    assertEquals(Order.ASCENDING, coGroup.getGroupOrderForInputTwo().getOrder(0));
    assertEquals(Order.DESCENDING, coGroup.getGroupOrderForInputTwo().getOrder(1));
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) CoGroupFunction(org.apache.flink.api.common.functions.CoGroupFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Collector(org.apache.flink.util.Collector) Test(org.junit.Test)

Example 3 with CoGroupOperatorBase

use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.

the class CoGroupOperator method translateToDataFlow.

/**
 * Translates this coGroup into a {@code CoGroupOperatorBase} wired to the two given inputs.
 *
 * <p>The translation strategy depends on the kinds of keys on each side: selector-function
 * keys on both sides, on the right side only, on the left side only, or expression keys on
 * both sides. Parallelism, the custom partitioner and any group-sort orders are applied to
 * the resulting operator afterwards, independent of the strategy chosen.
 *
 * @param input1 the translated operator producing the first input
 * @param input2 the translated operator producing the second input
 * @return the configured operator
 * @throws InvalidProgramException if the two key definitions are not compatible
 * @throws UnsupportedOperationException if the combination of key types is not recognized
 */
@Override
@Internal
protected org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> translateToDataFlow(Operator<I1> input1, Operator<I2> input2) {
    String name = getName() != null ? getName() : "CoGroup at " + defaultName;

    // Key compatibility is validated once, up front, for every key-type combination.
    try {
        keys1.areCompatible(keys2);
    } catch (IncompatibleKeysException e) {
        throw new InvalidProgramException("The types of the key fields do not match.", e);
    }

    final org.apache.flink.api.common.operators.base.CoGroupOperatorBase<?, ?, OUT, ?> po;
    if (keys1 instanceof SelectorFunctionKeys && keys2 instanceof SelectorFunctionKeys) {
        // selector functions on both sides
        @SuppressWarnings("unchecked") SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
        @SuppressWarnings("unchecked") SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;
        po = translateSelectorFunctionCoGroup(selectorKeys1, selectorKeys2, function, getResultType(), name, input1, input2);
    } else if (keys2 instanceof SelectorFunctionKeys) {
        // selector function on the second input only
        int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
        @SuppressWarnings("unchecked") SelectorFunctionKeys<I2, ?> selectorKeys2 = (SelectorFunctionKeys<I2, ?>) keys2;
        po = translateSelectorFunctionCoGroupRight(logicalKeyPositions1, selectorKeys2, function, getInput1Type(), getResultType(), name, input1, input2);
    } else if (keys1 instanceof SelectorFunctionKeys) {
        // selector function on the first input only
        @SuppressWarnings("unchecked") SelectorFunctionKeys<I1, ?> selectorKeys1 = (SelectorFunctionKeys<I1, ?>) keys1;
        int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
        po = translateSelectorFunctionCoGroupLeft(selectorKeys1, logicalKeyPositions2, function, getInput2Type(), getResultType(), name, input1, input2);
    } else if (keys1 instanceof Keys.ExpressionKeys && keys2 instanceof Keys.ExpressionKeys) {
        // expression keys on both sides: build the operator directly on the key positions
        // (compatibility was already verified above, so it is not re-checked here)
        int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
        int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
        CoGroupOperatorBase<I1, I2, OUT, CoGroupFunction<I1, I2, OUT>> op = new CoGroupOperatorBase<>(function, new BinaryOperatorInformation<>(getInput1Type(), getInput2Type(), getResultType()), logicalKeyPositions1, logicalKeyPositions2, name);
        op.setFirstInput(input1);
        op.setSecondInput(input2);
        po = op;
    } else {
        throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
    }

    // configure shared characteristics (single place for all translation strategies)
    po.setParallelism(getParallelism());
    po.setCustomPartitioner(customPartitioner);

    // carry over the group-sort order of the first input, if one was declared
    if (groupSortKeyOrderFirst.size() > 0) {
        Ordering o = new Ordering();
        for (Pair<Integer, Order> entry : groupSortKeyOrderFirst) {
            o.appendOrdering(entry.getLeft(), null, entry.getRight());
        }
        po.setGroupOrderForInputOne(o);
    }

    // carry over the group-sort order of the second input, if one was declared
    if (groupSortKeyOrderSecond.size() > 0) {
        Ordering o = new Ordering();
        for (Pair<Integer, Order> entry : groupSortKeyOrderSecond) {
            o.appendOrdering(entry.getLeft(), null, entry.getRight());
        }
        po.setGroupOrderForInputTwo(o);
    }
    return po;
}
Also used : SelectorFunctionKeys(org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Ordering(org.apache.flink.api.common.operators.Ordering) BinaryOperatorInformation(org.apache.flink.api.common.operators.BinaryOperatorInformation) Order(org.apache.flink.api.common.operators.Order) ExpressionKeys(org.apache.flink.api.common.operators.Keys.ExpressionKeys) IncompatibleKeysException(org.apache.flink.api.common.operators.Keys.IncompatibleKeysException) Internal(org.apache.flink.annotation.Internal)

Example 4 with CoGroupOperatorBase

use of org.apache.flink.api.common.operators.base.CoGroupOperatorBase in project flink by apache.

the class GraphCreatingVisitor method preVisit.

/**
 * Creates the optimizer node that corresponds to the visited operator and registers it in
 * {@code con2node}.
 *
 * <p>If the operator does not yield a node with a parallelism of at least 1, the node's
 * parallelism is derived from the operator's own setting or from the default parallelism.
 *
 * @param c the operator being visited
 * @return {@code false} if a node for this operator already exists, {@code true} if a new
 *     node was created
 * @throws InvalidProgramException if an iteration placeholder is visited while this visitor
 *     has no parent
 * @throws IllegalArgumentException if the operator type is not known
 */
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
    // nothing to do for operators that already have a node
    if (this.con2node.containsKey(c)) {
        return false;
    }

    // map the operator (or sink, or source) to its optimizer node type
    final OptimizerNode node;
    if (c instanceof GenericDataSinkBase) {
        final DataSinkNode sinkNode = new DataSinkNode((GenericDataSinkBase<?>) c);
        // sinks are additionally collected in their own list
        this.sinks.add(sinkNode);
        node = sinkNode;
    } else if (c instanceof GenericDataSourceBase) {
        node = new DataSourceNode((GenericDataSourceBase<?, ?>) c);
    } else if (c instanceof MapOperatorBase) {
        node = new MapNode((MapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof MapPartitionOperatorBase) {
        node = new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FlatMapOperatorBase) {
        node = new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
    } else if (c instanceof FilterOperatorBase) {
        node = new FilterNode((FilterOperatorBase<?, ?>) c);
    } else if (c instanceof ReduceOperatorBase) {
        node = new ReduceNode((ReduceOperatorBase<?, ?>) c);
    } else if (c instanceof GroupCombineOperatorBase) {
        node = new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
    } else if (c instanceof GroupReduceOperatorBase) {
        node = new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
    } else if (c instanceof InnerJoinOperatorBase) {
        node = new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof OuterJoinOperatorBase) {
        node = new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupOperatorBase) {
        node = new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CoGroupRawOperatorBase) {
        node = new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof CrossOperatorBase) {
        node = new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
    } else if (c instanceof BulkIterationBase) {
        node = new BulkIterationNode((BulkIterationBase<?>) c);
    } else if (c instanceof DeltaIterationBase) {
        node = new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
    } else if (c instanceof Union) {
        node = new BinaryUnionNode((Union<?>) c);
    } else if (c instanceof PartitionOperatorBase) {
        node = new PartitionNode((PartitionOperatorBase<?>) c);
    } else if (c instanceof SortPartitionOperatorBase) {
        node = new SortPartitionNode((SortPartitionOperatorBase<?>) c);
    } else if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
        // the iteration node is looked up through the parent visitor, so one must exist
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final BulkIterationBase.PartialSolutionPlaceHolder<?> placeHolder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
        final BulkIterationBase<?> iteration = placeHolder.getContainingBulkIteration();
        final BulkIterationNode iterationNode = (BulkIterationNode) this.parent.con2node.get(iteration);
        // the placeholder node takes over the parallelism of its iteration node
        final BulkPartialSolutionNode partialSolution = new BulkPartialSolutionNode(placeHolder, iterationNode);
        partialSolution.setParallelism(iterationNode.getParallelism());
        node = partialSolution;
    } else if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
        // the iteration node is looked up through the parent visitor, so one must exist
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.WorksetPlaceHolder<?> placeHolder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> iteration = placeHolder.getContainingWorksetIteration();
        final WorksetIterationNode iterationNode = (WorksetIterationNode) this.parent.con2node.get(iteration);
        // the placeholder node takes over the parallelism of its iteration node
        final WorksetNode workset = new WorksetNode(placeHolder, iterationNode);
        workset.setParallelism(iterationNode.getParallelism());
        node = workset;
    } else if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
        // the iteration node is looked up through the parent visitor, so one must exist
        if (this.parent == null) {
            throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
        }
        final DeltaIterationBase.SolutionSetPlaceHolder<?> placeHolder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> iteration = placeHolder.getContainingWorksetIteration();
        final WorksetIterationNode iterationNode = (WorksetIterationNode) this.parent.con2node.get(iteration);
        // the placeholder node takes over the parallelism of its iteration node
        final SolutionSetNode solutionSet = new SolutionSetNode(placeHolder, iterationNode);
        solutionSet.setParallelism(iterationNode.getParallelism());
        node = solutionSet;
    } else {
        throw new IllegalArgumentException("Unknown operator type: " + c);
    }

    this.con2node.put(c, node);

    // assign a parallelism only if the node does not have a valid one yet
    if (node.getParallelism() < 1) {
        final int requested = c.getParallelism();
        final int effective;
        if (requested <= 0) {
            // the operator does not specify a parallelism: use the default
            effective = this.defaultParallelism;
        } else if (this.forceParallelism && requested != this.defaultParallelism) {
            // the operator's own setting is overridden by the forced default
            effective = this.defaultParallelism;
            Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
        } else {
            effective = requested;
        }
        node.setParallelism(effective);
    }
    return true;
}
Also used : FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) FilterNode(org.apache.flink.optimizer.dag.FilterNode) CrossOperatorBase(org.apache.flink.api.common.operators.base.CrossOperatorBase) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) BulkPartialSolutionNode(org.apache.flink.optimizer.dag.BulkPartialSolutionNode) DeltaIterationBase(org.apache.flink.api.common.operators.base.DeltaIterationBase) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) JoinNode(org.apache.flink.optimizer.dag.JoinNode) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) OuterJoinOperatorBase(org.apache.flink.api.common.operators.base.OuterJoinOperatorBase) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) BulkIterationBase(org.apache.flink.api.common.operators.base.BulkIterationBase) CoGroupRawOperatorBase(org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) WorksetNode(org.apache.flink.optimizer.dag.WorksetNode) CoGroupNode(org.apache.flink.optimizer.dag.CoGroupNode) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) MapNode(org.apache.flink.optimizer.dag.MapNode) GroupCombineNode(org.apache.flink.optimizer.dag.GroupCombineNode) 
Union(org.apache.flink.api.common.operators.Union) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) SolutionSetNode(org.apache.flink.optimizer.dag.SolutionSetNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) DataSourceNode(org.apache.flink.optimizer.dag.DataSourceNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) PartitionOperatorBase(org.apache.flink.api.common.operators.base.PartitionOperatorBase) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) CoGroupRawNode(org.apache.flink.optimizer.dag.CoGroupRawNode) BinaryUnionNode(org.apache.flink.optimizer.dag.BinaryUnionNode) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) PartitionNode(org.apache.flink.optimizer.dag.PartitionNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) FilterOperatorBase(org.apache.flink.api.common.operators.base.FilterOperatorBase) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) BulkIterationNode(org.apache.flink.optimizer.dag.BulkIterationNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSourceBase(org.apache.flink.api.common.operators.GenericDataSourceBase) CrossNode(org.apache.flink.optimizer.dag.CrossNode)

Aggregations

CoGroupOperatorBase (org.apache.flink.api.common.operators.base.CoGroupOperatorBase): 4, InvalidProgramException (org.apache.flink.api.common.InvalidProgramException): 2, Plan (org.apache.flink.api.common.Plan): 2, CoGroupFunction (org.apache.flink.api.common.functions.CoGroupFunction): 2, ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 2, Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 2, Collector (org.apache.flink.util.Collector): 2, Test (org.junit.Test): 2, Internal (org.apache.flink.annotation.Internal): 1, BinaryOperatorInformation (org.apache.flink.api.common.operators.BinaryOperatorInformation): 1, GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase): 1, GenericDataSourceBase (org.apache.flink.api.common.operators.GenericDataSourceBase): 1, ExpressionKeys (org.apache.flink.api.common.operators.Keys.ExpressionKeys): 1, IncompatibleKeysException (org.apache.flink.api.common.operators.Keys.IncompatibleKeysException): 1, SelectorFunctionKeys (org.apache.flink.api.common.operators.Keys.SelectorFunctionKeys): 1, Order (org.apache.flink.api.common.operators.Order): 1, Ordering (org.apache.flink.api.common.operators.Ordering): 1, Union (org.apache.flink.api.common.operators.Union): 1, BulkIterationBase (org.apache.flink.api.common.operators.base.BulkIterationBase): 1, CoGroupRawOperatorBase (org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase): 1