Search in sources:

Example 1 with GenericDataSinkBase

use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.

the class DeltaIterationTranslationTest method testCorrectTranslation.

/**
 * Checks that a delta iteration feeding two data sinks is translated into the expected plan.
 *
 * <p>The program consumes the workset twice (a self-join), joins the result with the solution
 * set, closes the iteration and attaches two sinks to the iteration result. The test then
 * inspects the operators reachable from the sinks of the created {@link Plan}.
 *
 * @throws Exception if constructing or translating the program fails; the exception is
 *     propagated so the test runner reports the complete stack trace and cause instead of
 *     only the exception message
 */
@Test
public void testCorrectTranslation() throws Exception {
    final String jobName = "Test JobName";
    final String iterationName = "Test Name";
    final String beforeNextWorksetMap = "Some Mapper";
    final String aggregatorName = "AggregatorName";
    final int[] iterationKeys = new int[] { 2 };
    final int numIterations = 13;
    final int defaultParallelism = 133;
    final int iterationParallelism = 77;
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // ------------ construct the test program ------------------
    // The braces limit the scope of the API-level locals; the name "iteration" is reused
    // further down for the translated operator.
    {
        env.setParallelism(defaultParallelism);
        @SuppressWarnings("unchecked") DataSet<Tuple3<Double, Long, String>> initialSolutionSet = env.fromElements(new Tuple3<Double, Long, String>(3.44, 5L, "abc"));
        @SuppressWarnings("unchecked") DataSet<Tuple2<Double, String>> initialWorkSet = env.fromElements(new Tuple2<Double, String>(1.23, "abc"));
        DeltaIteration<Tuple3<Double, Long, String>, Tuple2<Double, String>> iteration = initialSolutionSet.iterateDelta(initialWorkSet, numIterations, iterationKeys);
        iteration.name(iterationName).parallelism(iterationParallelism);
        iteration.registerAggregator(aggregatorName, new LongSumAggregator());
        // test that multiple workset consumers are supported
        DataSet<Tuple2<Double, String>> worksetSelfJoin = iteration.getWorkset().map(new IdentityMapper<Tuple2<Double, String>>()).join(iteration.getWorkset()).where(1).equalTo(1).projectFirst(0, 1);
        DataSet<Tuple3<Double, Long, String>> joined = worksetSelfJoin.join(iteration.getSolutionSet()).where(1).equalTo(2).with(new SolutionWorksetJoin());
        DataSet<Tuple3<Double, Long, String>> result = iteration.closeWith(joined, joined.map(new NextWorksetMapper()).name(beforeNextWorksetMap));
        // two sinks on the same result, so the iteration has more than one consumer
        result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
        result.writeAsText("/dev/null");
    }
    Plan p = env.createProgramPlan(jobName);
    // ------------- validate the plan ----------------
    assertEquals(jobName, p.getJobName());
    assertEquals(defaultParallelism, p.getDefaultParallelism());
    // validate the iteration
    GenericDataSinkBase<?> sink1, sink2;
    {
        Iterator<? extends GenericDataSinkBase<?>> sinks = p.getDataSinks().iterator();
        sink1 = sinks.next();
        sink2 = sinks.next();
    }
    DeltaIterationBase<?, ?> iteration = (DeltaIterationBase<?, ?>) sink1.getInput();
    // check that multi consumer translation works for iterations
    assertEquals(iteration, sink2.getInput());
    // check the basic iteration properties
    assertEquals(numIterations, iteration.getMaximumNumberOfIterations());
    assertArrayEquals(iterationKeys, iteration.getSolutionSetKeyFields());
    assertEquals(iterationParallelism, iteration.getParallelism());
    assertEquals(iterationName, iteration.getName());
    // walk the step function backwards from its two outputs
    MapOperatorBase<?, ?, ?> nextWorksetMapper = (MapOperatorBase<?, ?, ?>) iteration.getNextWorkset();
    InnerJoinOperatorBase<?, ?, ?, ?> solutionSetJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) iteration.getSolutionSetDelta();
    InnerJoinOperatorBase<?, ?, ?, ?> worksetSelfJoin = (InnerJoinOperatorBase<?, ?, ?, ?>) solutionSetJoin.getFirstInput();
    MapOperatorBase<?, ?, ?> worksetMapper = (MapOperatorBase<?, ?, ?>) worksetSelfJoin.getFirstInput();
    assertEquals(IdentityMapper.class, worksetMapper.getUserCodeWrapper().getUserCodeClass());
    assertEquals(NextWorksetMapper.class, nextWorksetMapper.getUserCodeWrapper().getUserCodeClass());
    // the join function may be wrapped; compare against the wrapped function in that case
    if (solutionSetJoin.getUserCodeWrapper().getUserCodeObject() instanceof WrappingFunction) {
        WrappingFunction<?> wf = (WrappingFunction<?>) solutionSetJoin.getUserCodeWrapper().getUserCodeObject();
        assertEquals(SolutionWorksetJoin.class, wf.getWrappedFunction().getClass());
    } else {
        assertEquals(SolutionWorksetJoin.class, solutionSetJoin.getUserCodeWrapper().getUserCodeClass());
    }
    assertEquals(beforeNextWorksetMap, nextWorksetMapper.getName());
    assertEquals(aggregatorName, iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) DataSet(org.apache.flink.api.java.DataSet) LongSumAggregator(org.apache.flink.api.common.aggregators.LongSumAggregator) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) Iterator(java.util.Iterator) DeltaIterationBase(org.apache.flink.api.common.operators.base.DeltaIterationBase) DeltaIteration(org.apache.flink.api.java.operators.DeltaIteration) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) Plan(org.apache.flink.api.common.Plan) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) Tuple3(org.apache.flink.api.java.tuple.Tuple3) Test(org.junit.Test)

Example 2 with GenericDataSinkBase

use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.

the class OperatorTranslation method translateToPlan.

/**
 * Translates every given data sink and bundles the results into a named {@link Plan}.
 *
 * @param sinks the data sinks to translate, processed in iteration order
 * @param jobName the job name set on the returned plan
 * @return a plan built from the translated sinks
 */
public Plan translateToPlan(List<DataSink<?>> sinks, String jobName) {
    final List<GenericDataSinkBase<?>> translatedSinks = new ArrayList<>();
    for (DataSink<?> apiSink : sinks) {
        final GenericDataSinkBase<?> translated = translate(apiSink);
        translatedSinks.add(translated);
    }

    final Plan plan = new Plan(translatedSinks);
    plan.setJobName(jobName);
    return plan;
}
Also used : GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) ArrayList(java.util.ArrayList) Plan(org.apache.flink.api.common.Plan)

Example 3 with GenericDataSinkBase

use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.

the class UnionTranslationTest method translateUnion2Group.

/**
 * Checks the plan created for a union of two data sets that is grouped via a key selector.
 *
 * <p>Both union inputs are expected to be map operators whose parallelism is 3 and 2
 * respectively (presumably the values passed to {@code getSourceDataSet}; confirm against
 * that helper), while the union itself keeps {@link ExecutionConfig#PARALLELISM_DEFAULT}.
 *
 * @throws Exception if building or translating the program fails; the exception is
 *     propagated so the test runner reports the complete stack trace and cause instead of
 *     only the exception message
 */
@Test
public void translateUnion2Group() throws Exception {
    final int parallelism = 4;
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
    DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 3);
    DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 2);
    dataset1.union(dataset2).groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "").reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {
    }).returns(String.class).output(new DiscardingOutputFormat<>());
    Plan p = env.createProgramPlan();
    // The plan should look like the following one.
    //
    // DataSet1(3) - MapOperator(3)-+
    // |- Union(-1) - SingleInputOperator - Sink
    // DataSet2(2) - MapOperator(2)-+
    GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
    // wildcard-parameterized casts instead of raw types
    Union<?> unionOperator = (Union<?>) ((SingleInputOperator<?, ?, ?>) sink.getInput()).getInput();
    // The key mappers should be added to both of the two input streams for union.
    assertTrue(unionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
    assertTrue(unionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);
    // The parallelisms of the key mappers should be equal to those of their inputs.
    // (expected value first, so failure messages read correctly)
    assertEquals(3, unionOperator.getFirstInput().getParallelism());
    assertEquals(2, unionOperator.getSecondInput().getParallelism());
    // The union should always have the default parallelism.
    assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, unionOperator.getParallelism());
}
Also used : KeySelector(org.apache.flink.api.java.functions.KeySelector) Tuple3(org.apache.flink.api.java.tuple.Tuple3) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) LongValue(org.apache.flink.types.LongValue) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) Union(org.apache.flink.api.common.operators.Union) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) SingleInputOperator(org.apache.flink.api.common.operators.SingleInputOperator) DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) StringValue(org.apache.flink.types.StringValue) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Plan(org.apache.flink.api.common.Plan) Assert.fail(org.junit.Assert.fail) Order(org.apache.flink.api.common.operators.Order) Assert.assertEquals(org.junit.Assert.assertEquals) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) Plan(org.apache.flink.api.common.Plan) Union(org.apache.flink.api.common.operators.Union) Tuple3(org.apache.flink.api.java.tuple.Tuple3) LongValue(org.apache.flink.types.LongValue) StringValue(org.apache.flink.types.StringValue) Test(org.junit.Test)

Example 4 with GenericDataSinkBase

use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.

the class DataSink method translateToDataFlow.

// --------------------------------------------------------------------------------------------
/**
 * Builds the {@link GenericDataSinkBase} for this sink on top of the given input operator.
 *
 * <p>The created sink uses this sink's name (or the format's string form when no name is
 * set), receives the configured parameters if any, takes this sink's parallelism when it is
 * positive and the input's parallelism otherwise, and gets a local order when sort key
 * positions are configured.
 *
 * @param input the operator whose output the created sink consumes
 * @return the configured sink operator
 */
protected GenericDataSinkBase<T> translateToDataFlow(Operator<T> input) {
    // fall back to the format's string form when no explicit name was given
    final String sinkName;
    if (this.name == null) {
        sinkName = this.format.toString();
    } else {
        sinkName = this.name;
    }

    GenericDataSinkBase<T> result =
            new GenericDataSinkBase<>(
                    this.format,
                    new UnaryOperatorInformation<>(this.type, new NothingTypeInfo()),
                    sinkName);
    result.setInput(input);

    if (this.parameters != null) {
        result.getParameters().addAll(this.parameters);
    }

    // an explicitly specified parallelism wins; otherwise reuse the input operator's
    // parallelism to enable chaining
    result.setParallelism(this.parallelism > 0 ? this.parallelism : input.getParallelism());

    // without sort keys there is no output ordering to configure
    if (this.sortKeyPositions == null) {
        return result;
    }

    Ordering localOrder = new Ordering();
    for (int pos = 0; pos < this.sortKeyPositions.length; pos++) {
        localOrder.appendOrdering(this.sortKeyPositions[pos], null, this.sortOrders[pos]);
    }
    result.setLocalOrder(localOrder);
    return result;
}
Also used : GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) Ordering(org.apache.flink.api.common.operators.Ordering) NothingTypeInfo(org.apache.flink.api.common.typeinfo.NothingTypeInfo)

Example 5 with GenericDataSinkBase

use of org.apache.flink.api.common.operators.GenericDataSinkBase in project flink by apache.

the class GraphCreatingVisitor method preVisit.

/**
 * Creates and registers the optimizer node for an operator on its first visit.
 *
 * <p>If the operator already has a node in {@code con2node}, nothing happens. Otherwise a node
 * matching the operator's type is created and stored, and, if the new node has no positive
 * parallelism yet, one is derived from the operator, the default parallelism and the
 * {@code forceParallelism} flag.
 *
 * @param c the operator being visited
 * @return {@code false} if the operator was already registered, {@code true} otherwise
 * @throws InvalidProgramException if an iteration placeholder is visited while this visitor
 *     has no parent
 * @throws IllegalArgumentException if the operator type is not recognized
 */
@SuppressWarnings("deprecation")
@Override
public boolean preVisit(Operator<?> c) {
    // check if we have been here before
    if (this.con2node.containsKey(c)) {
        return false;
    }

    final OptimizerNode node = createNodeFor(c);
    this.con2node.put(c, node);

    // nodes that already carry a positive parallelism keep it
    if (node.getParallelism() >= 1) {
        return true;
    }

    final int declared = c.getParallelism();
    final int effective;
    if (node instanceof BinaryUnionNode) {
        // Keep parallelism of union undefined for now.
        // It will be determined based on the parallelism of its successor.
        effective = -1;
    } else if (declared <= 0) {
        effective = this.defaultParallelism;
    } else if (this.forceParallelism && declared != this.defaultParallelism) {
        Optimizer.LOG.warn("The parallelism of nested dataflows (such as step functions in iterations) is " + "currently fixed to the parallelism of the surrounding operator (the iteration).");
        effective = this.defaultParallelism;
    } else {
        effective = declared;
    }
    node.setParallelism(effective);
    return true;
}

/**
 * Instantiates the optimizer node that corresponds to the runtime type of the operator.
 *
 * <p>The type tests run in a fixed order. Data sink nodes are additionally added to
 * {@code sinks}; iteration placeholder nodes take the parallelism of the node of their
 * containing iteration, which is looked up in the parent's {@code con2node}.
 */
@SuppressWarnings("deprecation")
private OptimizerNode createNodeFor(Operator<?> c) {
    if (c instanceof GenericDataSinkBase) {
        DataSinkNode sinkNode = new DataSinkNode((GenericDataSinkBase<?>) c);
        this.sinks.add(sinkNode);
        return sinkNode;
    }
    if (c instanceof GenericDataSourceBase) {
        return new DataSourceNode((GenericDataSourceBase<?, ?>) c);
    }
    if (c instanceof MapOperatorBase) {
        return new MapNode((MapOperatorBase<?, ?, ?>) c);
    }
    if (c instanceof MapPartitionOperatorBase) {
        return new MapPartitionNode((MapPartitionOperatorBase<?, ?, ?>) c);
    }
    if (c instanceof FlatMapOperatorBase) {
        return new FlatMapNode((FlatMapOperatorBase<?, ?, ?>) c);
    }
    if (c instanceof FilterOperatorBase) {
        return new FilterNode((FilterOperatorBase<?, ?>) c);
    }
    if (c instanceof ReduceOperatorBase) {
        return new ReduceNode((ReduceOperatorBase<?, ?>) c);
    }
    if (c instanceof GroupCombineOperatorBase) {
        return new GroupCombineNode((GroupCombineOperatorBase<?, ?, ?>) c);
    }
    if (c instanceof GroupReduceOperatorBase) {
        return new GroupReduceNode((GroupReduceOperatorBase<?, ?, ?>) c);
    }
    if (c instanceof InnerJoinOperatorBase) {
        return new JoinNode((InnerJoinOperatorBase<?, ?, ?, ?>) c);
    }
    if (c instanceof OuterJoinOperatorBase) {
        return new OuterJoinNode((OuterJoinOperatorBase<?, ?, ?, ?>) c);
    }
    if (c instanceof CoGroupOperatorBase) {
        return new CoGroupNode((CoGroupOperatorBase<?, ?, ?, ?>) c);
    }
    if (c instanceof CoGroupRawOperatorBase) {
        return new CoGroupRawNode((CoGroupRawOperatorBase<?, ?, ?, ?>) c);
    }
    if (c instanceof CrossOperatorBase) {
        return new CrossNode((CrossOperatorBase<?, ?, ?, ?>) c);
    }
    if (c instanceof BulkIterationBase) {
        return new BulkIterationNode((BulkIterationBase<?>) c);
    }
    if (c instanceof DeltaIterationBase) {
        return new WorksetIterationNode((DeltaIterationBase<?, ?>) c);
    }
    if (c instanceof Union) {
        return new BinaryUnionNode((Union<?>) c);
    }
    if (c instanceof PartitionOperatorBase) {
        return new PartitionNode((PartitionOperatorBase<?>) c);
    }
    if (c instanceof SortPartitionOperatorBase) {
        return new SortPartitionNode((SortPartitionOperatorBase<?>) c);
    }
    if (c instanceof BulkIterationBase.PartialSolutionPlaceHolder) {
        requireParentVisitor();
        final BulkIterationBase.PartialSolutionPlaceHolder<?> holder = (BulkIterationBase.PartialSolutionPlaceHolder<?>) c;
        final BulkIterationBase<?> enclosingIteration = holder.getContainingBulkIteration();
        final BulkIterationNode containingIterationNode = (BulkIterationNode) this.parent.con2node.get(enclosingIteration);
        // catch this for the recursive translation of step functions
        BulkPartialSolutionNode partialSolution = new BulkPartialSolutionNode(holder, containingIterationNode);
        partialSolution.setParallelism(containingIterationNode.getParallelism());
        return partialSolution;
    }
    if (c instanceof DeltaIterationBase.WorksetPlaceHolder) {
        requireParentVisitor();
        final DeltaIterationBase.WorksetPlaceHolder<?> holder = (DeltaIterationBase.WorksetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
        final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
        // catch this for the recursive translation of step functions
        WorksetNode workset = new WorksetNode(holder, containingIterationNode);
        workset.setParallelism(containingIterationNode.getParallelism());
        return workset;
    }
    if (c instanceof DeltaIterationBase.SolutionSetPlaceHolder) {
        requireParentVisitor();
        final DeltaIterationBase.SolutionSetPlaceHolder<?> holder = (DeltaIterationBase.SolutionSetPlaceHolder<?>) c;
        final DeltaIterationBase<?, ?> enclosingIteration = holder.getContainingWorksetIteration();
        final WorksetIterationNode containingIterationNode = (WorksetIterationNode) this.parent.con2node.get(enclosingIteration);
        // catch this for the recursive translation of step functions
        SolutionSetNode solutionSet = new SolutionSetNode(holder, containingIterationNode);
        solutionSet.setParallelism(containingIterationNode.getParallelism());
        return solutionSet;
    }
    throw new IllegalArgumentException("Unknown operator type: " + c);
}

/** Rejects iteration placeholders that are visited while this visitor has no parent. */
private void requireParentVisitor() {
    if (this.parent == null) {
        throw new InvalidProgramException("It is currently not supported to create data sinks inside iterations.");
    }
}
Also used : FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) ReduceOperatorBase(org.apache.flink.api.common.operators.base.ReduceOperatorBase) FilterNode(org.apache.flink.optimizer.dag.FilterNode) CrossOperatorBase(org.apache.flink.api.common.operators.base.CrossOperatorBase) CoGroupOperatorBase(org.apache.flink.api.common.operators.base.CoGroupOperatorBase) BulkPartialSolutionNode(org.apache.flink.optimizer.dag.BulkPartialSolutionNode) DeltaIterationBase(org.apache.flink.api.common.operators.base.DeltaIterationBase) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) DataSinkNode(org.apache.flink.optimizer.dag.DataSinkNode) JoinNode(org.apache.flink.optimizer.dag.JoinNode) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) OuterJoinOperatorBase(org.apache.flink.api.common.operators.base.OuterJoinOperatorBase) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) GroupReduceOperatorBase(org.apache.flink.api.common.operators.base.GroupReduceOperatorBase) OuterJoinNode(org.apache.flink.optimizer.dag.OuterJoinNode) BulkIterationBase(org.apache.flink.api.common.operators.base.BulkIterationBase) CoGroupRawOperatorBase(org.apache.flink.api.common.operators.base.CoGroupRawOperatorBase) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) WorksetNode(org.apache.flink.optimizer.dag.WorksetNode) CoGroupNode(org.apache.flink.optimizer.dag.CoGroupNode) FlatMapNode(org.apache.flink.optimizer.dag.FlatMapNode) MapNode(org.apache.flink.optimizer.dag.MapNode) GroupCombineNode(org.apache.flink.optimizer.dag.GroupCombineNode) 
Union(org.apache.flink.api.common.operators.Union) FlatMapOperatorBase(org.apache.flink.api.common.operators.base.FlatMapOperatorBase) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) SolutionSetNode(org.apache.flink.optimizer.dag.SolutionSetNode) OptimizerNode(org.apache.flink.optimizer.dag.OptimizerNode) WorksetIterationNode(org.apache.flink.optimizer.dag.WorksetIterationNode) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) InnerJoinOperatorBase(org.apache.flink.api.common.operators.base.InnerJoinOperatorBase) DataSourceNode(org.apache.flink.optimizer.dag.DataSourceNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) MapPartitionOperatorBase(org.apache.flink.api.common.operators.base.MapPartitionOperatorBase) PartitionOperatorBase(org.apache.flink.api.common.operators.base.PartitionOperatorBase) SortPartitionOperatorBase(org.apache.flink.api.common.operators.base.SortPartitionOperatorBase) CoGroupRawNode(org.apache.flink.optimizer.dag.CoGroupRawNode) BinaryUnionNode(org.apache.flink.optimizer.dag.BinaryUnionNode) MapPartitionNode(org.apache.flink.optimizer.dag.MapPartitionNode) PartitionNode(org.apache.flink.optimizer.dag.PartitionNode) SortPartitionNode(org.apache.flink.optimizer.dag.SortPartitionNode) FilterOperatorBase(org.apache.flink.api.common.operators.base.FilterOperatorBase) GroupCombineOperatorBase(org.apache.flink.api.common.operators.base.GroupCombineOperatorBase) BulkIterationNode(org.apache.flink.optimizer.dag.BulkIterationNode) ReduceNode(org.apache.flink.optimizer.dag.ReduceNode) GroupReduceNode(org.apache.flink.optimizer.dag.GroupReduceNode) GenericDataSourceBase(org.apache.flink.api.common.operators.GenericDataSourceBase) CrossNode(org.apache.flink.optimizer.dag.CrossNode)

Aggregations

GenericDataSinkBase (org.apache.flink.api.common.operators.GenericDataSinkBase)6 Plan (org.apache.flink.api.common.Plan)4 MapOperatorBase (org.apache.flink.api.common.operators.base.MapOperatorBase)4 Union (org.apache.flink.api.common.operators.Union)3 DataSet (org.apache.flink.api.java.DataSet)3 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)3 DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat)3 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)3 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)2 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)2 GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction)2 Order (org.apache.flink.api.common.operators.Order)2 SingleInputOperator (org.apache.flink.api.common.operators.SingleInputOperator)2 DeltaIterationBase (org.apache.flink.api.common.operators.base.DeltaIterationBase)2 InnerJoinOperatorBase (org.apache.flink.api.common.operators.base.InnerJoinOperatorBase)2 KeySelector (org.apache.flink.api.java.functions.KeySelector)2 Test (org.junit.Test)2 ArrayList (java.util.ArrayList)1 Iterator (java.util.Iterator)1 LongSumAggregator (org.apache.flink.api.common.aggregators.LongSumAggregator)1