Search in sources :

Example 6 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class BatchTask method getOutputCollector.

// --------------------------------------------------------------------------------------------
// Result Shipping and Chained Tasks
// --------------------------------------------------------------------------------------------
/**
 * Creates the {@link Collector} for the given task, as described by the given configuration.
 * The output collector contains the writers that forward the data to the different tasks that
 * the given task is connected to. Each writer applies the partitioning as described in the
 * configuration.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs The output writers that this task forwards to the next task for each
 *     output.
 * @param outputOffset The offset to start to get the writers for the outputs
 * @param numOutputs The number of outputs described in the configuration.
 * @return The OutputCollector that data produced in this task is submitted to.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl, List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception {
    if (numOutputs == 0) {
        return null;
    }
    // get the factory for the serializer
    final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
    final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);
    // create a writer for each output
    for (int i = 0; i < numOutputs; i++) {
        // create the OutputEmitter from output ship strategy
        final ShipStrategyType strategy = config.getOutputShipStrategy(i);
        final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
        final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);
        final ChannelSelector<SerializationDelegate<T>> oe;
        if (compFactory == null) {
            oe = new OutputEmitter<>(strategy, indexInSubtaskGroup);
        } else {
            final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
            final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);
            final TypeComparator<T> comparator = compFactory.createComparator();
            oe = new OutputEmitter<>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
        }
        final RecordWriter<SerializationDelegate<T>> recordWriter = new RecordWriterBuilder().setChannelSelector(oe).setTaskName(task.getEnvironment().getTaskInfo().getTaskNameWithSubtasks()).build(task.getEnvironment().getWriter(outputOffset + i));
        recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());
        writers.add(recordWriter);
    }
    if (eventualOutputs != null) {
        eventualOutputs.addAll(writers);
    }
    return new OutputCollector<>(writers, serializerFactory.getSerializer());
}
Also used : OutputCollector(org.apache.flink.runtime.operators.shipping.OutputCollector) ArrayList(java.util.ArrayList) SerializationDelegate(org.apache.flink.runtime.plugable.SerializationDelegate) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) RecordWriter(org.apache.flink.runtime.io.network.api.writer.RecordWriter) DataDistribution(org.apache.flink.api.common.distributions.DataDistribution) RecordWriterBuilder(org.apache.flink.runtime.io.network.api.writer.RecordWriterBuilder)

Example 7 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingGlobalParallelism2.

/**
 * Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all
 * properties).
 *
 * <p>Increases parallelism between 2nd map and 2nd reduce, so the hash partitioning from 1st
 * reduce is not reusable. Expected to re-establish partitioning between map and reduce (hash).
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
    final int p = DEFAULT_PARALLELISM;
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);
    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);
    set1.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p).name("Reduce1").map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Reduce2").output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when reducer 1 distributes its data across the instances of map2, it needs to employ a
    // local hash method,
    // because map2 has twice as many instances and key/value pairs with the same key need to be
    // processed by the same
    // mapper respectively reducer
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, mapIn);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, reduceIn);
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) IdentityMapper(org.apache.flink.optimizer.testfunctions.IdentityMapper) IdentityGroupReducer(org.apache.flink.optimizer.testfunctions.IdentityGroupReducer) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 8 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class WorksetIterationsRecordApiCompilerTest method testRecordApiWithDeferredSoltionSetUpdateWithMapper.

@Test
public void testRecordApiWithDeferredSoltionSetUpdateWithMapper() {
    Plan plan = getTestPlan(false, true);
    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // silence the compiler
        return;
    }
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    DualInputPlanNode joinWithInvariantNode = resolver.getNode(JOIN_WITH_INVARIANT_NAME);
    DualInputPlanNode joinWithSolutionSetNode = resolver.getNode(JOIN_WITH_SOLUTION_SET);
    SingleInputPlanNode worksetReducer = resolver.getNode(NEXT_WORKSET_REDUCER_NAME);
    SingleInputPlanNode deltaMapper = resolver.getNode(SOLUTION_DELTA_MAPPER_NAME);
    // iteration preserves partitioning in reducer, so the first partitioning is out of the
    // loop,
    // the in-loop partitioning is before the final reducer
    // verify joinWithInvariant
    assertEquals(ShipStrategyType.FORWARD, joinWithInvariantNode.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, joinWithInvariantNode.getInput2().getShipStrategy());
    assertEquals(list0, joinWithInvariantNode.getKeysForInput1());
    assertEquals(list0, joinWithInvariantNode.getKeysForInput2());
    // verify joinWithSolutionSet
    assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, joinWithSolutionSetNode.getInput2().getShipStrategy());
    // verify reducer
    assertEquals(ShipStrategyType.PARTITION_HASH, worksetReducer.getInput().getShipStrategy());
    assertEquals(list0, worksetReducer.getKeys(0));
    // currently, the system may partition before or after the mapper
    ShipStrategyType ss1 = deltaMapper.getInput().getShipStrategy();
    ShipStrategyType ss2 = deltaMapper.getOutgoingChannels().get(0).getShipStrategy();
    assertTrue((ss1 == ShipStrategyType.FORWARD && ss2 == ShipStrategyType.PARTITION_HASH) || (ss2 == ShipStrategyType.FORWARD && ss1 == ShipStrategyType.PARTITION_HASH));
    new JobGraphGenerator().compileJobGraph(oPlan);
}
Also used : DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) SingleInputPlanNode(org.apache.flink.optimizer.plan.SingleInputPlanNode) JobGraphGenerator(org.apache.flink.optimizer.plantranslate.JobGraphGenerator) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) Test(org.junit.Test)

Example 9 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class ReplicatingDataSourceTest method checkCrossWithReplicatedSourceInputBehindMap.

/**
 * Tests cross program with replicated data source behind map and filter.
 */
@Test
public void checkCrossWithReplicatedSourceInputBehindMap() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.map(new IdMap()).filter(new NoFilter()).cross(source2).writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when cross should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy();
    ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2);
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Example 10 with ShipStrategyType

use of org.apache.flink.runtime.operators.shipping.ShipStrategyType in project flink by apache.

the class ReplicatingDataSourceTest method checkCrossWithReplicatedSourceInput.

/**
 * Tests cross program with replicated data source.
 */
@Test
public void checkCrossWithReplicatedSourceInput() {
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
    ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo));
    DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO));
    DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class);
    DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1.cross(source2).writeAsText("/some/newpath");
    Plan plan = env.createProgramPlan();
    // submit the plan to the compiler
    OptimizedPlan oPlan = compileNoStats(plan);
    // check the optimized Plan
    // when cross should have forward strategy on both sides
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode crossNode = (DualInputPlanNode) sinkNode.getPredecessor();
    ShipStrategyType crossIn1 = crossNode.getInput1().getShipStrategy();
    ShipStrategyType crossIn2 = crossNode.getInput2().getShipStrategy();
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn1);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, crossIn2);
}
Also used : Path(org.apache.flink.core.fs.Path) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Plan(org.apache.flink.api.common.Plan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) OptimizedPlan(org.apache.flink.optimizer.plan.OptimizedPlan) ShipStrategyType(org.apache.flink.runtime.operators.shipping.ShipStrategyType) DualInputPlanNode(org.apache.flink.optimizer.plan.DualInputPlanNode) FileInputSplit(org.apache.flink.core.fs.FileInputSplit) ReplicatingInputFormat(org.apache.flink.api.common.io.ReplicatingInputFormat) Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SinkPlanNode(org.apache.flink.optimizer.plan.SinkPlanNode) Test(org.junit.Test)

Aggregations

ShipStrategyType (org.apache.flink.runtime.operators.shipping.ShipStrategyType)23 Plan (org.apache.flink.api.common.Plan)13 OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan)13 Test (org.junit.Test)13 SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode)12 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)11 DualInputPlanNode (org.apache.flink.optimizer.plan.DualInputPlanNode)11 ReplicatingInputFormat (org.apache.flink.api.common.io.ReplicatingInputFormat)8 Tuple1 (org.apache.flink.api.java.tuple.Tuple1)8 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)8 FileInputSplit (org.apache.flink.core.fs.FileInputSplit)8 Path (org.apache.flink.core.fs.Path)8 CompilerException (org.apache.flink.optimizer.CompilerException)8 SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode)8 Channel (org.apache.flink.optimizer.plan.Channel)5 PlanNode (org.apache.flink.optimizer.plan.PlanNode)5 DataExchangeMode (org.apache.flink.runtime.io.network.DataExchangeMode)5 ArrayList (java.util.ArrayList)4 NamedChannel (org.apache.flink.optimizer.plan.NamedChannel)4 Set (java.util.Set)3