Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class ProcessFailureCancelingITCase, method testCancelingOnProcessFailure.
@Test
public void testCancelingOnProcessFailure() {
    final StringWriter processOutput = new StringWriter();
    ActorSystem jmActorSystem = null;
    Process taskManagerProcess = null;
    try {
        // run this test only if the java command is available on this machine
        String javaCommand = getJavaCommandPath();
        if (javaCommand == null) {
            System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
            return;
        }
        // create a logging file for the process
        File tempLogFile = File.createTempFile(getClass().getSimpleName() + "-", "-log4j.properties");
        tempLogFile.deleteOnExit();
        CommonTestUtils.printLog4jDebugConfig(tempLogFile);
        // find a free port to start the JobManager
        final int jobManagerPort = NetUtils.getAvailablePort();
        // start a JobManager
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
        Configuration jmConfig = new Configuration();
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "5 s");
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "2000 s");
        jmConfig.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 10);
        jmConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100 s");
        jmActorSystem = AkkaUtils.createActorSystem(jmConfig, new Some<>(localAddress));
        ActorRef jmActor = JobManager.startJobManagerActors(
            jmConfig,
            jmActorSystem,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            JobManager.class,
            MemoryArchivist.class)._1();
        // the TaskManager java command
        String[] command = new String[] {
            javaCommand,
            "-Dlog.level=DEBUG",
            "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(),
            "-Xms80m", "-Xmx80m",
            "-classpath", getCurrentClasspath(),
            AbstractTaskManagerProcessFailureRecoveryTest.TaskManagerProcessEntryPoint.class.getName(),
            String.valueOf(jobManagerPort) };
        // start a TaskManager process
        taskManagerProcess = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess.getErrorStream(), processOutput);
        // wait for the JobManager to have the TaskManager registered;
        // since some of the CI environments are very hostile, we give this a lot of time (2 minutes)
        waitUntilNumTaskManagersAreRegistered(jmActor, 1, 120000);
        final Throwable[] errorRef = new Throwable[1];
        // start the test program, which blocks indefinitely
        Runnable programRunner = new Runnable() {
            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", jobManagerPort);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.getConfig().disableSysoutLogging();
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {
                        @Override
                        public Long map(Long value) throws Exception {
                            synchronized (this) {
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<Long>());
                    env.execute();
                } catch (Throwable t) {
                    errorRef[0] = t;
                }
            }
        };
        Thread programThread = new Thread(programRunner);
        // kill the TaskManager
        taskManagerProcess.destroy();
        taskManagerProcess = null;
        // immediately submit the job. this should hit the case where the JobManager
        // still thinks it has the TaskManager and tries to send it tasks
        programThread.start();
        // try to cancel the job
        cancelRunningJob(jmActor);
        // we should see a failure within reasonable time (10 s is the ask timeout).
        // since CI environments are often slow, we conservatively give the program
        // up to 2 minutes to fail, which is still far below the failure time the
        // heartbeat settings would allow (> 2000 s)
        programThread.join(120000);
        assertFalse("The program did not cancel in time (2 minutes)", programThread.isAlive());
        Throwable error = errorRef[0];
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
        // all seems well :-)
    } catch (Exception e) {
        e.printStackTrace();
        printProcessLog("TaskManager", processOutput.toString());
        fail(e.getMessage());
    } catch (Error e) {
        e.printStackTrace();
        printProcessLog("TaskManager", processOutput.toString());
        throw e;
    } finally {
        if (taskManagerProcess != null) {
            taskManagerProcess.destroy();
        }
        if (jmActorSystem != null) {
            jmActorSystem.shutdown();
        }
    }
}
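The MapFunction in this test is deliberately non-terminating: each map() call parks its thread, so the job stays running until cancellation interrupts the task. A minimal, self-contained sketch of that blocking pattern follows; the class name is illustrative, not part of the test above.

import org.apache.flink.api.common.functions.MapFunction;

// Sketch of the blocking map pattern used in the test above. Each
// invocation waits indefinitely; cancelling the job interrupts the task
// thread, making wait() throw InterruptedException so the task can terminate.
public class BlockingMapFunction implements MapFunction<Long, Long> {
    private static final long serialVersionUID = 1L;

    @Override
    public Long map(Long value) throws Exception {
        synchronized (this) {
            wait(); // blocks until the task thread is interrupted
        }
        return 0L;
    }
}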
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class StreamingOperatorsITCase, method testFoldOperationWithNonJavaSerializableType.
/**
 * Tests whether the fold operation can also be called with types that are not Java serializable.
 */
@Test
public void testFoldOperationWithNonJavaSerializableType() throws Exception {
    final int numElements = 10;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<Integer, NonSerializable>> input = env.addSource(new NonSerializableTupleSource(numElements));
    final MemorySinkFunction sinkFunction = new MemorySinkFunction(0);
    final ArrayList<Integer> actualResult = new ArrayList<>();
    MemorySinkFunction.registerCollection(0, actualResult);
    input
        .keyBy(0)
        .fold(new NonSerializable(42), new FoldFunction<Tuple2<Integer, NonSerializable>, NonSerializable>() {
            private static final long serialVersionUID = 2705497830143608897L;

            @Override
            public NonSerializable fold(NonSerializable accumulator, Tuple2<Integer, NonSerializable> value) throws Exception {
                return new NonSerializable(accumulator.value + value.f1.value);
            }
        })
        .map(new MapFunction<NonSerializable, Integer>() {
            private static final long serialVersionUID = 6906984044674568945L;

            @Override
            public Integer map(NonSerializable value) throws Exception {
                return value.value;
            }
        })
        .addSink(sinkFunction);
    Collection<Integer> expected = new ArrayList<>(numElements);
    for (int i = 0; i < numElements; i++) {
        expected.add(42 + i);
    }
    env.execute();
    Collections.sort(actualResult);
    Assert.assertEquals(expected, actualResult);
    MemorySinkFunction.clear();
}
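The MapFunction in this pipeline only unwraps the int carried by NonSerializable so the results can be compared against plain Integers. Below is a standalone sketch of the same unwrap pattern, with an illustrative wrapper type standing in for NonSerializable, whose definition is not shown in this snippet.

import org.apache.flink.api.common.functions.MapFunction;

// Illustrative stand-in for the test's NonSerializable type: a plain,
// non-Serializable holder of an int, handled by Flink's generic serializer.
class IntHolder {
    final int value;

    IntHolder(int value) {
        this.value = value;
    }
}

// Unwraps the primitive so downstream operators and assertions can
// work with ordinary Integers.
class UnwrapMap implements MapFunction<IntHolder, Integer> {
    private static final long serialVersionUID = 1L;

    @Override
    public Integer map(IntHolder value) throws Exception {
        return value.value;
    }
}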
Use of org.apache.flink.api.common.functions.MapFunction in project flink by apache.
The class PartitionerITCase, method partitionerTest.
@Test
public void partitionerTest() {
    TestListResultSink<Tuple2<Integer, String>> hashPartitionResultSink = new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> customPartitionResultSink = new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> broadcastPartitionResultSink = new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> forwardPartitionResultSink = new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> rebalancePartitionResultSink = new TestListResultSink<Tuple2<Integer, String>>();
    TestListResultSink<Tuple2<Integer, String>> globalPartitionResultSink = new TestListResultSink<Tuple2<Integer, String>>();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);
    DataStream<Tuple1<String>> src = env.fromElements(
        new Tuple1<String>("a"),
        new Tuple1<String>("b"),
        new Tuple1<String>("b"),
        new Tuple1<String>("a"),
        new Tuple1<String>("a"),
        new Tuple1<String>("c"),
        new Tuple1<String>("a"));
    // partition by hash
    src.keyBy(0).map(new SubtaskIndexAssigner()).addSink(hashPartitionResultSink);
    // partition custom
    DataStream<Tuple2<Integer, String>> partitionCustom = src
        .partitionCustom(new Partitioner<String>() {
            @Override
            public int partition(String key, int numPartitions) {
                if (key.equals("c")) {
                    return 2;
                } else {
                    return 0;
                }
            }
        }, 0)
        .map(new SubtaskIndexAssigner());
    partitionCustom.addSink(customPartitionResultSink);
    // partition broadcast
    src.broadcast().map(new SubtaskIndexAssigner()).addSink(broadcastPartitionResultSink);
    // partition rebalance
    src.rebalance().map(new SubtaskIndexAssigner()).addSink(rebalancePartitionResultSink);
    // partition forward (an identity map first, so forward connects two operators of equal parallelism)
    src.map(new MapFunction<Tuple1<String>, Tuple1<String>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple1<String> map(Tuple1<String> value) throws Exception {
            return value;
        }
    }).forward().map(new SubtaskIndexAssigner()).addSink(forwardPartitionResultSink);
    // partition global
    src.global().map(new SubtaskIndexAssigner()).addSink(globalPartitionResultSink);
    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    List<Tuple2<Integer, String>> hashPartitionResult = hashPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> customPartitionResult = customPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> broadcastPartitionResult = broadcastPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> forwardPartitionResult = forwardPartitionResultSink.getResult();
    List<Tuple2<Integer, String>> rebalancePartitionResult = rebalancePartitionResultSink.getResult();
    List<Tuple2<Integer, String>> globalPartitionResult = globalPartitionResultSink.getResult();
    verifyHashPartitioning(hashPartitionResult);
    verifyCustomPartitioning(customPartitionResult);
    verifyBroadcastPartitioning(broadcastPartitionResult);
    // forward keeps each element on the subtask of the preceding identity map,
    // preserving the round-robin distribution from the source, so it verifies like rebalance
    verifyRebalancePartitioning(forwardPartitionResult);
    verifyRebalancePartitioning(rebalancePartitionResult);
    verifyGlobalPartitioning(globalPartitionResult);
}
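SubtaskIndexAssigner itself is not shown in this snippet. Judging from the result type Tuple2<Integer, String> and the verify* checks, it presumably tags each element with the index of the parallel subtask that processed it. A plausible sketch as a RichMapFunction follows; this is an assumption, not the test's actual code.

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;

// Assumed shape of SubtaskIndexAssigner: pair each element with the
// index of the subtask instance that mapped it, so the sinks can check
// where each record was routed by the partitioner under test.
class SubtaskIndexAssignerSketch extends RichMapFunction<Tuple1<String>, Tuple2<Integer, String>> {
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<Integer, String> map(Tuple1<String> value) throws Exception {
        return new Tuple2<>(getRuntimeContext().getIndexOfThisSubtask(), value.f0);
    }
}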