Search in sources :

Example 51 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class RescalingITCase method createJobGraphWithKeyedAndNonPartitionedOperatorState.

private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(int parallelism, int maxParallelism, int fixedParallelism, int numberKeys, int numberElements, boolean terminateAfterEmission, int checkpointingInterval) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());
    DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(numberKeys, numberElements, terminateAfterEmission)).setParallelism(fixedParallelism).keyBy(new KeySelector<Integer, Integer>() {

        private static final long serialVersionUID = -7952298871120320940L;

        @Override
        public Integer getKey(Integer value) throws Exception {
            return value;
        }
    });
    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);
    DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));
    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());
    return env.getStreamGraph().getJobGraph();
}
Also used : CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)

Example 52 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class SavepointITCase method testCanRestoreWithModifiedStatelessOperators.

/**
	 * FLINK-5985
	 *
	 * This test ensures we can restore from a savepoint under modifications to the job graph that only concern
	 * stateless operators.
	 */
@Test
public void testCanRestoreWithModifiedStatelessOperators() throws Exception {
    // Config
    int numTaskManagers = 2;
    int numSlotsPerTaskManager = 2;
    int parallelism = 2;
    // Test deadline
    final Deadline deadline = new FiniteDuration(5, TimeUnit.MINUTES).fromNow();
    final File tmpDir = CommonTestUtils.createTempDirectory();
    final File savepointDir = new File(tmpDir, "savepoints");
    TestingCluster flink = null;
    String savepointPath;
    try {
        // Flink configuration
        final Configuration config = new Configuration();
        config.setInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, numTaskManagers);
        config.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, numSlotsPerTaskManager);
        config.setString(ConfigConstants.SAVEPOINT_DIRECTORY_KEY, savepointDir.toURI().toString());
        LOG.info("Flink configuration: " + config + ".");
        // Start Flink
        flink = new TestingCluster(config);
        LOG.info("Starting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        final StatefulCounter statefulCounter = new StatefulCounter();
        StatefulCounter.resetForTest(parallelism);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        env.addSource(new InfiniteTestSource()).shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 4 * value;
            }
        }).shuffle().map(statefulCounter).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return 2 * value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph originalJobGraph = env.getStreamGraph().getJobGraph();
        JobSubmissionResult submissionResult = flink.submitJobDetached(originalJobGraph);
        JobID jobID = submissionResult.getJobID();
        // wait for the Tasks to be ready
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        Future<Object> savepointPathFuture = jobManager.ask(new TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
        savepointPath = ((TriggerSavepointSuccess) Await.result(savepointPathFuture, deadline.timeLeft())).savepointPath();
        Future<Object> savepointFuture = jobManager.ask(new RequestSavepoint(savepointPath), deadline.timeLeft());
        ((ResponseSavepoint) Await.result(savepointFuture, deadline.timeLeft())).savepoint();
        LOG.info("Retrieved savepoint: " + savepointPath + ".");
        // Shut down the Flink cluster (thereby canceling the job)
        LOG.info("Shutting down Flink cluster.");
        flink.shutdown();
        flink.awaitTermination();
    } finally {
        flink.shutdown();
        flink.awaitTermination();
    }
    try {
        LOG.info("Restarting Flink cluster.");
        flink.start(true);
        // Retrieve the job manager
        LOG.info("Retrieving JobManager.");
        ActorGateway jobManager = Await.result(flink.leaderGateway().future(), deadline.timeLeft());
        LOG.info("JobManager: " + jobManager + ".");
        // Reset static test helpers
        StatefulCounter.resetForTest(parallelism);
        // Gather all task deployment descriptors
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(parallelism);
        // generate a modified job graph that adds a stateless op
        env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter()).uid("statefulCounter").shuffle().map(new MapFunction<Integer, Integer>() {

            @Override
            public Integer map(Integer value) throws Exception {
                return value;
            }
        }).addSink(new DiscardingSink<Integer>());
        JobGraph modifiedJobGraph = env.getStreamGraph().getJobGraph();
        // Set the savepoint path
        modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        LOG.info("Resubmitting job " + modifiedJobGraph.getJobID() + " with " + "savepoint path " + savepointPath + " in detached mode.");
        // Submit the job
        flink.submitJobDetached(modifiedJobGraph);
        // Await state is restored
        StatefulCounter.getRestoreLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        // Await some progress after restore
        StatefulCounter.getProgressLatch().await(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        flink.shutdown();
        flink.awaitTermination();
    }
}
Also used : RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) TestingCluster(org.apache.flink.runtime.testingUtils.TestingCluster) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) ResponseSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.ResponseSavepoint) RequestSavepoint(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.RequestSavepoint) DisposeSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.DisposeSavepoint) FileNotFoundException(java.io.FileNotFoundException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) JobSubmissionResult(org.apache.flink.api.common.JobSubmissionResult) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TriggerSavepoint(org.apache.flink.runtime.messages.JobManagerMessages.TriggerSavepoint) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) File(java.io.File) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 53 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class StreamCheckpointNotifierITCase method testProgram.

/**
	 * Runs the following program:
	 *
	 * <pre>
	 *     [ (source)->(filter) ] -> [ (co-map) ] -> [ (map) ] -> [ (groupBy/reduce)->(sink) ]
	 * </pre>
	 */
@Test
public void testProgram() {
    try {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        assertEquals("test setup broken", PARALLELISM, env.getParallelism());
        env.enableCheckpointing(500);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L));
        final int numElements = 10000;
        final int numTaskTotal = PARALLELISM * 5;
        DataStream<Long> stream = env.addSource(new GeneratingSourceFunction(numElements, numTaskTotal));
        stream.filter(new LongRichFilterFunction()).connect(stream).flatMap(new LeftIdentityCoRichFlatMapFunction()).map(new IdentityMapFunction()).startNewChain().keyBy(0).reduce(new OnceFailingReducer(numElements)).addSink(new DiscardingSink<Tuple1<Long>>());
        env.execute();
        final long failureCheckpointID = OnceFailingReducer.failureCheckpointID;
        assertNotEquals(0L, failureCheckpointID);
        List<List<Long>[]> allLists = Arrays.asList(GeneratingSourceFunction.completedCheckpoints, LongRichFilterFunction.completedCheckpoints, LeftIdentityCoRichFlatMapFunction.completedCheckpoints, IdentityMapFunction.completedCheckpoints, OnceFailingReducer.completedCheckpoints);
        for (List<Long>[] parallelNotifications : allLists) {
            for (List<Long> notifications : parallelNotifications) {
                assertTrue("No checkpoint notification was received.", notifications.size() > 0);
                assertFalse("Failure checkpoint was marked as completed.", notifications.contains(failureCheckpointID));
                assertFalse("No checkpoint received after failure.", notifications.get(notifications.size() - 1) == failureCheckpointID);
                assertTrue("Checkpoint notification was received multiple times", notifications.size() == new HashSet<Long>(notifications).size());
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : IOException(java.io.IOException) Tuple1(org.apache.flink.api.java.tuple.Tuple1) AtomicLong(java.util.concurrent.atomic.AtomicLong) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ArrayList(java.util.ArrayList) List(java.util.List) Test(org.junit.Test)

Example 54 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class AbstractEventTimeWindowCheckpointingITCase method testPreAggregatedTumblingTimeWindow.

@Test
public void testPreAggregatedTumblingTimeWindow() {
    final int NUM_ELEMENTS_PER_KEY = numElementsPerKey();
    final int WINDOW_SIZE = windowSize();
    final int NUM_KEYS = numKeys();
    FailingSource.reset();
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", cluster.getLeaderRPCPort());
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.addSource(new FailingSource(NUM_KEYS, NUM_ELEMENTS_PER_KEY, NUM_ELEMENTS_PER_KEY / 3)).rebalance().keyBy(0).timeWindow(Time.of(WINDOW_SIZE, MILLISECONDS)).reduce(new ReduceFunction<Tuple2<Long, IntType>>() {

            @Override
            public Tuple2<Long, IntType> reduce(Tuple2<Long, IntType> a, Tuple2<Long, IntType> b) {
                return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
            }
        }, new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

            private boolean open = false;

            @Override
            public void open(Configuration parameters) {
                assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                open = true;
            }

            @Override
            public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<Long, IntType>> input, Collector<Tuple4<Long, Long, Long, IntType>> out) {
                // validate that the function has been opened properly
                assertTrue(open);
                for (Tuple2<Long, IntType> in : input) {
                    out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                }
            }
        }).addSink(new ValidatingSink(NUM_KEYS, NUM_ELEMENTS_PER_KEY / WINDOW_SIZE)).setParallelism(1);
        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) SuccessException(org.apache.flink.test.util.SuccessException) IOException(java.io.IOException) Tuple4(org.apache.flink.api.java.tuple.Tuple4) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Tuple(org.apache.flink.api.java.tuple.Tuple) Test(org.junit.Test)

Example 55 with StreamExecutionEnvironment

use of org.apache.flink.streaming.api.environment.StreamExecutionEnvironment in project flink by apache.

the class AccumulatorLiveITCase method testStreaming.

@Test
public void testStreaming() throws Exception {
    StreamExecutionEnvironment env = new DummyStreamExecutionEnvironment();
    env.setParallelism(1);
    DataStream<String> input = env.fromCollection(inputData);
    input.flatMap(new NotifyingMapper()).writeUsingOutputFormat(new NotifyingOutputFormat()).disableChaining();
    jobGraph = env.getStreamGraph().getJobGraph();
    jobID = jobGraph.getJobID();
    verifyResults();
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)383 Test (org.junit.Test)286 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)192 OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation)81 TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow)75 Tuple3 (org.apache.flink.api.java.tuple.Tuple3)48 EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger)42 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)34 Properties (java.util.Properties)32 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)31 TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows)30 TypeHint (org.apache.flink.api.common.typeinfo.TypeHint)27 SuccessException (org.apache.flink.test.util.SuccessException)27 IOException (java.io.IOException)24 Configuration (org.apache.flink.configuration.Configuration)24 SlidingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows)24 KeySelector (org.apache.flink.api.java.functions.KeySelector)22 ProcessingTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.ProcessingTimeTrigger)21 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)20 MapFunction (org.apache.flink.api.common.functions.MapFunction)19