Example 11 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class WebFrontendITCase method testStopYarn.

@Test
public void testStopYarn() throws Exception {
    // this only works if there is no active job at this point
    assertTrue(cluster.getCurrentlyRunningJobsJava().isEmpty());
    // Create a task
    final JobVertex sender = new JobVertex("Sender");
    sender.setParallelism(2);
    sender.setInvokableClass(StoppableInvokable.class);
    final JobGraph jobGraph = new JobGraph("Stoppable streaming test job", sender);
    final JobID jid = jobGraph.getJobID();
    cluster.submitJobDetached(jobGraph);
    // wait for job to show up
    while (cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        Thread.sleep(10);
    }
    final FiniteDuration testTimeout = new FiniteDuration(2, TimeUnit.MINUTES);
    final Deadline deadline = testTimeout.fromNow();
    while (!cluster.getCurrentlyRunningJobsJava().isEmpty()) {
        try (HttpTestClient client = new HttpTestClient("localhost", port)) {
            // Send the stop request to the web server
            client.sendGetRequest("/jobs/" + jid + "/yarn-stop", deadline.timeLeft());
            HttpTestClient.SimpleHttpResponse response = client.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("json"), response.getType());
            assertEquals("{}", response.getContent());
        }
        Thread.sleep(20);
    }
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) HttpTestClient(org.apache.flink.runtime.webmonitor.testutils.HttpTestClient) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
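
Every example on this page follows the same core pattern: derive an absolute Deadline from a relative FiniteDuration, then pass the shrinking deadline.timeLeft() to each blocking call so the whole loop shares a single time budget. A minimal standalone sketch of that pattern (class name and values are illustrative; it assumes only scala-library on the classpath):

import java.util.concurrent.TimeUnit;

import scala.concurrent.duration.Deadline;
import scala.concurrent.duration.FiniteDuration;

public class DeadlinePattern {

    public static void main(String[] args) throws InterruptedException {
        // a Deadline is an absolute point in time computed from a relative duration
        FiniteDuration timeout = new FiniteDuration(2, TimeUnit.SECONDS);
        Deadline deadline = timeout.fromNow();

        while (deadline.hasTimeLeft()) {
            // timeLeft() shrinks on every iteration, so each blocking call gets
            // only the remaining budget instead of the full original timeout
            FiniteDuration remaining = deadline.timeLeft();
            System.out.println("remaining: " + remaining.toMillis() + " ms");
            Thread.sleep(500);
        }

        System.out.println("overdue: " + deadline.isOverdue());
    }
}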

Example 12 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class TaskManagerRegistrationTest method testTaskManagerNoExcessiveRegistrationMessages.

/**
 * Tests that the TaskManager does not send an excessive amount of registration messages to
 * the job manager if its registration was rejected.
 */
@Test
public void testTaskManagerNoExcessiveRegistrationMessages() throws Exception {
    new JavaTestKit(actorSystem) {

        {
            ActorGateway jm = null;
            ActorGateway taskManager = null;
            try {
                FiniteDuration timeout = new FiniteDuration(5, TimeUnit.SECONDS);
                jm = TestingUtils.createForwardingActor(actorSystem, getTestActor(), Option.<String>empty());
                final ActorGateway jmGateway = jm;
                long refusedRegistrationPause = 500;
                long initialRegistrationPause = 100;
                long maxDelay = 30000;
                Configuration tmConfig = new Configuration(config);
                tmConfig.setString(ConfigConstants.TASK_MANAGER_REFUSED_REGISTRATION_PAUSE, refusedRegistrationPause + " ms");
                tmConfig.setString(ConfigConstants.TASK_MANAGER_INITIAL_REGISTRATION_PAUSE, initialRegistrationPause + " ms");
                // we make the test actor (the test kit) the JobManager to intercept
                // the messages
                taskManager = createTaskManager(actorSystem, jmGateway, tmConfig, true, false);
                final ActorGateway taskManagerGateway = taskManager;
                final Deadline deadline = timeout.fromNow();
                try {
                    while (deadline.hasTimeLeft()) {
                        // the TaskManager should try to register
                        expectMsgClass(deadline.timeLeft(), RegisterTaskManager.class);
                        // we decline the registration
                        taskManagerGateway.tell(new RefuseRegistration(new Exception("test reason")), jmGateway);
                    }
                } catch (AssertionError error) {
                    // ignore, since it simply means that we have used up all our time
                }
                RegisterTaskManager[] registerTaskManagerMessages = new ReceiveWhile<RegisterTaskManager>(RegisterTaskManager.class, timeout) {

                    @Override
                    protected RegisterTaskManager match(Object msg) throws Exception {
                        if (msg instanceof RegisterTaskManager) {
                            return (RegisterTaskManager) msg;
                        } else {
                            throw noMatch();
                        }
                    }
                }.get();
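                // The TaskManager doubles its registration pause after every refused
                // attempt, so k attempts span a cumulative pause of about
                // initialRegistrationPause * (2^k - 1). 'exponent' is the number of
                // doublings needed to cover the test timeout; 'maxExponent' caps it
                // where the pause saturates at maxDelay, and any leftover budget adds
                // attempts paced at maxDelay.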
                int maxExponent = (int) Math.floor(Math.log(((double) maxDelay / initialRegistrationPause + 1)) / Math.log(2));
                int exponent = (int) Math.ceil(Math.log(((double) timeout.toMillis() / initialRegistrationPause + 1)) / Math.log(2));
                int exp = Math.min(maxExponent, exponent);
                long difference = timeout.toMillis() - (initialRegistrationPause * (1 << exp));
                int numberRegisterTaskManagerMessages = exp;
                if (difference > 0) {
                    numberRegisterTaskManagerMessages += Math.ceil((double) difference / maxDelay);
                }
                int maxExpectedNumberOfRegisterTaskManagerMessages = numberRegisterTaskManagerMessages * 2;
                assertTrue("The number of RegisterTaskManager messages #" + registerTaskManagerMessages.length + " should be less than #" + maxExpectedNumberOfRegisterTaskManagerMessages, registerTaskManagerMessages.length <= maxExpectedNumberOfRegisterTaskManagerMessages);
            } finally {
                stopActor(taskManager);
                stopActor(jm);
            }
        }
    };
}
Also used : RegisterTaskManager(org.apache.flink.runtime.messages.RegistrationMessages.RegisterTaskManager) Configuration(org.apache.flink.configuration.Configuration) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) RefuseRegistration(org.apache.flink.runtime.messages.RegistrationMessages.RefuseRegistration) InvalidActorNameException(akka.actor.InvalidActorNameException) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JavaTestKit(akka.testkit.JavaTestKit) Test(org.junit.Test)
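
Plugging this test's values into the bound (timeout 5 s, initialRegistrationPause 100 ms, maxDelay 30 s): maxExponent = floor(log2(301)) = 8 and exponent = ceil(log2(51)) = 6, so exp = 6; the difference 5000 - 100 * 2^6 = -1400 ms is negative, giving 6 expected registration attempts and a tolerated maximum of 2 * 6 = 12 RegisterTaskManager messages.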

Example 13 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class AbstractQueryableStateITCase method testFoldingState.

/**
 * Tests a simple queryable state instance with folding state. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The folding state sums these up and maps them to Strings. The
 * test succeeds once each subtask index has been queried with the result
 * n*(n+1)/2 (as a String).
 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState = new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
        QueryableStateStream<Integer, String> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("pumba", foldingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
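
The SumFold function referenced above is defined elsewhere in the test class and is not part of this snippet. A hypothetical stand-in, consistent with the descriptor's String accumulator and initial value "0", could look like this:

import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical sketch of the SumFold used above: folds (subtaskIndex, value)
// tuples into a running sum that is kept as a String
public class SumFold implements FoldFunction<Tuple2<Integer, Long>, String> {

    @Override
    public String fold(String accumulator, Tuple2<Integer, Long> value) throws Exception {
        return Long.toString(Long.parseLong(accumulator) + value.f1);
    }
}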

Example 14 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class AbstractQueryableStateITCase method testReducingState.

/**
 * Tests a simple queryable state instance with reducing state. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The reducing state instance sums these up. The test succeeds
 * once each subtask index has been queried with the result n*(n+1)/2.
 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("jungle", reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query (there is no explicit wait for the job to reach RUNNING)
        long expected = numElements * (numElements + 1) / 2;
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
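
SumReduce is likewise defined elsewhere in the test class. A hypothetical stand-in consistent with its use here keeps the subtask index and sums the counts, so each key converges to n*(n+1)/2:

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;

// hypothetical sketch of the SumReduce used above: keeps the subtask index in
// f0 and accumulates the emitted values in f1
public class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) throws Exception {
        return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
    }
}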

Example 15 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class RescalingITCase method testSavepointRescalingPartitionedOperatorState.

/**
 * Tests rescaling of partitioned operator state. More specifically, we test the mechanism with {@link ListCheckpointed},
 * as it subsumes {@link org.apache.flink.streaming.api.checkpoint.CheckpointedFunction}.
 */
public void testSavepointRescalingPartitionedOperatorState(boolean scaleOut, OperatorCheckpointMethod checkpointMethod) throws Exception {
    final int parallelism = scaleOut ? numSlots : numSlots / 2;
    final int parallelism2 = scaleOut ? numSlots / 2 : numSlots;
    final int maxParallelism = 13;
    FiniteDuration timeout = new FiniteDuration(3, TimeUnit.MINUTES);
    Deadline deadline = timeout.fromNow();
    JobID jobID = null;
    ActorGateway jobManager = null;
    int counterSize = Math.max(parallelism, parallelism2);
    if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION || checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION_BROADCAST) {
        PartitionedStateSource.CHECK_CORRECT_SNAPSHOT = new int[counterSize];
        PartitionedStateSource.CHECK_CORRECT_RESTORE = new int[counterSize];
    } else {
        PartitionedStateSourceListCheckpointed.CHECK_CORRECT_SNAPSHOT = new int[counterSize];
        PartitionedStateSourceListCheckpointed.CHECK_CORRECT_RESTORE = new int[counterSize];
    }
    try {
        jobManager = cluster.getLeaderGateway(deadline.timeLeft());
        JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, checkpointMethod);
        jobID = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        Object savepointResponse = null;
        // wait until the operator is started
        StateSourceBase.workStartedLatch.await();
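        // repeatedly trigger savepoints until one succeeds or the deadline expires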
        while (deadline.hasTimeLeft()) {
            Future<Object> savepointPathFuture = jobManager.ask(new JobManagerMessages.TriggerSavepoint(jobID, Option.<String>empty()), deadline.timeLeft());
            FiniteDuration waitingTime = new FiniteDuration(10, TimeUnit.SECONDS);
            savepointResponse = Await.result(savepointPathFuture, waitingTime);
            if (savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess) {
                break;
            }
            System.out.println(savepointResponse);
        }
        assertTrue(savepointResponse instanceof JobManagerMessages.TriggerSavepointSuccess);
        final String savepointPath = ((JobManagerMessages.TriggerSavepointSuccess) savepointResponse).savepointPath();
        Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), deadline.timeLeft());
        Future<Object> cancellationResponseFuture = jobManager.ask(new JobManagerMessages.CancelJob(jobID), deadline.timeLeft());
        Object cancellationResponse = Await.result(cancellationResponseFuture, deadline.timeLeft());
        assertTrue(cancellationResponse instanceof JobManagerMessages.CancellationSuccess);
        Await.ready(jobRemovedFuture, deadline.timeLeft());
        // job successfully removed
        jobID = null;
        JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, checkpointMethod);
        scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));
        jobID = scaledJobGraph.getJobID();
        cluster.submitJobAndWait(scaledJobGraph, false);
        int sumExp = 0;
        int sumAct = 0;
        if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION) {
            for (int c : PartitionedStateSource.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSource.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
        } else if (checkpointMethod == OperatorCheckpointMethod.CHECKPOINTED_FUNCTION_BROADCAST) {
            for (int c : PartitionedStateSource.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSource.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
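            // in the broadcast case each subtask restores the complete snapshot,
            // so the expected sum scales with the new parallelism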
            sumExp *= parallelism2;
        } else {
            for (int c : PartitionedStateSourceListCheckpointed.CHECK_CORRECT_SNAPSHOT) {
                sumExp += c;
            }
            for (int c : PartitionedStateSourceListCheckpointed.CHECK_CORRECT_RESTORE) {
                sumAct += c;
            }
        }
        assertEquals(sumExp, sumAct);
        jobID = null;
    } finally {
        // clear any left overs from a possibly failed job
        if (jobID != null && jobManager != null) {
            Future<Object> jobRemovedFuture = jobManager.ask(new TestingJobManagerMessages.NotifyWhenJobRemoved(jobID), timeout);
            try {
                Await.ready(jobRemovedFuture, timeout);
            } catch (TimeoutException | InterruptedException ie) {
                fail("Failed while cleaning up the cluster.");
            }
        }
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) TestingJobManagerMessages(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) JobID(org.apache.flink.api.common.JobID) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

Deadline (scala.concurrent.duration.Deadline): 59
Test (org.junit.Test): 50
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 31
Configuration (org.apache.flink.configuration.Configuration): 28
FiniteDuration (scala.concurrent.duration.FiniteDuration): 24
JobID (org.apache.flink.api.common.JobID): 21
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 20
ActorRef (akka.actor.ActorRef): 12
File (java.io.File): 12
JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 12
TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster): 12
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 10
AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 10
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 10
ActorSystem (akka.actor.ActorSystem): 9
ArrayList (java.util.ArrayList): 9
JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages): 9
UUID (java.util.UUID): 8
AtomicLong (java.util.concurrent.atomic.AtomicLong): 8
KeySelector (org.apache.flink.api.java.functions.KeySelector): 8