Search in sources :

Example 51 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class AbstractQueryableStateITCase method testQueryableState.

/**
 * Runs a simple topology whose sources produce random (key, 1) pairs, with the
 * keys taken from the fixed range 0...numKeys. The records are keyed and fed
 * into a reducing queryable state instance, which sums up the records.
 *
 * <p>After the job has been submitted in detached mode, the QueryableStateClient
 * queries the count of every key in rounds until all keys report a non-zero
 * count or the test deadline expires.
 */
@Test
@SuppressWarnings("unchecked")
public void testQueryableState() throws Exception {
    // Test-wide settings
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        // ---- Build the test program ----
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // The cluster is shared between tests and slot availability is not
        // checked before submitting, so the job must keep restarting until
        // it gets scheduled.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));

        // Reducing state that is exposed for queries
        ReducingStateDescriptor<Tuple2<Integer, Long>> sumDescriptor =
                new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());

        final String queryName = "hakuna-matata";

        // Key the stream by the first tuple field
        final KeySelector<Tuple2<Integer, Long>, Integer> byFirstField =
                new KeySelector<Tuple2<Integer, Long>, Integer>() {

                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                };

        final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
                source.keyBy(byFirstField).asQueryableState(queryName, sumDescriptor);

        // ---- Submit the job graph ----
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        cluster.submitJobDetached(jobGraph);

        // ---- Query in rounds ----
        jobId = jobGraph.getJobID();

        // One slot per key; a slot stays at zero until a query for that key succeeded
        final AtomicLongArray counts = new AtomicLongArray(numKeys);

        boolean everyKeyAnswered = false;
        while (!everyKeyAnswered && deadline.hasTimeLeft()) {
            // Optimistically assume we are done; any key still at zero resets this
            everyKeyAnswered = true;

            final List<Future<byte[]>> pending = new ArrayList<>(numKeys);

            for (int i = 0; i < numKeys; i++) {
                final int key = i;

                if (counts.get(key) > 0) {
                    // Already answered in an earlier round
                    continue;
                }
                everyKeyAnswered = false;

                final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(
                        key,
                        queryableState.getKeySerializer(),
                        VoidNamespace.INSTANCE,
                        VoidNamespaceSerializer.INSTANCE);

                Future<byte[]> reply = getKvStateWithRetries(
                        client, jobId, queryName, key, serializedKey, QUERY_RETRY_DELAY, false);

                // NOTE(review): the assertion below executes inside the future
                // callback; confirm that a failure there actually surfaces in
                // the test result.
                reply.onSuccess(new OnSuccess<byte[]>() {

                    @Override
                    public void onSuccess(byte[] result) throws Throwable {
                        Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(
                                result, queryableState.getValueSerializer());
                        counts.set(key, value.f1);
                        assertEquals("Key mismatch", key, value.f0.intValue());
                    }
                }, TEST_ACTOR_SYSTEM.dispatcher());

                pending.add(reply);
            }

            // Block until every query of this round has completed
            Future<Iterable<byte[]>> roundDone = Futures.sequence(pending, TEST_ACTOR_SYSTEM.dispatcher());
            Await.ready(roundDone, deadline.timeLeft());
        }

        assertTrue("Not all keys are non-zero", everyKeyAnswered);

        // Double-check each individual counter
        for (int position = 0; position < numKeys; position++) {
            long observed = counts.get(position);
            assertTrue("Count at position " + position + " is " + observed, observed > 0);
        }
    } finally {
        // Free cluster resources: cancel the job (if it was submitted) and stop the client
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : ArrayList(java.util.ArrayList) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLongArray(java.util.concurrent.atomic.AtomicLongArray) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) Future(scala.concurrent.Future) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 52 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class AbstractQueryableStateITCase method testValueState.

/**
 * Tests a simple value state queryable state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds once each subtask index has been queried with
 * value numElements (the latest element updated the state).
 */
@Test
public void testValueState() throws Exception {
    // Test-wide settings
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // The cluster is shared between tests and slot availability is not
        // checked before submitting, so the job must keep restarting until
        // it gets scheduled.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

        // Explicitly described value state
        ValueStateDescriptor<Tuple2<Integer, Long>> descriptor =
                new ValueStateDescriptor<>("any", source.getType());

        // Key the stream by the first tuple field
        KeySelector<Tuple2<Integer, Long>, Integer> byFirstField =
                new KeySelector<Tuple2<Integer, Long>, Integer>() {

                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                };

        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
                source.keyBy(byFirstField).asQueryableState("hakuna", descriptor);

        // Submit the job graph; the id is recorded first so the finally block can cancel it
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // Query the state, expecting the value numElements
        long expectedValue = numElements;
        executeValueQuery(deadline, client, jobId, queryableState, expectedValue);
    } finally {
        // Free cluster resources: cancel the job (if one was created) and stop the client
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 53 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class AbstractQueryableStateITCase method testDuplicateRegistrationFailsJob.

/**
 * Tests that duplicate query registrations fail the job at the JobManager.
 *
 * <p>The same queryable state name is registered twice on the same source.
 * The test then waits for the job to reach {@code FAILED} and checks that the
 * failure cause is a {@code SuppressRestartsException} caused by an
 * {@code IllegalStateException} whose message mentions the registration name.
 */
@Test
public void testDuplicateRegistrationFailsJob() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    JobID jobId = null;
    try {
        //
        // Test program
        //
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
        final String queryName = "duplicate-me";
        // First registration under queryName. The returned stream handle is
        // not needed; only the registration itself matters for this test.
        source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState(queryName, reducingState);
        // Second registration under the very same name
        source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState(queryName);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        // Ask for the FAILED notification before submitting the job
        Future<JobStatusIs> failedFuture = cluster.getLeaderGateway(deadline.timeLeft()).ask(new NotifyWhenJobStatus(jobId, JobStatus.FAILED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));
        cluster.submitJobDetached(jobGraph);
        JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
        assertEquals(JobStatus.FAILED, jobStatus.state());
        // Get the job and check the cause
        JobFound jobFound = Await.result(cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.RequestJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobFound>apply(JobFound.class)), deadline.timeLeft());
        String failureCause = jobFound.executionGraph().getFailureCauseAsString();
        assertTrue("Not instance of SuppressRestartsException", failureCause.startsWith("org.apache.flink.runtime.execution.SuppressRestartsException"));
        final String causedByMarker = "Caused by: ";
        int causedByIndex = failureCause.indexOf(causedByMarker);
        // indexOf returns -1 when the marker is absent; without this check the
        // substring below would start at an arbitrary offset and the test
        // would fail with a misleading message.
        assertTrue("Failure cause does not contain a 'Caused by: ' section: " + failureCause, causedByIndex != -1);
        String subFailureCause = failureCause.substring(causedByIndex + causedByMarker.length());
        assertTrue("Not caused by IllegalStateException", subFailureCause.startsWith("java.lang.IllegalStateException"));
        assertTrue("Exception does not contain registration name", subFailureCause.contains(queryName));
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
    }
}
Also used : KeySelector(org.apache.flink.api.java.functions.KeySelector) JobFound(org.apache.flink.runtime.messages.JobManagerMessages.JobFound) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) JobStatusIs(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.JobStatusIs) Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)

Example 54 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class AbstractQueryableStateITCase method testValueStateShortcut.

/**
 * Tests a simple value state queryable state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds once each subtask index has been queried with
 * value numElements (the latest element updated the state).
 *
 * <p>This is the same as the simple value state test, but uses the API
 * shortcut that takes only the queryable state name.
 */
@Test
public void testValueStateShortcut() throws Exception {
    // Test-wide settings
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());

    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // The cluster is shared between tests and slot availability is not
        // checked before submitting, so the job must keep restarting until
        // it gets scheduled.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));

        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

        // Key the stream by the first tuple field
        KeySelector<Tuple2<Integer, Long>, Integer> byFirstField =
                new KeySelector<Tuple2<Integer, Long>, Integer>() {

                    @Override
                    public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                        return value.f0;
                    }
                };

        // Value state shortcut: no explicit state descriptor is passed
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState =
                source.keyBy(byFirstField).asQueryableState("matata");

        // Submit the job graph; the id is recorded first so the finally block can cancel it
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);

        // Query the state, expecting the value numElements
        long expectedValue = numElements;
        executeValueQuery(deadline, client, jobId, queryableState, expectedValue);
    } finally {
        // Free cluster resources: cancel the job (if one was created) and stop the client
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster
                    .getLeaderGateway(deadline.timeLeft())
                    .ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft())
                    .mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 55 with Deadline

use of scala.concurrent.duration.Deadline in project flink by apache.

the class ExecutionGraphRestartTest method testNoRestartOnSuppressException.

/**
 * Fails one execution vertex with a {@code SuppressRestartsException} and
 * verifies that the execution graph ends up in {@code FAILED} without
 * {@code restart()} being invoked and without the restart strategy having
 * counted a restart attempt.
 */
@Test
public void testNoRestartOnSuppressException() throws Exception {
    // Only the execution graph half of the returned tuple is used here
    ExecutionGraph graph = createSpyExecutionGraph(new FixedDelayRestartStrategy(1, 1000)).f0;

    // Fail the first vertex with an exception that suppresses restarts
    ExecutionVertex firstVertex = graph.getAllExecutionVertices().iterator().next();
    firstVertex.fail(new SuppressRestartsException(new Exception("Test Exception")));

    assertEquals(JobStatus.FAILING, graph.getState());

    // Complete the cancellation of every vertex
    for (ExecutionVertex each : graph.getAllExecutionVertices()) {
        each.getCurrentExecutionAttempt().cancelingComplete();
    }

    // Poll until the graph reports FAILED or two minutes have elapsed
    Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    while (deadline.hasTimeLeft() && graph.getState() != JobStatus.FAILED) {
        Thread.sleep(100);
    }

    assertEquals(JobStatus.FAILED, graph.getState());

    // The graph must never have been restarted
    verify(graph, never()).restart();

    // ...and the strategy must not have recorded a restart attempt
    RestartStrategy strategy = graph.getRestartStrategy();
    assertTrue(strategy instanceof FixedDelayRestartStrategy);
    FixedDelayRestartStrategy fixedDelay = (FixedDelayRestartStrategy) strategy;
    assertEquals(0, fixedDelay.getCurrentRestartAttempt());
}
Also used : SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) FailureRateRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FailureRateRestartStrategy) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) RestartStrategy(org.apache.flink.runtime.executiongraph.restart.RestartStrategy) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

Deadline (scala.concurrent.duration.Deadline)59 Test (org.junit.Test)50 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)31 Configuration (org.apache.flink.configuration.Configuration)28 FiniteDuration (scala.concurrent.duration.FiniteDuration)24 JobID (org.apache.flink.api.common.JobID)21 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)20 ActorRef (akka.actor.ActorRef)12 File (java.io.File)12 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)12 TestingCluster (org.apache.flink.runtime.testingUtils.TestingCluster)12 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)10 AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway)10 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)10 ActorSystem (akka.actor.ActorSystem)9 ArrayList (java.util.ArrayList)9 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)9 UUID (java.util.UUID)8 AtomicLong (java.util.concurrent.atomic.AtomicLong)8 KeySelector (org.apache.flink.api.java.functions.KeySelector)8