Search in sources :

Example 1 with QueryableStateClient

use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.

the class AbstractQueryableStateITCase method testFoldingState.

/**
	 * Tests simple folding state queryable state instance. Each source emits
	 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
	 * queried. The folding state sums these up and maps them to Strings. The
	 * test succeeds after each subtask index is queried with result n*(n+1)/2
	 * (as a String).
	 */
@Test
public void testFoldingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Folding state
        FoldingStateDescriptor<Tuple2<Integer, Long>, String> foldingState = new FoldingStateDescriptor<>("any", "0", new SumFold(), StringSerializer.INSTANCE);
        QueryableStateStream<Integer, String> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("pumba", foldingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        String expected = Integer.toString(numElements * (numElements + 1) / 2);
        for (int key = 0; key < NUM_SLOTS; key++) {
            final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
            boolean success = false;
            while (deadline.hasTimeLeft() && !success) {
                Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, false);
                byte[] serializedValue = Await.result(future, deadline.timeLeft());
                String value = KvStateRequestSerializer.deserializeValue(serializedValue, queryableState.getValueSerializer());
                if (expected.equals(value)) {
                    success = true;
                } else {
                    // Retry
                    Thread.sleep(50);
                }
            }
            assertTrue("Did not succeed query", success);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 2 with QueryableStateClient

use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.

the class AbstractQueryableStateITCase method testReducingState.

/**
	 * Tests simple reducing state queryable state instance. Each source emits
	 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
	 * queried. The reducing state instance sums these up. The test succeeds
	 * after each subtask index is queried with result n*(n+1)/2.
	 */
@Test
public void testReducingState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("jungle", reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Wait until job is running
        // Now query
        long expected = numElements * (numElements + 1) / 2;
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 3 with QueryableStateClient

use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.

the class AbstractQueryableStateITCase method testQueryNonStartedJobState.

/**
	 * Similar tests as {@link #testValueState()} but before submitting the
	 * job, we already issue one request which fails.
	 */
@Test
public void testQueryNonStartedJobState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Value state
        ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), null);
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState("hakuna", valueState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        // Now query
        long expected = numElements;
        // query once
        client.getKvState(jobId, queryableState.getQueryableStateName(), 0, KvStateRequestSerializer.serializeKeyAndNamespace(0, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE));
        cluster.submitJobDetached(jobGraph);
        executeValueQuery(deadline, client, jobId, queryableState, expected);
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with QueryableStateClient

use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.

the class AbstractQueryableStateITCase method testValueStateDefault.

/**
	 * Tests simple value state queryable state instance with a default value
	 * set. Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements)
	 * tuples, the key is mapped to 1 but key 0 is queried which should throw
	 * a {@link UnknownKeyOrNamespace} exception.
	 *
	 * @throws UnknownKeyOrNamespace thrown due querying a non-existent key
	 */
@Test(expected = UnknownKeyOrNamespace.class)
public void testValueStateDefault() throws Exception, UnknownKeyOrNamespace {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numElements = 1024;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
        // Value state
        ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), Tuple2.of(0, 1337l));
        // only expose key "1"
        QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return 1;
            }
        }).asQueryableState("hakuna", valueState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        cluster.submitJobDetached(jobGraph);
        // Now query
        int key = 0;
        final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
        Future<byte[]> future = getKvStateWithRetries(client, jobId, queryableState.getQueryableStateName(), key, serializedKey, QUERY_RETRY_DELAY, true);
        Await.result(future, deadline.timeLeft());
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : Deadline(scala.concurrent.duration.Deadline) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 5 with QueryableStateClient

use of org.apache.flink.runtime.query.QueryableStateClient in project flink by apache.

the class AbstractQueryableStateITCase method testQueryableState.

/**
	 * Runs a simple topology producing random (key, 1) pairs at the sources (where
	 * number of keys is in fixed in range 0...numKeys). The records are keyed and
	 * a reducing queryable state instance is created, which sums up the records.
	 *
	 * After submitting the job in detached mode, the QueryableStateCLient is used
	 * to query the counts of each key in rounds until all keys have non-zero counts.
	 */
@Test
@SuppressWarnings("unchecked")
public void testQueryableState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        //
        // Test program
        //
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
        final String queryName = "hakuna-matata";
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState(queryName, reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        cluster.submitJobDetached(jobGraph);
        //
        // Start querying
        //
        jobId = jobGraph.getJobID();
        final AtomicLongArray counts = new AtomicLongArray(numKeys);
        boolean allNonZero = false;
        while (!allNonZero && deadline.hasTimeLeft()) {
            allNonZero = true;
            final List<Future<byte[]>> futures = new ArrayList<>(numKeys);
            for (int i = 0; i < numKeys; i++) {
                final int key = i;
                if (counts.get(key) > 0) {
                    // Skip this one
                    continue;
                } else {
                    allNonZero = false;
                }
                final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
                Future<byte[]> serializedResult = getKvStateWithRetries(client, jobId, queryName, key, serializedKey, QUERY_RETRY_DELAY, false);
                serializedResult.onSuccess(new OnSuccess<byte[]>() {

                    @Override
                    public void onSuccess(byte[] result) throws Throwable {
                        Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(result, queryableState.getValueSerializer());
                        counts.set(key, value.f1);
                        assertEquals("Key mismatch", key, value.f0.intValue());
                    }
                }, TEST_ACTOR_SYSTEM.dispatcher());
                futures.add(serializedResult);
            }
            Future<Iterable<byte[]>> futureSequence = Futures.sequence(futures, TEST_ACTOR_SYSTEM.dispatcher());
            Await.ready(futureSequence, deadline.timeLeft());
        }
        assertTrue("Not all keys are non-zero", allNonZero);
        // All should be non-zero
        for (int i = 0; i < numKeys; i++) {
            long count = counts.get(i);
            assertTrue("Count at position " + i + " is " + count, count > 0);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : ArrayList(java.util.ArrayList) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLongArray(java.util.concurrent.atomic.AtomicLongArray) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) Future(scala.concurrent.Future) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

AtomicLong (java.util.concurrent.atomic.AtomicLong)7 JobID (org.apache.flink.api.common.JobID)7 KeySelector (org.apache.flink.api.java.functions.KeySelector)7 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)7 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)7 CancellationSuccess (org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess)7 QueryableStateClient (org.apache.flink.runtime.query.QueryableStateClient)7 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)7 Test (org.junit.Test)7 Deadline (scala.concurrent.duration.Deadline)7 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)3 ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor)2 ArrayList (java.util.ArrayList)1 AtomicLongArray (java.util.concurrent.atomic.AtomicLongArray)1 FoldingStateDescriptor (org.apache.flink.api.common.state.FoldingStateDescriptor)1 Future (scala.concurrent.Future)1