Example 36 with ReducingStateDescriptor

use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.

the class AbstractQueryableStateITCase method testQueryableState.

/**
	 * Runs a simple topology producing random (key, 1) pairs at the sources (where the
	 * number of keys is fixed in the range 0...numKeys). The records are keyed and
	 * a reducing queryable state instance is created, which sums up the records.
	 *
	 * After submitting the job in detached mode, the QueryableStateClient is used
	 * to query the counts of each key in rounds until all keys have non-zero counts.
	 */
@Test
@SuppressWarnings("unchecked")
public void testQueryableState() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    final QueryableStateClient client = new QueryableStateClient(cluster.configuration());
    JobID jobId = null;
    try {
        //
        // Test program
        //
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
        final String queryName = "hakuna-matata";
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState(queryName, reducingState);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        cluster.submitJobDetached(jobGraph);
        //
        // Start querying
        //
        jobId = jobGraph.getJobID();
        final AtomicLongArray counts = new AtomicLongArray(numKeys);
        boolean allNonZero = false;
        while (!allNonZero && deadline.hasTimeLeft()) {
            allNonZero = true;
            final List<Future<byte[]>> futures = new ArrayList<>(numKeys);
            for (int i = 0; i < numKeys; i++) {
                final int key = i;
                if (counts.get(key) > 0) {
                    // Skip this one
                    continue;
                } else {
                    allNonZero = false;
                }
                final byte[] serializedKey = KvStateRequestSerializer.serializeKeyAndNamespace(key, queryableState.getKeySerializer(), VoidNamespace.INSTANCE, VoidNamespaceSerializer.INSTANCE);
                Future<byte[]> serializedResult = getKvStateWithRetries(client, jobId, queryName, key, serializedKey, QUERY_RETRY_DELAY, false);
                serializedResult.onSuccess(new OnSuccess<byte[]>() {

                    @Override
                    public void onSuccess(byte[] result) throws Throwable {
                        Tuple2<Integer, Long> value = KvStateRequestSerializer.deserializeValue(result, queryableState.getValueSerializer());
                        counts.set(key, value.f1);
                        assertEquals("Key mismatch", key, value.f0.intValue());
                    }
                }, TEST_ACTOR_SYSTEM.dispatcher());
                futures.add(serializedResult);
            }
            Future<Iterable<byte[]>> futureSequence = Futures.sequence(futures, TEST_ACTOR_SYSTEM.dispatcher());
            Await.ready(futureSequence, deadline.timeLeft());
        }
        assertTrue("Not all keys are non-zero", allNonZero);
        // All should be non-zero
        for (int i = 0; i < numKeys; i++) {
            long count = counts.get(i);
            assertTrue("Count at position " + i + " is " + count, count > 0);
        }
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
        client.shutDown();
    }
}
Also used : ArrayList(java.util.ArrayList) QueryableStateClient(org.apache.flink.runtime.query.QueryableStateClient) KeySelector(org.apache.flink.api.java.functions.KeySelector) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(scala.concurrent.duration.Deadline) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLongArray(java.util.concurrent.atomic.AtomicLongArray) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) Future(scala.concurrent.Future) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
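
The SumReduce reducer referenced above is declared elsewhere in the test class and is not shown in this snippet. Going by the javadoc (the queryable state "sums up the records"), a minimal hypothetical sketch of such a ReduceFunction might look like this; it is a reconstruction, not the verbatim Flink source:

private static class SumReduce implements ReduceFunction<Tuple2<Integer, Long>> {

    @Override
    public Tuple2<Integer, Long> reduce(Tuple2<Integer, Long> value1, Tuple2<Integer, Long> value2) {
        // Keep the key and add up the counts (the Long field).
        value1.f1 += value2.f1;
        return value1;
    }
}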

Example 37 with ReducingStateDescriptor

use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.

the class AbstractQueryableStateITCase method testDuplicateRegistrationFailsJob.

/**
	 * Tests that duplicate query registrations fail the job at the JobManager.
	 */
@Test
public void testDuplicateRegistrationFailsJob() throws Exception {
    // Config
    final Deadline deadline = TEST_TIMEOUT.fromNow();
    final int numKeys = 256;
    JobID jobId = null;
    try {
        //
        // Test program
        //
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStateBackend(stateBackend);
        env.setParallelism(NUM_SLOTS);
        // Very important, because cluster is shared between tests and we
        // don't explicitly check that all slots are available before
        // submitting.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000));
        DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
        // Reducing state
        ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
        final String queryName = "duplicate-me";
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState(queryName, reducingState);
        final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

            @Override
            public Integer getKey(Tuple2<Integer, Long> value) throws Exception {
                return value.f0;
            }
        }).asQueryableState(queryName);
        // Submit the job graph
        JobGraph jobGraph = env.getStreamGraph().getJobGraph();
        jobId = jobGraph.getJobID();
        Future<JobStatusIs> failedFuture = cluster.getLeaderGateway(deadline.timeLeft()).ask(new NotifyWhenJobStatus(jobId, JobStatus.FAILED), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobStatusIs>apply(JobStatusIs.class));
        cluster.submitJobDetached(jobGraph);
        JobStatusIs jobStatus = Await.result(failedFuture, deadline.timeLeft());
        assertEquals(JobStatus.FAILED, jobStatus.state());
        // Get the job and check the cause
        JobFound jobFound = Await.result(cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.RequestJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<JobFound>apply(JobFound.class)), deadline.timeLeft());
        String failureCause = jobFound.executionGraph().getFailureCauseAsString();
        assertTrue("Not instance of SuppressRestartsException", failureCause.startsWith("org.apache.flink.runtime.execution.SuppressRestartsException"));
        int causedByIndex = failureCause.indexOf("Caused by: ");
        String subFailureCause = failureCause.substring(causedByIndex + "Caused by: ".length());
        assertTrue("Not caused by IllegalStateException", subFailureCause.startsWith("java.lang.IllegalStateException"));
        assertTrue("Exception does not contain registration name", subFailureCause.contains(queryName));
    } finally {
        // Free cluster resources
        if (jobId != null) {
            Future<CancellationSuccess> cancellation = cluster.getLeaderGateway(deadline.timeLeft()).ask(new JobManagerMessages.CancelJob(jobId), deadline.timeLeft()).mapTo(ClassTag$.MODULE$.<CancellationSuccess>apply(CancellationSuccess.class));
            Await.ready(cancellation, deadline.timeLeft());
        }
    }
}
Also used : KeySelector(org.apache.flink.api.java.functions.KeySelector) JobFound(org.apache.flink.runtime.messages.JobManagerMessages.JobFound) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) JobStatusIs(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.JobStatusIs) Deadline(scala.concurrent.duration.Deadline) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) CancellationSuccess(org.apache.flink.runtime.messages.JobManagerMessages.CancellationSuccess) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) NotifyWhenJobStatus(org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.NotifyWhenJobStatus) Test(org.junit.Test)
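
The TestKeyRangeSource used by these tests is likewise defined in the Flink test sources and omitted here. Based on the description above (random (key, 1) pairs with keys in 0...numKeys) and the RichParallelSourceFunction import listed below in Example 39, a rough sketch of what it might look like, under those assumptions:

private static class TestKeyRangeSource extends RichParallelSourceFunction<Tuple2<Integer, Long>> {

    private final int numKeys;
    private volatile boolean running = true;

    TestKeyRangeSource(int numKeys) {
        this.numKeys = numKeys;
    }

    @Override
    public void run(SourceContext<Tuple2<Integer, Long>> ctx) throws Exception {
        final ThreadLocalRandom random = ThreadLocalRandom.current();
        while (running) {
            synchronized (ctx.getCheckpointLock()) {
                // Emit a (key, 1) pair with a random key in [0, numKeys).
                ctx.collect(Tuple2.of(random.nextInt(numKeys), 1L));
            }
            Thread.sleep(2);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}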

Example 38 with ReducingStateDescriptor

use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.

the class WindowOperatorMigrationTest method testRestoreReducingEventTimeWindowsFromFlink11.

@Test
@SuppressWarnings("unchecked")
public void testRestoreReducingEventTimeWindowsFromFlink11() throws Exception {
    final int WINDOW_SIZE = 3;
    TypeInformation<Tuple2<String, Integer>> inputType = TypeInfoParser.parse("Tuple2<String, Integer>");
    ReducingStateDescriptor<Tuple2<String, Integer>> stateDesc = new ReducingStateDescriptor<>("window-contents", new SumReducer(), inputType.createSerializer(new ExecutionConfig()));
    WindowOperator<String, Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>, TimeWindow> operator = new WindowOperator<>(TumblingEventTimeWindows.of(Time.of(WINDOW_SIZE, TimeUnit.SECONDS)), new TimeWindow.Serializer(), new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO.createSerializer(new ExecutionConfig()), stateDesc, new InternalSingleValueWindowFunction<>(new PassThroughWindowFunction<String, TimeWindow, Tuple2<String, Integer>>()), EventTimeTrigger.create(), 0, null);
    ConcurrentLinkedQueue<Object> expectedOutput = new ConcurrentLinkedQueue<>();
    /*

		operator.setInputType(TypeInfoParser.<Tuple2<String, Integer>>parse("Tuple2<String, Integer>"), new ExecutionConfig());

		OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> testHarness =
				new OneInputStreamOperatorTestHarness<>(operator);

		testHarness.configureForKeyedStream(new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);

		testHarness.setup();
		testHarness.open();

		// add elements out-of-order
		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3999));
		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 3000));

		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 20));
		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 0));
		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key1", 1), 999));

		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1998));
		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1999));
		testHarness.processElement(new StreamRecord<>(new Tuple2<>("key2", 1), 1000));

		testHarness.processWatermark(new Watermark(999));
		expectedOutput.add(new Watermark(999));
		TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput(), new Tuple2ResultSortComparator());

		testHarness.processWatermark(new Watermark(1999));
		expectedOutput.add(new Watermark(1999));
		TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput(), new Tuple2ResultSortComparator());

		// do snapshot and save to file
		StreamTaskState snapshot = testHarness.snapshot(0L, 0L);
		testHarness.snaphotToFile(snapshot, "src/test/resources/win-op-migration-test-reduce-event-time-flink1.1-snapshot");
		testHarness.close();

		*/
    OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Tuple2<String, Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, new TupleKeySelector(), BasicTypeInfo.STRING_TYPE_INFO);
    testHarness.setup();
    testHarness.initializeStateFromLegacyCheckpoint(getResourceFilename("win-op-migration-test-reduce-event-time-flink1.1-snapshot"));
    testHarness.open();
    testHarness.processWatermark(new Watermark(2999));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key1", 3), 2999));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key2", 3), 2999));
    expectedOutput.add(new Watermark(2999));
    testHarness.processWatermark(new Watermark(3999));
    expectedOutput.add(new Watermark(3999));
    testHarness.processWatermark(new Watermark(4999));
    expectedOutput.add(new Watermark(4999));
    testHarness.processWatermark(new Watermark(5999));
    expectedOutput.add(new StreamRecord<>(new Tuple2<>("key2", 2), 5999));
    expectedOutput.add(new Watermark(5999));
    TestHarnessUtil.assertOutputEqualsSorted("Output was not correct.", expectedOutput, testHarness.getOutput(), new Tuple2ResultSortComparator());
    testHarness.close();
}
Also used : ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TimeWindow(org.apache.flink.streaming.api.windowing.windows.TimeWindow) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) PassThroughWindowFunction(org.apache.flink.streaming.api.functions.windowing.PassThroughWindowFunction) Tuple2(org.apache.flink.api.java.tuple.Tuple2) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test)
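
SumReducer and TupleKeySelector are helpers declared elsewhere in WindowOperatorMigrationTest. Judging from how they are used here (string-keyed tuples whose Integer counts are summed per window), hypothetical sketches of the two might look like this:

private static class SumReducer implements ReduceFunction<Tuple2<String, Integer>> {

    @Override
    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
        // Keep the key and sum the Integer counts.
        return new Tuple2<>(value2.f0, value1.f1 + value2.f1);
    }
}

private static class TupleKeySelector implements KeySelector<Tuple2<String, Integer>, String> {

    @Override
    public String getKey(Tuple2<String, Integer> value) {
        return value.f0;
    }
}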

Example 39 with ReducingStateDescriptor

use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.

the class AbstractQueryableStateTestBase method testDuplicateRegistrationFailsJob.

/**
 * Tests that duplicate query registrations fail the job at the JobManager.
 */
@Test(timeout = 60_000)
public void testDuplicateRegistrationFailsJob() throws Exception {
    final int numKeys = 256;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
    // Reducing state
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
    final String queryName = "duplicate-me";
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = -4126824763829132959L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName, reducingState);
    final QueryableStateStream<Integer, Tuple2<Integer, Long>> duplicate = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = -6265024000462809436L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName);
    // Submit the job graph
    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    clusterClient.submitJob(jobGraph).thenCompose(clusterClient::requestJobResult).thenApply(JobResult::getSerializedThrowable).thenAccept(serializedThrowable -> {
        assertTrue(serializedThrowable.isPresent());
        final Throwable t = serializedThrowable.get().deserializeError(getClass().getClassLoader());
        final String failureCause = ExceptionUtils.stringifyException(t);
        assertThat(failureCause, containsString("KvState with name '" + queryName + "' has already been registered by another operator"));
    }).get();
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) Arrays(java.util.Arrays) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Tuple2(org.apache.flink.api.java.tuple.Tuple2) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ClassLoaderUtils(org.apache.flink.testutils.ClassLoaderUtils) ExceptionUtils(org.apache.flink.util.ExceptionUtils) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) StateBackend(org.apache.flink.runtime.state.StateBackend) URLClassLoader(java.net.URLClassLoader) AggregatingState(org.apache.flink.api.common.state.AggregatingState) CheckpointListener(org.apache.flink.api.common.state.CheckpointListener) ReducingState(org.apache.flink.api.common.state.ReducingState) QueryableStateStream(org.apache.flink.streaming.api.datastream.QueryableStateStream) Duration(java.time.Duration) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) Assert.fail(org.junit.Assert.fail) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) ClassRule(org.junit.ClassRule) State(org.apache.flink.api.common.state.State) KeySelector(org.apache.flink.api.java.functions.KeySelector) ScheduledExecutor(org.apache.flink.util.concurrent.ScheduledExecutor) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) CancellationException(java.util.concurrent.CancellationException) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Preconditions(org.apache.flink.util.Preconditions) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) TestingUtils(org.apache.flink.testutils.TestingUtils) VoidNamespaceSerializer(org.apache.flink.queryablestate.client.VoidNamespaceSerializer) List(java.util.List) ValueState(org.apache.flink.api.common.state.ValueState) ClusterClient(org.apache.flink.client.program.ClusterClient) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) VoidNamespace(org.apache.flink.queryablestate.client.VoidNamespace) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) AtomicLongArray(java.util.concurrent.atomic.AtomicLongArray) ScheduledExecutorServiceAdapter(org.apache.flink.util.concurrent.ScheduledExecutorServiceAdapter) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) JobStatus(org.apache.flink.api.common.JobStatus) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) JobResult(org.apache.flink.runtime.jobmaster.JobResult) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) FutureUtils(org.apache.flink.util.concurrent.FutureUtils) Collector(org.apache.flink.util.Collector) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) RichParallelSourceFunction(org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) 
AggregatingStateDescriptor(org.apache.flink.api.common.state.AggregatingStateDescriptor) Before(org.junit.Before) Serializer(com.esotericsoftware.kryo.Serializer) StateDescriptor(org.apache.flink.api.common.state.StateDescriptor) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) IOException(java.io.IOException) UnknownKeyOrNamespaceException(org.apache.flink.queryablestate.exceptions.UnknownKeyOrNamespaceException) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) JobID(org.apache.flink.api.common.JobID) Ignore(org.junit.Ignore) MapState(org.apache.flink.api.common.state.MapState) Assert(org.junit.Assert) QueryableStateClient(org.apache.flink.queryablestate.client.QueryableStateClient) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals)
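
Across these examples, two ReducingStateDescriptor constructor variants appear: one that takes the element TypeInformation directly (source.getType() above), and one that takes a TypeSerializer derived from it (Example 38). A minimal standalone sketch of both, with assumed names and types:

// A ReduceFunction is a functional interface, so a lambda works here.
ReduceFunction<Long> sum = (a, b) -> a + b;

// Variant 1: pass the TypeInformation and let Flink create the serializer lazily.
ReducingStateDescriptor<Long> fromTypeInfo =
        new ReducingStateDescriptor<>("sum", sum, BasicTypeInfo.LONG_TYPE_INFO);

// Variant 2: pass an eagerly created TypeSerializer.
ReducingStateDescriptor<Long> fromSerializer =
        new ReducingStateDescriptor<>("sum", sum, BasicTypeInfo.LONG_TYPE_INFO.createSerializer(new ExecutionConfig()));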

Example 40 with ReducingStateDescriptor

use of org.apache.flink.api.common.state.ReducingStateDescriptor in project flink by apache.

the class AbstractQueryableStateTestBase method testQueryableState.

/**
 * Runs a simple topology producing random (key, 1) pairs at the sources (where the number of
 * keys is fixed in the range 0...numKeys). The records are keyed and a reducing queryable state
 * instance is created, which sums up the records.
 *
 * <p>After submitting the job in detached mode, the QueryableStateClient is used to query the
 * counts of each key in rounds until all keys have non-zero counts.
 */
@Test
public void testQueryableState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final int numKeys = 256;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestKeyRangeSource(numKeys));
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any-name", new SumReduce(), source.getType());
    final String queryName = "hakuna-matata";
    source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 7143749578983540352L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState(queryName, reducingState);
    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        clusterClient.submitJob(jobGraph).get();
        final AtomicLongArray counts = new AtomicLongArray(numKeys);
        final List<CompletableFuture<ReducingState<Tuple2<Integer, Long>>>> futures = new ArrayList<>(numKeys);
        boolean allNonZero = false;
        while (!allNonZero && deadline.hasTimeLeft()) {
            allNonZero = true;
            futures.clear();
            for (int i = 0; i < numKeys; i++) {
                final int key = i;
                if (counts.get(key) > 0L) {
                    // Skip this one
                    continue;
                } else {
                    allNonZero = false;
                }
                CompletableFuture<ReducingState<Tuple2<Integer, Long>>> result = getKvState(deadline, client, jobId, queryName, key, BasicTypeInfo.INT_TYPE_INFO, reducingState, false, executor);
                result.thenAccept(response -> {
                    try {
                        Tuple2<Integer, Long> res = response.get();
                        counts.set(key, res.f1);
                        assertEquals("Key mismatch", key, res.f0.intValue());
                    } catch (Exception e) {
                        Assert.fail(e.getMessage());
                    }
                });
                futures.add(result);
            }
            // wait for all the futures to complete
            CompletableFuture.allOf(futures.toArray(new CompletableFuture<?>[futures.size()])).get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        }
        assertTrue("Not all keys are non-zero", allNonZero);
        // All should be non-zero
        for (int i = 0; i < numKeys; i++) {
            long count = counts.get(i);
            assertTrue("Count at position " + i + " is " + count, count > 0);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) KeySelector(org.apache.flink.api.java.functions.KeySelector) CompletableFuture(java.util.concurrent.CompletableFuture) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(org.apache.flink.api.common.time.Deadline) CancellationException(java.util.concurrent.CancellationException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) UnknownKeyOrNamespaceException(org.apache.flink.queryablestate.exceptions.UnknownKeyOrNamespaceException) ExecutionException(java.util.concurrent.ExecutionException) ReducingState(org.apache.flink.api.common.state.ReducingState) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) AtomicLongArray(java.util.concurrent.atomic.AtomicLongArray) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
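
For reference, the getKvState(...) helper used above wraps Flink's QueryableStateClient and retries until the state becomes available. A rough, non-retrying sketch of a single query against the job started above (jobId and reducingState are the variables from the test; hostname and port are assumptions, 9069 being the default state proxy port):

static void queryOnce(JobID jobId, ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState) throws Exception {
    // Connect to a TaskManager's queryable state proxy (hostname/port are assumptions).
    QueryableStateClient client = new QueryableStateClient("localhost", 9069);

    // Ask for the reducing state registered under "hakuna-matata" for key 0.
    CompletableFuture<ReducingState<Tuple2<Integer, Long>>> resultFuture =
            client.getKvState(jobId, "hakuna-matata", 0, BasicTypeInfo.INT_TYPE_INFO, reducingState);

    // ReducingState.get() yields the summed (key, count) pair for that key.
    Tuple2<Integer, Long> result = resultFuture.get().get();
    System.out.println("Count for key 0: " + result.f1);
}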

Aggregations

ReducingStateDescriptor (org.apache.flink.api.common.state.ReducingStateDescriptor): 67
Test (org.junit.Test): 60
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 51
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 38
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 35
TypeHint (org.apache.flink.api.common.typeinfo.TypeHint): 27
ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 26
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 23
Watermark (org.apache.flink.streaming.api.watermark.Watermark): 21
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 19
PassThroughWindowFunction (org.apache.flink.streaming.api.functions.windowing.PassThroughWindowFunction): 19
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 17
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 17
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord): 14
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 10
EventTimeTrigger (org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger): 9
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 8
TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer): 7
OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState): 7
AtomicLong (java.util.concurrent.atomic.AtomicLong): 6