Search in sources :

Example 86 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class AbstractQueryableStateTestBase method testReducingState.

/**
 * Verifies that a reducing state exposed as queryable state can be queried per key.
 *
 * <p>Each source subtask emits the tuples (subtaskIndex, 0)..(subtaskIndex, numElements) and
 * the reducing state sums them up. Every key in {@code [0, maxParallelism)} is polled until the
 * queried sum equals n*(n+1)/2; the test fails if the deadline runs out before that happens.
 */
@Test
public void testReducingState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // The cluster is shared between tests and slot availability is not checked before
    // submitting, so the job has to keep restarting until it can actually be scheduled.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
    ReducingStateDescriptor<Tuple2<Integer, Long>> reducingState = new ReducingStateDescriptor<>("any", new SumReduce(), source.getType());

    // Key every tuple by its first field.
    KeySelector<Tuple2<Integer, Long>, Integer> firstFieldSelector = new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 8470749712274833552L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    };
    source.keyBy(firstFieldSelector).asQueryableState("jungle", reducingState);

    try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = job.getJobId();
        clusterClient.submitJob(job.getJobGraph()).get();

        // Sum of 0..numElements.
        final long expectedSum = numElements * (numElements + 1L) / 2L;

        int key = 0;
        while (key < maxParallelism) {
            boolean matched = false;
            while (deadline.hasTimeLeft() && !matched) {
                CompletableFuture<ReducingState<Tuple2<Integer, Long>>> pending = getKvState(deadline, client, jobId, "jungle", key, BasicTypeInfo.INT_TYPE_INFO, reducingState, false, executor);
                ReducingState<Tuple2<Integer, Long>> state = pending.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
                Tuple2<Integer, Long> queried = state.get();
                assertEquals("Key mismatch", key, queried.f0.intValue());

                matched = queried.f1 == expectedSum;
                if (!matched) {
                    // The sum is not complete yet; wait a bit before querying again.
                    Thread.sleep(RETRY_TIMEOUT);
                }
            }
            assertTrue("Did not succeed query", matched);
            key++;
        }
    }
}
Also used : ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) Deadline(org.apache.flink.api.common.time.Deadline) KeySelector(org.apache.flink.api.java.functions.KeySelector) ReducingState(org.apache.flink.api.common.state.ReducingState) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 87 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class AbstractQueryableStateTestBase method testValueStateDefault.

/**
 * Verifies that querying a key that was never written fails, even though the value state
 * descriptor carries a default value.
 *
 * <p>Each source emits (subtaskIndex, 0)..(subtaskIndex, numElements) tuples. The key selector
 * maps every tuple to key 1, while the query asks for key 0, which should fail with an
 * {@link UnknownKeyOrNamespaceException}.
 *
 * @throws UnknownKeyOrNamespaceException thrown due to querying a non-existent key
 */
@Test(expected = UnknownKeyOrNamespaceException.class)
public void testValueStateDefault() throws Throwable {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // The cluster is shared between tests and slot availability is not checked before
    // submitting, so the job has to keep restarting until it can actually be scheduled.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
    ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType(), Tuple2.of(0, 1337L));

    // Every element is routed to key 1, so that is the only key that ever gets state.
    KeySelector<Tuple2<Integer, Long>, Integer> constantKeySelector = new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 4509274556892655887L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return 1;
        }
    };
    QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(constantKeySelector).asQueryableState("hakuna", valueState);

    try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = job.getJobId();
        clusterClient.submitJob(job.getJobGraph()).get();

        // Query key 0, which the selector above never produces.
        int missingKey = 0;
        CompletableFuture<ValueState<Tuple2<Integer, Long>>> pending = getKvState(deadline, client, jobId, queryableState.getQueryableStateName(), missingKey, BasicTypeInfo.INT_TYPE_INFO, valueState, true, executor);
        try {
            pending.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
        } catch (ExecutionException | CompletionException wrapped) {
            // The future reports its failure wrapped in an ExecutionException or
            // CompletionException; rethrow the cause so JUnit can match it against the
            // expected exception type declared on @Test.
            throw wrapped.getCause();
        }
    }
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) KeySelector(org.apache.flink.api.java.functions.KeySelector) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) ValueState(org.apache.flink.api.common.state.ValueState) Tuple2(org.apache.flink.api.java.tuple.Tuple2) CompletionException(java.util.concurrent.CompletionException) AtomicLong(java.util.concurrent.atomic.AtomicLong) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ExecutionException(java.util.concurrent.ExecutionException) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 88 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class CoGroupOperatorTest method testCoGroupKeySelectors1.

/**
 * Checks that a co-group of two {@code CustomType} data sets accepts a {@code KeySelector}
 * on both inputs when both selectors produce a {@code Long} key.
 *
 * <p>The test fails if building the co-group throws any exception.
 */
@Test
public void testCoGroupKeySelectors1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<CustomType> ds2 = env.fromCollection(customTypeData);
    // should work
    try {
        ds1.coGroup(ds2).where(new KeySelector<CustomType, Long>() {

            @Override
            public Long getKey(CustomType value) {
                return value.myLong;
            }
        }).equalTo(new KeySelector<CustomType, Long>() {

            @Override
            public Long getKey(CustomType value) {
                return value.myLong;
            }
        });
    } catch (Exception e) {
        // Fail with the original exception attached as the cause; a bare Assert.fail()
        // would drop its type, message and stack trace from the test report.
        throw new AssertionError("coGroup with KeySelectors on both inputs should be accepted, but threw: " + e, e);
    }
}
Also used : CustomType(org.apache.flink.api.java.operator.JoinOperatorTest.CustomType) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Test(org.junit.Test)

Example 89 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class GroupingTest method testGroupSortByKeySelector1.

/**
 * Groups a {@code Tuple4} data set by its second field ({@code f1}) and sorts each group in
 * ascending order by its first field ({@code f0}), using a {@code KeySelector} for both the
 * grouping key and the sort key.
 */
@SuppressWarnings("serial")
@Test
public void testGroupSortByKeySelector1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple4<Integer, Long, CustomType, Long[]>> tupleDs = env.fromCollection(tupleWithCustomData, tupleWithCustomInfo);

    // Grouping key: the Long in field f1.
    KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, Long> groupKeySelector = new KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, Long>() {

        @Override
        public Long getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception {
            return value.f1;
        }
    };

    // Sort key within each group: the Integer in field f0.
    KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, Integer> sortKeySelector = new KeySelector<Tuple4<Integer, Long, CustomType, Long[]>, Integer>() {

        @Override
        public Integer getKey(Tuple4<Integer, Long, CustomType, Long[]> value) throws Exception {
            return value.f0;
        }
    };

    // NOTE(review): the previous comment here read "should not work", but this test declares
    // no expected exception, so it only passes when the call chain below completes without
    // throwing. Confirm the intended outcome.
    tupleDs.groupBy(groupKeySelector).sortGroup(sortKeySelector, Order.ASCENDING);
}
Also used : Tuple4(org.apache.flink.api.java.tuple.Tuple4) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Test(org.junit.Test)

Example 90 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class CoGroupOperatorTest method testCoGroupKeyMixing1.

/**
 * Checks that a co-group can mix key kinds: a {@code KeySelector} returning a {@code Long} on
 * the {@code CustomType} input and the field position {@code 3} on the {@code Tuple5} input.
 *
 * <p>The test fails if building the co-group throws any exception.
 */
@Test
public void testCoGroupKeyMixing1() {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CustomType> ds1 = env.fromCollection(customTypeData);
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo);
    // should work
    try {
        ds1.coGroup(ds2).where(new KeySelector<CustomType, Long>() {

            @Override
            public Long getKey(CustomType value) {
                return value.myLong;
            }
        }).equalTo(3);
    } catch (Exception e) {
        // Fail with the original exception attached as the cause; a bare Assert.fail()
        // would drop its type, message and stack trace from the test report.
        throw new AssertionError("coGroup mixing a KeySelector with a tuple field position should be accepted, but threw: " + e, e);
    }
}
Also used : CustomType(org.apache.flink.api.java.operator.JoinOperatorTest.CustomType) Tuple5(org.apache.flink.api.java.tuple.Tuple5) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) Test(org.junit.Test)

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector)120 Test (org.junit.Test)113 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)45 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)44 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)39 Watermark (org.apache.flink.streaming.api.watermark.Watermark)30 List (java.util.List)29 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)28 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)22 JobID (org.apache.flink.api.common.JobID)22 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)22 IOException (java.io.IOException)21 Arrays (java.util.Arrays)21 AtomicLong (java.util.concurrent.atomic.AtomicLong)21 Configuration (org.apache.flink.configuration.Configuration)21 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)21 ArrayList (java.util.ArrayList)18 Map (java.util.Map)18 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)18 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)16