Search in sources :

Example 46 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class JoinITCase method testDefaultJoinOnTwoCustomTypeInputsWithInnerClassKeyExtractorsClosureCleaner.

/**
 * (Default) join on two custom type inputs. Both sides are keyed on {@code myInt} through key
 * extractors implemented as anonymous inner classes, so that closure cleaning is exercised.
 */
@Test
public void testDefaultJoinOnTwoCustomTypeInputsWithInnerClassKeyExtractorsClosureCleaner() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Key extractor for the left input; deliberately an anonymous inner class.
    KeySelector<CustomType, Integer> leftKey = new KeySelector<CustomType, Integer>() {

        @Override
        public Integer getKey(CustomType value) {
            return value.myInt;
        }
    };
    // Key extractor for the right input; deliberately an anonymous inner class.
    KeySelector<CustomType, Integer> rightKey = new KeySelector<CustomType, Integer>() {

        @Override
        public Integer getKey(CustomType value) throws Exception {
            return value.myInt;
        }
    };

    DataSet<CustomType> fullInput = CollectionDataSets.getCustomTypeDataSet(env);
    DataSet<CustomType> smallInput = CollectionDataSets.getSmallCustomTypeDataSet(env);
    DataSet<Tuple2<CustomType, CustomType>> joined =
            fullInput.join(smallInput).where(leftKey).equalTo(rightKey);

    List<Tuple2<CustomType, CustomType>> actual = joined.collect();

    // One expected joined pair per line.
    String expected =
            "1,0,Hi,1,0,Hi\n"
                    + "2,1,Hello,2,1,Hello\n"
                    + "2,1,Hello,2,2,Hello world\n"
                    + "2,2,Hello world,2,1,Hello\n"
                    + "2,2,Hello world,2,2,Hello world\n";
    compareResultAsTuples(actual, expected);
}
Also used : CustomType(org.apache.flink.test.operators.util.CollectionDataSets.CustomType) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Tuple2(org.apache.flink.api.java.tuple.Tuple2) KeySelector(org.apache.flink.api.java.functions.KeySelector) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) IOException(java.io.IOException) Test(org.junit.Test)

Example 47 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class DataStreamTest method testPOJOnoHashCodeKeyRejection.

/**
 * Expects {@code keyBy} to fail with an {@link InvalidProgramException} when the key selector
 * returns the {@code POJOWithoutHashCode} element itself as the key.
 */
@Test
public void testPOJOnoHashCodeKeyRejection() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<POJOWithoutHashCode> input = env.fromElements(new POJOWithoutHashCode(new int[] { 1, 2 }));

    // Arm the rule only after the input exists, so that only keyBy may raise the exception.
    expectedException.expect(InvalidProgramException.class);

    input.keyBy(new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() {

        @Override
        public POJOWithoutHashCode getKey(POJOWithoutHashCode value) throws Exception {
            // Identity key: the whole POJO is used as the key.
            return value;
        }
    });
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) Test(org.junit.Test)

Example 48 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class DataStreamTest method testPOJOWithNestedArrayAndHashCodeWorkAround.

/**
 * Builds a pipeline keyed by a {@code POJOWithHashCode} that wraps an array and attaches a sink
 * checking the wrapped array; constructing the keyed pipeline must not throw.
 *
 * <p>Note that this method only constructs the pipeline and never executes it, so the sink's
 * assertion is evaluated only if the job is run.
 */
@Test
public void testPOJOWithNestedArrayAndHashCodeWorkAround() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<POJOWithHashCode> input = env.fromElements(new POJOWithHashCode(new int[] { 1, 2 }));
    input.keyBy(new KeySelector<POJOWithHashCode, POJOWithHashCode>() {

        @Override
        public POJOWithHashCode getKey(POJOWithHashCode value) throws Exception {
            // Identity key: the whole POJO is used as the key.
            return value;
        }
    }).addSink(new SinkFunction<POJOWithHashCode>() {

        @Override
        public void invoke(POJOWithHashCode value) throws Exception {
            // Arrays must be compared element-wise: assertEquals(Object, Object) compares the
            // two arrays by reference and could never pass against a freshly created array.
            // Expected value goes first, actual second.
            // NOTE(review): assumes getId() returns int[] (matches the constructor argument) — confirm.
            Assert.assertArrayEquals(new int[] { 1, 2 }, value.getId());
        }
    });
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) KeySelector(org.apache.flink.api.java.functions.KeySelector) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) Test(org.junit.Test)

Example 49 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class DataStreamTest method sinkKeyTest.

/**
 * Checks how a print sink is represented in the stream graph, with and without a preceding
 * {@code keyBy}.
 *
 * <ul>
 *   <li>Without {@code keyBy}: the sink node has no state partitioners and its first in-edge
 *       uses a {@link ForwardPartitioner}.
 *   <li>With {@code keyBy}: the sink node has exactly one state partitioner, which equals the
 *       key selector passed to {@code keyBy}, and its first in-edge uses a
 *       {@link KeyGroupStreamPartitioner}. For the first keyed sink the state key serializer is
 *       additionally checked to be non-null.
 * </ul>
 */
@Test
public void sinkKeyTest() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // --- Non-keyed sink ---
    DataStreamSink<Long> sink = env.generateSequence(1, 100).print();
    assertEquals(
            0,
            getStreamGraph(env).getStreamNode(sink.getTransformation().getId()).getStatePartitioners().length);
    assertTrue(
            getStreamGraph(env).getStreamNode(sink.getTransformation().getId()).getInEdges().get(0).getPartitioner()
                    instanceof ForwardPartitioner);

    // --- First keyed sink ---
    KeySelector<Long, Long> key1 = new KeySelector<Long, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Long getKey(Long value) throws Exception {
            // Constant key: every element maps to the same key.
            return 0L;
        }
    };
    DataStreamSink<Long> sink2 = env.generateSequence(1, 100).keyBy(key1).print();
    assertEquals(
            1,
            getStreamGraph(env).getStreamNode(sink2.getTransformation().getId()).getStatePartitioners().length);
    // This assertion was previously written twice in a row; a single check is sufficient.
    assertNotNull(
            getStreamGraph(env).getStreamNode(sink2.getTransformation().getId()).getStateKeySerializer());
    assertEquals(
            key1,
            getStreamGraph(env).getStreamNode(sink2.getTransformation().getId()).getStatePartitioners()[0]);
    assertTrue(
            getStreamGraph(env).getStreamNode(sink2.getTransformation().getId()).getInEdges().get(0).getPartitioner()
                    instanceof KeyGroupStreamPartitioner);

    // --- Second keyed sink, using a distinct selector instance ---
    KeySelector<Long, Long> key2 = new KeySelector<Long, Long>() {

        private static final long serialVersionUID = 1L;

        @Override
        public Long getKey(Long value) throws Exception {
            // Constant key: every element maps to the same key.
            return 0L;
        }
    };
    DataStreamSink<Long> sink3 = env.generateSequence(1, 100).keyBy(key2).print();
    assertEquals(
            1,
            getStreamGraph(env).getStreamNode(sink3.getTransformation().getId()).getStatePartitioners().length);
    assertEquals(
            key2,
            getStreamGraph(env).getStreamNode(sink3.getTransformation().getId()).getStatePartitioners()[0]);
    assertTrue(
            getStreamGraph(env).getStreamNode(sink3.getTransformation().getId()).getInEdges().get(0).getPartitioner()
                    instanceof KeyGroupStreamPartitioner);
}
Also used : StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) KeySelector(org.apache.flink.api.java.functions.KeySelector) KeyGroupStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner) Test(org.junit.Test)

Example 50 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class NotifyCheckpointAbortedITCase method testNotifyCheckpointAborted.

/**
 * Verifies that operators are notified when a checkpoint is aborted.
 *
 * <p>The job runs with at least two checkpoints. The 1st checkpoint fails while being added to
 * the checkpoint store, and the 2nd checkpoint is declined in the async checkpoint phase of
 * 'DeclineSink'.
 *
 * <p>The job graph looks like: NormalSource --> keyBy --> NormalMap --> DeclineSink
 *
 * @throws Exception if job submission, any of the latch waits, or job cancellation fails
 */
@Test(timeout = TEST_TIMEOUT)
public void testNotifyCheckpointAborted() throws Exception {
    // Environment setup: exactly-once checkpointing, one tolerable checkpoint failure,
    // no operator chaining, parallelism 1.
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
    env.getCheckpointConfig().enableUnalignedCheckpoints(unalignedCheckpointEnabled);
    env.getCheckpointConfig().setTolerableCheckpointFailureNumber(1);
    env.disableOperatorChaining();
    env.setParallelism(1);
    // NOTE(review): presumably this backend is what makes DeclineSink's snapshot fail — confirm
    // against DeclineSinkFailingStateBackend.
    final StateBackend failingStateBackend = new DeclineSinkFailingStateBackend(checkpointPath);
    env.setStateBackend(failingStateBackend);
    // Pipeline: NormalSource -> keyBy(f0) -> NormalMap -> DeclineSink.
    env.addSource(new NormalSource()).name("NormalSource").keyBy((KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0).transform("NormalMap", TypeInformation.of(Integer.class), new NormalMap()).transform(DECLINE_SINK_NAME, TypeInformation.of(Object.class), new DeclineSink());
    final ClusterClient<?> clusterClient = cluster.getClusterClient();
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    JobID jobID = jobGraph.getJobID();
    // Wait until the submission itself has completed before observing checkpoints.
    clusterClient.submitJob(jobGraph).get();
    // Phase 1: wait for the checkpoint store's add-latch, then trigger its abort-latch.
    TestingCompletedCheckpointStore.addCheckpointLatch.await();
    log.info("The checkpoint to abort is ready to add to checkpoint store.");
    TestingCompletedCheckpointStore.abortCheckpointLatch.trigger();
    log.info("Verifying whether all operators have been notified of checkpoint-1 aborted.");
    verifyAllOperatorsNotifyAborted();
    log.info("Verified that all operators have been notified of checkpoint-1 aborted.");
    // Reset the latches so the second abort notification can be observed independently.
    resetAllOperatorsNotifyAbortedLatches();
    verifyAllOperatorsNotifyAbortedTimes(1);
    // Phase 2: NOTE(review): presumably releases the source so the next checkpoint can proceed — confirm.
    NormalSource.waitLatch.trigger();
    log.info("Verifying whether all operators have been notified of checkpoint-2 aborted.");
    verifyAllOperatorsNotifyAborted();
    log.info("Verified that all operators have been notified of checkpoint-2 aborted.");
    verifyAllOperatorsNotifyAbortedTimes(2);
    // Cancel the job and wait for the cancellation request to complete.
    clusterClient.cancel(jobID).get();
    log.info("Test is verified successfully as expected.");
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) EmbeddedHaServicesWithLeadershipControl(org.apache.flink.runtime.highavailability.nonha.embedded.EmbeddedHaServicesWithLeadershipControl) ASYNCHRONOUS(org.apache.flink.runtime.state.SnapshotExecutionType.ASYNCHRONOUS) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) FunctionSnapshotContext(org.apache.flink.runtime.state.FunctionSnapshotContext) SnapshotResources(org.apache.flink.runtime.state.SnapshotResources) MapFunction(org.apache.flink.api.common.functions.MapFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) StateBackend(org.apache.flink.runtime.state.StateBackend) CloseableRegistry(org.apache.flink.core.fs.CloseableRegistry) PerJobCheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.PerJobCheckpointRecoveryFactory) Path(org.apache.flink.core.fs.Path) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) TestLogger(org.apache.flink.util.TestLogger) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CompletedCheckpoint(org.apache.flink.runtime.checkpoint.CompletedCheckpoint) DefaultOperatorStateBackend(org.apache.flink.runtime.state.DefaultOperatorStateBackend) ClassRule(org.junit.ClassRule) SnapshotStrategyRunner(org.apache.flink.runtime.state.SnapshotStrategyRunner) Parameterized(org.junit.runners.Parameterized) KeySelector(org.apache.flink.api.java.functions.KeySelector) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) CheckpointedFunction(org.apache.flink.streaming.api.checkpoint.CheckpointedFunction) ExpectedTestException(org.apache.flink.runtime.operators.testutils.ExpectedTestException) 
FunctionInitializationContext(org.apache.flink.runtime.state.FunctionInitializationContext) Collection(java.util.Collection) SnapshotStrategy(org.apache.flink.runtime.state.SnapshotStrategy) CheckpointOptions(org.apache.flink.runtime.checkpoint.CheckpointOptions) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend) CheckpointingOptions(org.apache.flink.configuration.CheckpointingOptions) ValueState(org.apache.flink.api.common.state.ValueState) ClusterClient(org.apache.flink.client.program.ClusterClient) DefaultOperatorStateBackendBuilder(org.apache.flink.runtime.state.DefaultOperatorStateBackendBuilder) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Environment(org.apache.flink.runtime.execution.Environment) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) OneShotLatch(org.apache.flink.core.testutils.OneShotLatch) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) BackendBuildingException(org.apache.flink.runtime.state.BackendBuildingException) SourceFunction(org.apache.flink.streaming.api.functions.source.SourceFunction) ReadableConfig(org.apache.flink.configuration.ReadableConfig) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle) StreamMap(org.apache.flink.streaming.api.operators.StreamMap) Nonnull(javax.annotation.Nonnull) StandaloneCompletedCheckpointStore(org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore) StreamSink(org.apache.flink.streaming.api.operators.StreamSink) Before(org.junit.Before) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) SnapshotResult(org.apache.flink.runtime.state.SnapshotResult) Executor(java.util.concurrent.Executor) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) Configuration(org.apache.flink.configuration.Configuration) 
CheckpointsCleaner(org.apache.flink.runtime.checkpoint.CheckpointsCleaner) Test(org.junit.Test) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) JobID(org.apache.flink.api.common.JobID) HighAvailabilityServicesFactory(org.apache.flink.runtime.highavailability.HighAvailabilityServicesFactory) CheckpointStreamFactory(org.apache.flink.runtime.state.CheckpointStreamFactory) HighAvailabilityOptions(org.apache.flink.configuration.HighAvailabilityOptions) TemporaryFolder(org.junit.rules.TemporaryFolder) Assert.assertEquals(org.junit.Assert.assertEquals) StateBackend(org.apache.flink.runtime.state.StateBackend) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) DefaultOperatorStateBackend(org.apache.flink.runtime.state.DefaultOperatorStateBackend) FsStateBackend(org.apache.flink.runtime.state.filesystem.FsStateBackend) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector)120 Test (org.junit.Test)113 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)45 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)44 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)39 Watermark (org.apache.flink.streaming.api.watermark.Watermark)30 List (java.util.List)29 StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord)28 InvalidProgramException (org.apache.flink.api.common.InvalidProgramException)22 JobID (org.apache.flink.api.common.JobID)22 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)22 IOException (java.io.IOException)21 Arrays (java.util.Arrays)21 AtomicLong (java.util.concurrent.atomic.AtomicLong)21 Configuration (org.apache.flink.configuration.Configuration)21 KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness)21 ArrayList (java.util.ArrayList)18 Map (java.util.Map)18 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)18 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)16