Search in sources :

Example 26 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class CEPMigrationTest method writeAndOrSubtypConditionsPatternAfterMigrationSnapshot.

/**
 * Writes a binary snapshot of the keyed CEP operator state to a file under
 * {@code src/test/resources}.
 *
 * <p>The test is ignored by default and is meant to be run by hand when the snapshot file has to
 * be produced.
 */
@Ignore
@Test
public void writeAndOrSubtypConditionsPatternAfterMigrationSnapshot() throws Exception {
    // Events are keyed by their id.
    KeySelector<Event, Integer> eventIdSelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    final Event firstStartEvent = new SubEvent(42, "start", 1.0, 6.0);
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> testHarness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    CepOperatorTestUtilities.getKeyedCepOperator(false, new NFAComplexConditionsFactory()),
                    eventIdSelector,
                    BasicTypeInfo.INT_TYPE_INFO);
    try {
        testHarness.setup();
        testHarness.open();
        // Feed one element followed by a watermark before the state is captured.
        testHarness.processElement(new StreamRecord<>(firstStartEvent, 5));
        testHarness.processWatermark(new Watermark(6));
        // Take the snapshot, then persist it to the version-specific file.
        OperatorSubtaskState operatorState = testHarness.snapshot(0L, 0L);
        final String snapshotPath =
                "src/test/resources/cep-migration-conditions-flink" + flinkGenerateSavepointVersion + "-snapshot";
        OperatorSnapshotUtil.writeStateHandle(operatorState, snapshotPath);
    } finally {
        testHarness.close();
    }
}
Also used : SubEvent(org.apache.flink.cep.SubEvent) Event(org.apache.flink.cep.Event) KeySelector(org.apache.flink.api.java.functions.KeySelector) Map(java.util.Map) Watermark(org.apache.flink.streaming.api.watermark.Watermark) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 27 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class UnionTranslationTest method translateUnion2Group.

/**
 * Checks the plan produced for a union of two data sets that is grouped with a
 * {@link KeySelector}: both union inputs must be map operators whose parallelism is 3 and 2
 * respectively, and the union itself must keep the default parallelism.
 */
@Test
public void translateUnion2Group() {
    try {
        final int parallelism = 4;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset1 = getSourceDataSet(env, 3);
        DataSet<Tuple3<Double, StringValue, LongValue>> dataset2 = getSourceDataSet(env, 2);
        dataset1.union(dataset2).groupBy((KeySelector<Tuple3<Double, StringValue, LongValue>, String>) value -> "").reduceGroup((GroupReduceFunction<Tuple3<Double, StringValue, LongValue>, String>) (values, out) -> {
        }).returns(String.class).output(new DiscardingOutputFormat<>());
        Plan p = env.createProgramPlan();
        // The plan should look like the following one.
        //
        // DataSet1(3) - MapOperator(3)-+
        //                              |- Union(-1) - SingleInputOperator - Sink
        // DataSet2(2) - MapOperator(2)-+
        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
        Union unionOperator = (Union) ((SingleInputOperator) sink.getInput()).getInput();
        // The key mappers should be added to both of the two input streams for union.
        assertTrue(unionOperator.getFirstInput() instanceof MapOperatorBase<?, ?, ?>);
        assertTrue(unionOperator.getSecondInput() instanceof MapOperatorBase<?, ?, ?>);
        // The parallelisms of the key mappers should be equal to those of their inputs.
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes a
        // failure message report the right value as "expected".
        assertEquals(3, unionOperator.getFirstInput().getParallelism());
        assertEquals(2, unionOperator.getSecondInput().getParallelism());
        // The union should always have the default parallelism.
        assertEquals(ExecutionConfig.PARALLELISM_DEFAULT, unionOperator.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
Also used : KeySelector(org.apache.flink.api.java.functions.KeySelector) Tuple3(org.apache.flink.api.java.tuple.Tuple3) DiscardingOutputFormat(org.apache.flink.api.java.io.DiscardingOutputFormat) LongValue(org.apache.flink.types.LongValue) GroupReduceFunction(org.apache.flink.api.common.functions.GroupReduceFunction) MapOperatorBase(org.apache.flink.api.common.operators.base.MapOperatorBase) Union(org.apache.flink.api.common.operators.Union) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) SingleInputOperator(org.apache.flink.api.common.operators.SingleInputOperator) DataSet(org.apache.flink.api.java.DataSet) ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) StringValue(org.apache.flink.types.StringValue) GenericDataSinkBase(org.apache.flink.api.common.operators.GenericDataSinkBase) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Plan(org.apache.flink.api.common.Plan) Assert.fail(org.junit.Assert.fail) Order(org.apache.flink.api.common.operators.Order) Assert.assertEquals(org.junit.Assert.assertEquals)

Example 28 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class StateBootstrapTransformationTest method testStreamConfig.

/**
 * Checks that the key selector handed to {@code keyBy} is the one returned as the state
 * partitioner of the {@code StreamConfig} built by the bootstrap transformation.
 */
@Test
public void testStreamConfig() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<String> input = env.fromElements("");
    StateBootstrapTransformation<String> transformation =
            OperatorTransformation.bootstrapWith(input)
                    .keyBy(new CustomKeySelector())
                    .transform(new ExampleKeyedStateBootstrapFunction());
    StreamConfig config = transformation.getConfig(OperatorIDGenerator.fromUid("uid"), new HashMapStateBackend(), new Configuration(), null);
    // Only the runtime class of the selector is inspected, so unbounded wildcards are enough
    // and avoid a raw type.
    KeySelector<?, ?> selector = config.getStatePartitioner(0, Thread.currentThread().getContextClassLoader());
    Assert.assertEquals("Incorrect key selector forwarded to stream operator", CustomKeySelector.class, selector.getClass());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) HashMapStateBackend(org.apache.flink.runtime.state.hashmap.HashMapStateBackend) KeySelector(org.apache.flink.api.java.functions.KeySelector) Test(org.junit.Test)

Example 29 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class AbstractQueryableStateTestBase method testValueState.

/**
 * Exercises a simple queryable value state instance. Every source emits the tuples
 * (subtaskIndex, 0) through (subtaskIndex, numElements), which are then queried. The test
 * succeeds once each subtask index has been queried with the value numElements, i.e. the latest
 * element that updated the state.
 */
@Test
public void testValueState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;
    StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    streamEnv.setStateBackend(stateBackend);
    streamEnv.setParallelism(maxParallelism);
    // The restart strategy matters: the cluster is shared between tests and
    // slot availability is not explicitly checked before submitting.
    streamEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> ascendingSource = streamEnv.addSource(new TestAscendingValueSource(numElements));
    // Descriptor of the value state that is exposed for queries.
    ValueStateDescriptor<Tuple2<Integer, Long>> stateDescriptor = new ValueStateDescriptor<>("any", ascendingSource.getType());
    // Key the stream by the first tuple field.
    KeySelector<Tuple2<Integer, Long>, Integer> firstFieldSelector = new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 7662520075515707428L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    };
    ascendingSource.keyBy(firstFieldSelector).asQueryableState("hakuna", stateDescriptor);
    try (AutoCancellableJob job = new AutoCancellableJob(deadline, clusterClient, streamEnv)) {
        final JobID submittedJobId = job.getJobId();
        final JobGraph graph = job.getJobGraph();
        clusterClient.submitJob(graph).get();
        executeValueQuery(deadline, client, submittedJobId, "hakuna", stateDescriptor, numElements);
    }
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) KeySelector(org.apache.flink.api.java.functions.KeySelector) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 30 with KeySelector

use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

the class AbstractQueryableStateTestBase method testQueryNonStartedJobState.

/**
 * Similar to {@link #testValueState()}, but one state query is issued before the job is
 * submitted. According to the original description that early request fails; its result is not
 * inspected here. After submission the state is queried as in the regular value state test.
 */
@Test
public void testQueryNonStartedJobState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because clusterClient is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));
    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));
    // Two-argument constructor: no default value, same as passing null to the deprecated
    // three-argument variant, and consistent with testValueState().
    ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType());
    QueryableStateStream<Integer, Tuple2<Integer, Long>> queryableState = source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {

        private static final long serialVersionUID = 7480503339992214681L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState("hakuna", valueState);
    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();
        // Query once before the job has been submitted; the returned future is ignored.
        client.getKvState(jobId, queryableState.getQueryableStateName(), 0, BasicTypeInfo.INT_TYPE_INFO, valueState);
        clusterClient.submitJob(jobGraph).get();
        executeValueQuery(deadline, client, jobId, "hakuna", valueState, numElements);
    }
}
Also used : Deadline(org.apache.flink.api.common.time.Deadline) KeySelector(org.apache.flink.api.java.functions.KeySelector) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Tuple2(org.apache.flink.api.java.tuple.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector) 120; Test (org.junit.Test) 113; Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 45; StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 44; ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 39; Watermark (org.apache.flink.streaming.api.watermark.Watermark) 30; List (java.util.List) 29; StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord) 28; InvalidProgramException (org.apache.flink.api.common.InvalidProgramException) 22; JobID (org.apache.flink.api.common.JobID) 22; JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 22; IOException (java.io.IOException) 21; Arrays (java.util.Arrays) 21; AtomicLong (java.util.concurrent.atomic.AtomicLong) 21; Configuration (org.apache.flink.configuration.Configuration) 21; KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) 21; ArrayList (java.util.ArrayList) 18; Map (java.util.Map) 18; ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 18; ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor) 16