
Example 76 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

The class CEPMigrationTest, method writeSinglePatternAfterMigrationSnapshot.

/**
 * Manually run this to write binary snapshot data.
 */
@Ignore
@Test
public void writeSinglePatternAfterMigrationSnapshot() throws Exception {
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
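    // note: this event is constructed but never processed by the harness;
    // only a watermark is emitted before the snapshot is written.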
    final Event startEvent1 = new Event(42, "start", 1.0);
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness =
            new KeyedOneInputStreamOperatorTestHarness<>(
                    CepOperatorTestUtilities.getKeyedCepOperator(false, new SinglePatternNFAFactory()),
                    keySelector,
                    BasicTypeInfo.INT_TYPE_INFO);
    try {
        harness.setup();
        harness.open();
        harness.processWatermark(new Watermark(5));
        // do snapshot and save to file
        OperatorSubtaskState snapshot = harness.snapshot(0L, 0L);
        OperatorSnapshotUtil.writeStateHandle(
                snapshot,
                "src/test/resources/cep-migration-single-pattern-afterwards-flink"
                        + flinkGenerateSavepointVersion
                        + "-snapshot");
    } finally {
        harness.close();
    }
}
Also used: Event (org.apache.flink.cep.Event), SubEvent (org.apache.flink.cep.SubEvent), KeySelector (org.apache.flink.api.java.functions.KeySelector), Map (java.util.Map), Watermark (org.apache.flink.streaming.api.watermark.Watermark), KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Ignore (org.junit.Ignore), Test (org.junit.Test)
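
The anonymous class above is the pre-lambda idiom. Since KeySelector is a functional interface, the same key extraction can be written as a method reference; a minimal sketch, assuming the same Event type with a getId() accessor:

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.cep.Event;

// A serializable method reference works anywhere the anonymous class does;
// if Flink's type extraction cannot infer the key type due to erasure,
// fall back to the explicit anonymous-class form shown above.
KeySelector<Event, Integer> keySelector = Event::getId;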

Example 77 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

The class CEPRescalingTest, method testCEPFunctionScalingUp.

@Test
public void testCEPFunctionScalingUp() throws Exception {
    int maxParallelism = 10;
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    // valid pattern events belong to different keygroups
    // that will be shipped to different tasks when changing parallelism.
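    // this will go to task index 0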
    Event startEvent1 = new Event(7, "start", 1.0);
    SubEvent middleEvent1 = new SubEvent(7, "foo", 1.0, 10.0);
    Event endEvent1 = new Event(7, "end", 1.0);
    int keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent1), maxParallelism);
    assertEquals(1, keygroup);
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 1
    Event startEvent2 = new Event(10, "start", 1.0);
    SubEvent middleEvent2 = new SubEvent(10, "foo", 1.0, 10.0);
    Event endEvent2 = new Event(10, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent2), maxParallelism);
    assertEquals(9, keygroup);
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // now we start the test: we go from parallelism 1 to 2.
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness1 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness2 = null;
    try {
        harness = getTestHarness(maxParallelism, 1, 0);
        harness.open();
        // valid element
        harness.processElement(new StreamRecord<>(startEvent1, 1));
        harness.processElement(new StreamRecord<>(new Event(7, "foobar", 1.0), 2));
        // valid element
        harness.processElement(new StreamRecord<>(startEvent2, 3));
        // valid element
        harness.processElement(new StreamRecord<Event>(middleEvent2, 4));
        // take a snapshot with some elements in internal sorting queue
        OperatorSubtaskState snapshot = harness.snapshot(0, 0);
        harness.close();
        // initialize two sub-tasks with the previously snapshotted state to simulate scaling up
        // we know that key 7 maps to task index 0 after rescaling,
        // so we initialize both tasks and feed the remaining
        // elements of the first pattern to task 0.
        OperatorSubtaskState initState1 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 1, 2, 0);
        OperatorSubtaskState initState2 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 1, 2, 1);
        harness1 = getTestHarness(maxParallelism, 2, 0);
        harness1.setup();
        harness1.initializeState(initState1);
        harness1.open();
        // if element timestamps are not correctly checkpointed/restored this will lead to
        // a pruning time underflow exception in NFA
        harness1.processWatermark(new Watermark(2));
        // valid element
        harness1.processElement(new StreamRecord<Event>(middleEvent1, 3));
        // valid element
        harness1.processElement(new StreamRecord<>(endEvent1, 5));
        harness1.processWatermark(new Watermark(Long.MAX_VALUE));
        // expect two watermarks and the matched pattern in the output
        assertEquals(3, harness1.getOutput().size());
        verifyWatermark(harness1.getOutput().poll(), 2);
        verifyPattern(harness1.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        harness2 = getTestHarness(maxParallelism, 2, 1);
        harness2.setup();
        harness2.initializeState(initState2);
        harness2.open();
        // now we move to the second parallel task
        harness2.processWatermark(new Watermark(2));
        harness2.processElement(new StreamRecord<>(endEvent2, 5));
        harness2.processElement(new StreamRecord<>(new Event(42, "start", 1.0), 4));
        harness2.processWatermark(new Watermark(Long.MAX_VALUE));
        assertEquals(3, harness2.getOutput().size());
        verifyWatermark(harness2.getOutput().poll(), 2);
        verifyPattern(harness2.getOutput().poll(), startEvent2, middleEvent2, endEvent2);
    } finally {
        closeSilently(harness);
        closeSilently(harness1);
        closeSilently(harness2);
    }
}
Also used: Event (org.apache.flink.cep.Event), SubEvent (org.apache.flink.cep.SubEvent), KeySelector (org.apache.flink.api.java.functions.KeySelector), Map (java.util.Map), Watermark (org.apache.flink.streaming.api.watermark.Watermark), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Test (org.junit.Test)
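
The key-group assertions above follow from Flink's rescaling model: each key is hashed into one of maxParallelism key groups, and each subtask owns a contiguous range of key groups. A hedged sketch of how test keys like 7 and 10 can be found, using the same two Flink utilities asserted on above; the KeyPicker wrapper and its search loop are hypothetical, not part of Flink:

import org.apache.flink.runtime.state.KeyGroupRangeAssignment;

final class KeyPicker {
    // Brute-force a key that lands on the desired subtask.
    static int pickKeyForSubtask(int maxParallelism, int parallelism, int targetSubtask) {
        for (int key = 0; key < 1000; key++) {
            int keyGroup = KeyGroupRangeAssignment.assignToKeyGroup(key, maxParallelism);
            int subtask =
                    KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(
                            maxParallelism, parallelism, keyGroup);
            if (subtask == targetSubtask) {
                return key;
            }
        }
        throw new IllegalStateException("no key found for subtask " + targetSubtask);
    }
}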

Example 78 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

The class CEPRescalingTest, method testCEPFunctionScalingDown.

@Test
public void testCEPFunctionScalingDown() throws Exception {
    int maxParallelism = 10;
    KeySelector<Event, Integer> keySelector = new KeySelector<Event, Integer>() {

        private static final long serialVersionUID = -4873366487571254798L;

        @Override
        public Integer getKey(Event value) throws Exception {
            return value.getId();
        }
    };
    // create some valid pattern events on predetermined key groups and task indices
    // this will go to task index 0
    Event startEvent1 = new Event(7, "start", 1.0);
    SubEvent middleEvent1 = new SubEvent(7, "foo", 1.0, 10.0);
    Event endEvent1 = new Event(7, "end", 1.0);
    // verification of the key choice
    int keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent1), maxParallelism);
    assertEquals(1, keygroup);
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 1
    Event startEvent2 = new Event(45, "start", 1.0);
    SubEvent middleEvent2 = new SubEvent(45, "foo", 1.0, 10.0);
    Event endEvent2 = new Event(45, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent2), maxParallelism);
    assertEquals(6, keygroup);
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 0
    Event startEvent3 = new Event(90, "start", 1.0);
    SubEvent middleEvent3 = new SubEvent(90, "foo", 1.0, 10.0);
    Event endEvent3 = new Event(90, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent3), maxParallelism);
    assertEquals(2, keygroup);
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(0, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // this will go to task index 2
    Event startEvent4 = new Event(10, "start", 1.0);
    SubEvent middleEvent4 = new SubEvent(10, "foo", 1.0, 10.0);
    Event endEvent4 = new Event(10, "end", 1.0);
    keygroup = KeyGroupRangeAssignment.assignToKeyGroup(keySelector.getKey(startEvent4), maxParallelism);
    assertEquals(9, keygroup);
    assertEquals(2, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 3, keygroup));
    assertEquals(1, KeyGroupRangeAssignment.computeOperatorIndexForKeyGroup(maxParallelism, 2, keygroup));
    // starting the test: we go from parallelism 3 to parallelism 2
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness1 = getTestHarness(maxParallelism, 3, 0);
    harness1.open();
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness2 = getTestHarness(maxParallelism, 3, 1);
    harness2.open();
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness3 = getTestHarness(maxParallelism, 3, 2);
    harness3.open();
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness4 = null;
    OneInputStreamOperatorTestHarness<Event, Map<String, List<Event>>> harness5 = null;
    try {
        harness1.processWatermark(Long.MIN_VALUE);
        harness2.processWatermark(Long.MIN_VALUE);
        harness3.processWatermark(Long.MIN_VALUE);
        // valid element
        harness1.processElement(new StreamRecord<>(startEvent1, 1));
        harness1.processElement(new StreamRecord<>(new Event(7, "foobar", 1.0), 2));
        // valid element
        harness1.processElement(new StreamRecord<Event>(middleEvent1, 3));
        // valid element
        harness1.processElement(new StreamRecord<>(endEvent1, 5));
        // up to here we have a valid sequence, so after creating the
        // new instance and sending it a watermark, we expect it to fire,
        // even with no new elements.
        harness1.processElement(new StreamRecord<>(startEvent3, 10));
        harness1.processElement(new StreamRecord<>(startEvent1, 10));
        harness2.processElement(new StreamRecord<>(startEvent2, 7));
        harness2.processElement(new StreamRecord<Event>(middleEvent2, 8));
        harness3.processElement(new StreamRecord<>(startEvent4, 15));
        harness3.processElement(new StreamRecord<Event>(middleEvent4, 16));
        harness3.processElement(new StreamRecord<>(endEvent4, 17));
        // so far we only have the initial watermark
        assertEquals(1, harness1.getOutput().size());
        verifyWatermark(harness1.getOutput().poll(), Long.MIN_VALUE);
        assertEquals(1, harness2.getOutput().size());
        verifyWatermark(harness2.getOutput().poll(), Long.MIN_VALUE);
        assertEquals(1, harness3.getOutput().size());
        verifyWatermark(harness3.getOutput().poll(), Long.MIN_VALUE);
        // we take a snapshot and repackage it so that it looks like the
        // state of a single operator; this becomes the initial state of
        // all downstream tasks.
        OperatorSubtaskState snapshot =
                AbstractStreamOperatorTestHarness.repackageState(
                        harness2.snapshot(0, 0),
                        harness1.snapshot(0, 0),
                        harness3.snapshot(0, 0));
        OperatorSubtaskState initState1 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 3, 2, 0);
        OperatorSubtaskState initState2 = AbstractStreamOperatorTestHarness.repartitionOperatorState(snapshot, maxParallelism, 3, 2, 1);
        harness4 = getTestHarness(maxParallelism, 2, 0);
        harness4.setup();
        harness4.initializeState(initState1);
        harness4.open();
        harness5 = getTestHarness(maxParallelism, 2, 1);
        harness5.setup();
        harness5.initializeState(initState2);
        harness5.open();
        harness5.processElement(new StreamRecord<>(endEvent2, 11));
        harness5.processWatermark(new Watermark(12));
        verifyPattern(harness5.getOutput().poll(), startEvent2, middleEvent2, endEvent2);
        verifyWatermark(harness5.getOutput().poll(), 12);
        // if element timestamps are not correctly checkpointed/restored this will lead to
        // a pruning time underflow exception in NFA
        harness4.processWatermark(new Watermark(12));
        assertEquals(2, harness4.getOutput().size());
        verifyPattern(harness4.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        verifyWatermark(harness4.getOutput().poll(), 12);
        // valid element
        harness4.processElement(new StreamRecord<Event>(middleEvent3, 15));
        // valid element
        harness4.processElement(new StreamRecord<>(endEvent3, 16));
        // valid element
        harness4.processElement(new StreamRecord<Event>(middleEvent1, 15));
        // valid element
        harness4.processElement(new StreamRecord<>(endEvent1, 16));
        harness4.processWatermark(new Watermark(Long.MAX_VALUE));
        harness5.processWatermark(new Watermark(Long.MAX_VALUE));
        // verify result
        assertEquals(3, harness4.getOutput().size());
        // check the order of the events in the output
        Queue<Object> output = harness4.getOutput();
        StreamRecord<?> resultRecord = (StreamRecord<?>) output.peek();
        assertTrue(resultRecord.getValue() instanceof Map);
        @SuppressWarnings("unchecked") Map<String, List<Event>> patternMap = (Map<String, List<Event>>) resultRecord.getValue();
        if (patternMap.get("start").get(0).getId() == 7) {
            verifyPattern(harness4.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
            verifyPattern(harness4.getOutput().poll(), startEvent3, middleEvent3, endEvent3);
        } else {
            verifyPattern(harness4.getOutput().poll(), startEvent3, middleEvent3, endEvent3);
            verifyPattern(harness4.getOutput().poll(), startEvent1, middleEvent1, endEvent1);
        }
        // after scaling down, the pattern for key 10 (previously on task 2) ends up on this task
        assertEquals(2, harness5.getOutput().size());
        verifyPattern(harness5.getOutput().poll(), startEvent4, middleEvent4, endEvent4);
    } finally {
        closeSilently(harness1);
        closeSilently(harness2);
        closeSilently(harness3);
        closeSilently(harness4);
        closeSilently(harness5);
    }
}
Also used: SubEvent (org.apache.flink.cep.SubEvent), StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord), KeySelector (org.apache.flink.api.java.functions.KeySelector), OperatorSubtaskState (org.apache.flink.runtime.checkpoint.OperatorSubtaskState), Event (org.apache.flink.cep.Event), List (java.util.List), Map (java.util.Map), Watermark (org.apache.flink.streaming.api.watermark.Watermark), Test (org.junit.Test)
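
Both rescaling tests follow the same snapshot/repackage/repartition pattern. A condensed sketch of that flow, under assumed names (oldHarness0 and oldHarness1 stand in for the per-subtask harnesses above); repackageState and repartitionOperatorState are the same test utilities used in the tests:

// 1. snapshot each of the old subtasks,
// 2. repackage the snapshots as if they came from a single operator,
// 3. carve out the state for one new subtask.
OperatorSubtaskState repackaged =
        AbstractStreamOperatorTestHarness.repackageState(
                oldHarness0.snapshot(0, 0),
                oldHarness1.snapshot(0, 0));
OperatorSubtaskState newSubtaskState =
        AbstractStreamOperatorTestHarness.repartitionOperatorState(
                repackaged, maxParallelism, /* oldParallelism */ 2, /* newParallelism */ 1, /* subtaskIndex */ 0);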

Example 79 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

The class BootstrapTransformationTest, method testStreamConfig.

@Test
public void testStreamConfig() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> input = env.fromElements("");
    BootstrapTransformation<String> transformation =
            OperatorTransformation.bootstrapWith(input)
                    .keyBy(new CustomKeySelector())
                    .transform(new ExampleKeyedStateBootstrapFunction());
    StreamConfig config = transformation.getConfig(
            OperatorIDGenerator.fromUid("uid"), new MemoryStateBackend(), new Configuration(), null);
    KeySelector selector =
            config.getStatePartitioner(0, Thread.currentThread().getContextClassLoader());
    Assert.assertEquals(
            "Incorrect key selector forwarded to stream operator",
            CustomKeySelector.class,
            selector.getClass());
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), Configuration (org.apache.flink.configuration.Configuration), MemoryStateBackend (org.apache.flink.runtime.state.memory.MemoryStateBackend), StreamConfig (org.apache.flink.streaming.api.graph.StreamConfig), KeySelector (org.apache.flink.api.java.functions.KeySelector), Test (org.junit.Test)
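
CustomKeySelector and ExampleKeyedStateBootstrapFunction are defined elsewhere in the test class. A hypothetical stand-in for the selector, just to make its shape concrete; any deterministic, serializable mapping from record to key satisfies the contract the test checks:

import org.apache.flink.api.java.functions.KeySelector;

// Hypothetical stand-in for the CustomKeySelector referenced above.
public class CustomKeySelector implements KeySelector<String, String> {
    @Override
    public String getKey(String value) {
        return value;
    }
}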

Example 80 with KeySelector

Use of org.apache.flink.api.java.functions.KeySelector in project flink by apache.

The class ReduceCompilationTest, method testGroupedReduceWithHint.

@Test
public void testGroupedReduceWithHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);
        DataSet<Tuple2<String, Double>> data =
                env.readCsvFile("file:///will/never/be/read")
                        .types(String.class, Double.class)
                        .name("source")
                        .setParallelism(6);
        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {

            @Override
            public String getKey(Tuple2<String, Double> value) {
                return value.f0;
            }
        }).reduce(new RichReduceFunction<Tuple2<String, Double>>() {

            @Override
            public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                // never executed: this compilation test only builds and inspects the plan
                return null;
            }
        }).setCombineHint(CombineHint.HASH)
                .name("reducer")
                .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
                .name("sink");
        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);
        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");
        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();
        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());
        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());
        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Also used: ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment), KeySelector (org.apache.flink.api.java.functions.KeySelector), Plan (org.apache.flink.api.common.Plan), OptimizedPlan (org.apache.flink.optimizer.plan.OptimizedPlan), DiscardingOutputFormat (org.apache.flink.api.java.io.DiscardingOutputFormat), FieldList (org.apache.flink.api.common.operators.util.FieldList), SingleInputPlanNode (org.apache.flink.optimizer.plan.SingleInputPlanNode), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SourcePlanNode (org.apache.flink.optimizer.plan.SourcePlanNode), SinkPlanNode (org.apache.flink.optimizer.plan.SinkPlanNode), Test (org.junit.Test)
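
The keyExtractor and keyProjector nodes appear in the plan only because the grouping key is a KeySelector: the optimizer inserts a mapper that wraps each record as (key, value) before the combine/reduce and a projector that unwraps it before the sink. Grouping on a tuple field position avoids both nodes; a minimal sketch of the alternative, assuming the same data set as above:

// Grouping on field position 0 instead of a KeySelector: the optimizer can
// use the tuple field directly, so no key-extractor/projector mapper nodes
// are inserted around the reduce.
data.groupBy(0)
        .reduce((value1, value2) -> value1)
        .name("reducer")
        .output(new DiscardingOutputFormat<>())
        .name("sink");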

Aggregations

KeySelector (org.apache.flink.api.java.functions.KeySelector) 120
Test (org.junit.Test) 113
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 45
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 44
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment) 39
Watermark (org.apache.flink.streaming.api.watermark.Watermark) 30
List (java.util.List) 29
StreamRecord (org.apache.flink.streaming.runtime.streamrecord.StreamRecord) 28
InvalidProgramException (org.apache.flink.api.common.InvalidProgramException) 22
JobID (org.apache.flink.api.common.JobID) 22
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 22
IOException (java.io.IOException) 21
Arrays (java.util.Arrays) 21
AtomicLong (java.util.concurrent.atomic.AtomicLong) 21
Configuration (org.apache.flink.configuration.Configuration) 21
KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) 21
ArrayList (java.util.ArrayList) 18
Map (java.util.Map) 18
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig) 18
ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor) 16