
Example 26 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class KafkaConsumerTestBase method readSequence. A hypothetical usage sketch follows the listing.

// ------------------------------------------------------------------------
//  Reading / writing test data sets
// ------------------------------------------------------------------------
/**
	 * Runs a job on the provided environment that reads a sequence of records from a single Kafka topic.
	 * The expected starting offset and the number of values to read can be specified individually per partition.
	 * The job is considered successful only if every partition's read results match its start offset and value count.
	 */
protected void readSequence(final StreamExecutionEnvironment env, final StartupMode startupMode, final Map<KafkaTopicPartition, Long> specificStartupOffsets, final Properties cc, final String topicName, final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset) throws Exception {
    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();
    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset : partitionsToValuesCountAndStartOffset.entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;
    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType = TypeInfoParser.parse("Tuple2<Integer, Integer>");
    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser = new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());
    // create the consumer
    cc.putAll(secureProps);
    FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
    switch(startupMode) {
        case EARLIEST:
            consumer.setStartFromEarliest();
            break;
        case LATEST:
            consumer.setStartFromLatest();
            break;
        case SPECIFIC_OFFSETS:
            consumer.setStartFromSpecificOffsets(specificStartupOffsets);
            break;
        case GROUP_OFFSETS:
            consumer.setStartFromGroupOffsets();
            break;
    }
    DataStream<Tuple2<Integer, Integer>> source = env.addSource(consumer).setParallelism(sourceParallelism).map(new ThrottledMapper<Tuple2<Integer, Integer>>(20)).setParallelism(sourceParallelism);
    // verify data
    source.flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

        private HashMap<Integer, BitSet> partitionsToValueCheck;

        private int count = 0;

        @Override
        public void open(Configuration parameters) throws Exception {
            partitionsToValueCheck = new HashMap<>();
            for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                partitionsToValueCheck.put(partition, new BitSet());
            }
        }

        @Override
        public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
            int partition = value.f0;
            int val = value.f1;
            BitSet bitSet = partitionsToValueCheck.get(partition);
            if (bitSet == null) {
                throw new RuntimeException("Got a record from an unknown partition");
            } else {
                bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
            }
            count++;
            LOG.info("Received message {}, total {} messages", value, count);
            // verify if we've seen everything
            if (count == finalCount) {
                for (Map.Entry<Integer, BitSet> partitionValueCheck : this.partitionsToValueCheck.entrySet()) {
                    BitSet check = partitionValueCheck.getValue();
                    int expectedValueCount = partitionsToValuesCountAndStartOffset.get(partitionValueCheck.getKey()).f0;
                    if (check.cardinality() != expectedValueCount) {
                        throw new RuntimeException("Expected cardinality to be " + expectedValueCount + ", but was " + check.cardinality());
                    } else if (check.nextClearBit(0) != expectedValueCount) {
                        throw new RuntimeException("Expected next clear bit to be " + expectedValueCount + ", but was " + check.nextClearBit(0));
                    }
                }
                // test has passed
                throw new SuccessException();
            }
        }
    }).setParallelism(1);
    tryExecute(env, "Read data from Kafka");
    LOG.info("Successfully read sequence for verification");
}
Also used: Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) BitSet(java.util.BitSet) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) ThrottledMapper(org.apache.flink.streaming.connectors.kafka.testutils.ThrottledMapper) TypeInformationSerializationSchema(org.apache.flink.streaming.util.serialization.TypeInformationSerializationSchema) Tuple2(org.apache.flink.api.java.tuple.Tuple2) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) Collector(org.apache.flink.util.Collector) SuccessException(org.apache.flink.test.util.SuccessException) Map(java.util.Map)
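
A minimal, hypothetical usage sketch for readSequence. All concrete values (topic name, partition count, offsets) are illustrative only; flinkPort, standardProps, and StartupMode come from the surrounding test base:

// Hypothetical call to readSequence; not part of the original test base.
// Each map entry reads: partition -> (number of values to read, expected start offset).
Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset = new HashMap<>();
partitionsToValuesCountAndStartOffset.put(0, new Tuple2<>(50, 0));
partitionsToValuesCountAndStartOffset.put(1, new Tuple2<>(50, 0));

StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
Properties props = new Properties();
props.putAll(standardProps);

// read both partitions from the earliest offset; the job succeeds once all 100 records are verified
readSequence(env, StartupMode.EARLIEST, null, props, "readSequenceTestTopic", partitionsToValuesCountAndStartOffset);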

Example 27 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class KafkaConsumerTestBase method runProduceConsumeMultipleTopics. A sketch of the topic-routing schema follows the listing.

/**
	 * Tests producing into and consuming from multiple topics.
	 * @throws java.lang.Exception
	 */
public void runProduceConsumeMultipleTopics() throws java.lang.Exception {
    final int NUM_TOPICS = 5;
    final int NUM_ELEMENTS = 20;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();
    // create topics with content
    final List<String> topics = new ArrayList<>();
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        topics.add(topic);
        // create topic
        createTestTopic(topic, i + 1 /* number of partitions */, 1 /* replication factor */);
    }
    // run first job, producing into all topics
    DataStream<Tuple3<Integer, Integer, String>> stream = env.addSource(new RichParallelSourceFunction<Tuple3<Integer, Integer, String>>() {

        @Override
        public void run(SourceContext<Tuple3<Integer, Integer, String>> ctx) throws Exception {
            int partition = getRuntimeContext().getIndexOfThisSubtask();
            for (int topicId = 0; topicId < NUM_TOPICS; topicId++) {
                for (int i = 0; i < NUM_ELEMENTS; i++) {
                    ctx.collect(new Tuple3<>(partition, i, "topic-" + topicId));
                }
            }
        }

        @Override
        public void cancel() {
        }
    });
    Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig());
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, "dummy", schema, props, null);
    env.execute("Write to topics");
    // run second job consuming from multiple topics
    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();
    stream = env.addSource(kafkaServer.getConsumer(topics, schema, props));
    stream.flatMap(new FlatMapFunction<Tuple3<Integer, Integer, String>, Integer>() {

        Map<String, Integer> countPerTopic = new HashMap<>(NUM_TOPICS);

        @Override
        public void flatMap(Tuple3<Integer, Integer, String> value, Collector<Integer> out) throws Exception {
            Integer count = countPerTopic.get(value.f2);
            if (count == null) {
                count = 1;
            } else {
                count++;
            }
            countPerTopic.put(value.f2, count);
            // check map: succeed only once every topic has delivered exactly NUM_ELEMENTS records
            if (countPerTopic.size() < NUM_TOPICS) {
                // not all topics have been seen yet
                return;
            }
            for (Map.Entry<String, Integer> el : countPerTopic.entrySet()) {
                if (el.getValue() < NUM_ELEMENTS) {
                    // this topic does not have enough records yet
                    return;
                }
                if (el.getValue() > NUM_ELEMENTS) {
                    throw new RuntimeException("There is a failure in the test. I've read " + el.getValue() + " from topic " + el.getKey());
                }
            }
            // we've seen all expected messages from all topics
            throw new SuccessException();
        }
    }).setParallelism(1);
    tryExecute(env, "Count elements from the topics");
    // delete all topics again
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        deleteTestTopic(topic);
    }
}
Also used: ArrayList(java.util.ArrayList) Properties(java.util.Properties) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) RetryOnException(org.apache.flink.testutils.junit.RetryOnException) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) SuccessException(org.apache.flink.test.util.SuccessException) NoResourceAvailableException(org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException) JobExecutionException(org.apache.flink.runtime.client.JobExecutionException) TimeoutException(org.apache.kafka.common.errors.TimeoutException) JobCancellationException(org.apache.flink.runtime.client.JobCancellationException) IOException(java.io.IOException) Tuple3(org.apache.flink.api.java.tuple.Tuple3) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) Collector(org.apache.flink.util.Collector) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Map(java.util.Map) HashMap(java.util.HashMap)
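
The Tuple2WithTopicSchema used above is defined elsewhere in the test base. Below is a reconstruction, not the original: a minimal sketch assuming this Flink version's KeyedSerializationSchema and KeyedDeserializationSchema interfaces (org.apache.flink.streaming.util.serialization), which serializes the (partition, value) pair and routes each record to the topic carried in field f2:

// Sketch of a topic-aware schema (assumption: mirrors the test base's Tuple2WithTopicSchema).
private static class Tuple2WithTopicSchema implements KeyedDeserializationSchema<Tuple3<Integer, Integer, String>>, KeyedSerializationSchema<Tuple3<Integer, Integer, String>> {

    private final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> ts;

    public Tuple2WithTopicSchema(ExecutionConfig ec) {
        ts = new TypeInformationSerializationSchema<>(TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"), ec);
    }

    @Override
    public Tuple3<Integer, Integer, String> deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) throws IOException {
        Tuple2<Integer, Integer> t2 = ts.deserialize(message);
        // attach the source topic so the consuming job can count per topic
        return new Tuple3<>(t2.f0, t2.f1, topic);
    }

    @Override
    public boolean isEndOfStream(Tuple3<Integer, Integer, String> nextElement) {
        return false;
    }

    @Override
    public TypeInformation<Tuple3<Integer, Integer, String>> getProducedType() {
        return TypeInfoParser.parse("Tuple3<Integer, Integer, String>");
    }

    @Override
    public byte[] serializeKey(Tuple3<Integer, Integer, String> element) {
        // no message key needed for this test
        return null;
    }

    @Override
    public byte[] serializeValue(Tuple3<Integer, Integer, String> element) {
        return ts.serialize(new Tuple2<>(element.f0, element.f1));
    }

    @Override
    public String getTargetTopic(Tuple3<Integer, Integer, String> element) {
        // this is what makes produceIntoKafka write each record to its own topic
        return element.f2;
    }
}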

Example 28 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class InnerJoinOperatorBaseTest method testJoinRich. A DataSet API usage sketch follows the listing.

@Test
public void testJoinRich() {
    final AtomicBoolean opened = new AtomicBoolean(false);
    final AtomicBoolean closed = new AtomicBoolean(false);
    final String taskName = "Test rich join function";
    final RichFlatJoinFunction<String, String, Integer> joiner = new RichFlatJoinFunction<String, String, Integer>() {

        @Override
        public void open(Configuration parameters) throws Exception {
            opened.compareAndSet(false, true);
            assertEquals(0, getRuntimeContext().getIndexOfThisSubtask());
            assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
        }

        @Override
        public void close() throws Exception {
            closed.compareAndSet(false, true);
        }

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };
    InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>> base =
            new InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>>(
                    joiner,
                    new BinaryOperatorInformation<String, String, Integer>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                    new int[0], new int[0], taskName);
    final List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    final List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    final List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));
    try {
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<String, Accumulator<?, ?>>();
        final HashMap<String, Future<Path>> cpTasks = new HashMap<>();
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()), executionConfig);
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()), executionConfig);
        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(opened.get());
    assertTrue(closed.get());
}
Also used: Accumulator(org.apache.flink.api.common.accumulators.Accumulator) RichFlatJoinFunction(org.apache.flink.api.common.functions.RichFlatJoinFunction) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Collector(org.apache.flink.util.Collector) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) Future(java.util.concurrent.Future) Test(org.junit.Test)
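
Outside this operator-base harness, the same kind of rich joiner is normally wired up through the DataSet API. A minimal hypothetical sketch (the data and keys are illustrative; ExecutionEnvironment and DataSet are from org.apache.flink.api.java):

// Sketch: using a RichFlatJoinFunction via the DataSet API (illustrative only).
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<String, Integer>> left = env.fromElements(new Tuple2<>("foo", 1), new Tuple2<>("bar", 2));
DataSet<Tuple2<String, Integer>> right = env.fromElements(new Tuple2<>("foo", 10), new Tuple2<>("baz", 20));

DataSet<Integer> joined = left.join(right)
        // join keys: field 0 of the left input equals field 0 of the right input
        .where(0)
        .equalTo(0)
        .with(new RichFlatJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Integer>() {
            @Override
            public void join(Tuple2<String, Integer> first, Tuple2<String, Integer> second, Collector<Integer> out) {
                // a matching pair may emit zero, one, or many records through the Collector
                out.collect(first.f1 + second.f1);
            }
        });

joined.print();  // prints 11, from the matching "foo" records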

Example 29 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class InnerJoinOperatorBaseTest method testJoinPlain.

@Test
public void testJoinPlain() {
    final FlatJoinFunction<String, String, Integer> joiner = new FlatJoinFunction<String, String, Integer>() {

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };
    @SuppressWarnings({ "rawtypes", "unchecked" })
    InnerJoinOperatorBase<String, String, Integer, FlatJoinFunction<String, String, Integer>> base =
            new InnerJoinOperatorBase(
                    joiner,
                    new BinaryOperatorInformation(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                    new int[0], new int[0], "TestJoiner");
    List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));
    try {
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, null, executionConfig);
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, null, executionConfig);
        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: ArrayList(java.util.ArrayList) FlatJoinFunction(org.apache.flink.api.common.functions.FlatJoinFunction) RichFlatJoinFunction(org.apache.flink.api.common.functions.RichFlatJoinFunction) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) Collector(org.apache.flink.util.Collector) BinaryOperatorInformation(org.apache.flink.api.common.operators.BinaryOperatorInformation) Test(org.junit.Test)

Example 30 with Collector

use of org.apache.flink.util.Collector in project flink by apache.

the class PartitionMapOperatorTest method testMapPartitionWithRuntimeContext. A standalone DataSet usage sketch follows the listing.

@Test
public void testMapPartitionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();
        final MapPartitionFunction<String, Integer> parser = new RichMapPartitionFunction<String, Integer>() {

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void mapPartition(Iterable<String> values, Collector<Integer> out) {
                for (String s : values) {
                    out.collect(Integer.parseInt(s));
                }
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };
        MapPartitionOperatorBase<String, Integer, MapPartitionFunction<String, Integer>> op = new MapPartitionOperatorBase<String, Integer, MapPartitionFunction<String, Integer>>(parser, new UnaryOperatorInformation<String, Integer>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO), taskName);
        List<String> input = new ArrayList<String>(asList("1", "2", "3", "4", "5", "6"));
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        ExecutionConfig executionConfig = new ExecutionConfig();
        executionConfig.disableObjectReuse();
        List<Integer> resultMutableSafe = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()), executionConfig);
        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(), new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()), executionConfig);
        assertEquals(asList(1, 2, 3, 4, 5, 6), resultMutableSafe);
        assertEquals(asList(1, 2, 3, 4, 5, 6), resultRegular);
        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used: Path(org.apache.flink.core.fs.Path) UnregisteredMetricsGroup(org.apache.flink.metrics.groups.UnregisteredMetricsGroup) Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) RichMapPartitionFunction(org.apache.flink.api.common.functions.RichMapPartitionFunction) ArrayList(java.util.ArrayList) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) TaskInfo(org.apache.flink.api.common.TaskInfo) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) MapPartitionFunction(org.apache.flink.api.common.functions.MapPartitionFunction) Collector(org.apache.flink.util.Collector) RuntimeUDFContext(org.apache.flink.api.common.functions.util.RuntimeUDFContext) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) Test(org.junit.Test)
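
For comparison, the same partition-wise parsing can be expressed directly on a DataSet. A minimal hypothetical sketch (input values are illustrative):

// Sketch: mapPartition via the DataSet API (illustrative only).
// The function is called once per parallel partition and may emit any number of records.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<String> input = env.fromElements("1", "2", "3", "4", "5", "6");

DataSet<Integer> parsed = input.mapPartition(new MapPartitionFunction<String, Integer>() {
    @Override
    public void mapPartition(Iterable<String> values, Collector<Integer> out) {
        for (String s : values) {
            out.collect(Integer.parseInt(s));
        }
    }
});

parsed.print();  // 1 2 3 4 5 6 (ordering depends on parallelism)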

Aggregations

Collector (org.apache.flink.util.Collector): 50
Test (org.junit.Test): 38
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 20
ExecutionConfig (org.apache.flink.api.common.ExecutionConfig): 16
Configuration (org.apache.flink.configuration.Configuration): 16
ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment): 15
RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext): 14
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14
TimeWindow (org.apache.flink.streaming.api.windowing.windows.TimeWindow): 11
ArrayList (java.util.ArrayList): 8
HashMap (java.util.HashMap): 8
FlatMapFunction (org.apache.flink.api.common.functions.FlatMapFunction): 8
Plan (org.apache.flink.api.common.Plan): 7
HashSet (java.util.HashSet): 6
RichGroupReduceFunction (org.apache.flink.api.common.functions.RichGroupReduceFunction): 6
Tuple3 (org.apache.flink.api.java.tuple.Tuple3): 6
GroupReduceFunction (org.apache.flink.api.common.functions.GroupReduceFunction): 5
Map (java.util.Map): 4
TaskInfo (org.apache.flink.api.common.TaskInfo): 4
CoGroupFunction (org.apache.flink.api.common.functions.CoGroupFunction): 4