Use of org.apache.flink.util.Collector in project flink by apache.
Class KafkaConsumerTestBase, method readSequence.
// ------------------------------------------------------------------------
//  Reading / writing test data sets
// ------------------------------------------------------------------------
/**
 * Runs a job using the provided environment to read a sequence of records from a single Kafka topic.
 * The expected starting offset and the total number of values to read can be specified individually for each partition.
 * The job is considered successful only if the values read from every partition match that partition's start offset and value count.
 */
protected void readSequence(
        final StreamExecutionEnvironment env,
        final StartupMode startupMode,
        final Map<KafkaTopicPartition, Long> specificStartupOffsets,
        final Properties cc,
        final String topicName,
        final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset) throws Exception {

    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();

    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset : partitionsToValuesCountAndStartOffset.entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;

    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType = TypeInfoParser.parse("Tuple2<Integer, Integer>");
    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser =
            new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());

    // create the consumer
    cc.putAll(secureProps);
    FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
    switch (startupMode) {
        case EARLIEST:
            consumer.setStartFromEarliest();
            break;
        case LATEST:
            consumer.setStartFromLatest();
            break;
        case SPECIFIC_OFFSETS:
            consumer.setStartFromSpecificOffsets(specificStartupOffsets);
            break;
        case GROUP_OFFSETS:
            consumer.setStartFromGroupOffsets();
            break;
    }

    DataStream<Tuple2<Integer, Integer>> source = env
            .addSource(consumer).setParallelism(sourceParallelism)
            .map(new ThrottledMapper<Tuple2<Integer, Integer>>(20)).setParallelism(sourceParallelism);

    // verify data
    source.flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

        private HashMap<Integer, BitSet> partitionsToValueCheck;
        private int count = 0;

        @Override
        public void open(Configuration parameters) throws Exception {
            partitionsToValueCheck = new HashMap<>();
            for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                partitionsToValueCheck.put(partition, new BitSet());
            }
        }

        @Override
        public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
            int partition = value.f0;
            int val = value.f1;
            BitSet bitSet = partitionsToValueCheck.get(partition);
            if (bitSet == null) {
                throw new RuntimeException("Got a record from an unknown partition");
            } else {
                bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
            }

            count++;
            LOG.info("Received message {}, total {} messages", value, count);

            // verify if we've seen everything
            if (count == finalCount) {
                for (Map.Entry<Integer, BitSet> partitionsToValueCheck : this.partitionsToValueCheck.entrySet()) {
                    BitSet check = partitionsToValueCheck.getValue();
                    int expectedValueCount = partitionsToValuesCountAndStartOffset.get(partitionsToValueCheck.getKey()).f0;

                    if (check.cardinality() != expectedValueCount) {
                        throw new RuntimeException("Expected cardinality to be " + expectedValueCount + ", but was " + check.cardinality());
                    } else if (check.nextClearBit(0) != expectedValueCount) {
                        throw new RuntimeException("Expected next clear bit to be " + expectedValueCount + ", but was " + check.nextClearBit(0));
                    }
                }

                // test has passed
                throw new SuccessException();
            }
        }
    }).setParallelism(1);

    tryExecute(env, "Read data from Kafka");

    LOG.info("Successfully read sequence for verification");
}
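For orientation, a call to readSequence from a concrete test might look like the sketch below. The topic name, partition layout, value counts, and start offsets are illustrative assumptions, not values taken from the snippet above; standardProps and flinkPort are the fields the test base already uses elsewhere in this section.

// Hypothetical usage sketch (all concrete values are illustrative):
// read 50 values from each of 3 partitions of "testTopic",
// starting from the committed consumer group offsets.
final StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
env.getConfig().disableSysoutLogging();

final Properties readProps = new Properties();
readProps.putAll(standardProps);

final Map<Integer, Tuple2<Integer, Integer>> partitionsToValueCountAndStartOffset = new HashMap<>();
partitionsToValueCountAndStartOffset.put(0, new Tuple2<>(50, 0)); // partition 0: 50 values, starting at 0
partitionsToValueCountAndStartOffset.put(1, new Tuple2<>(50, 0)); // partition 1: 50 values, starting at 0
partitionsToValueCountAndStartOffset.put(2, new Tuple2<>(50, 0)); // partition 2: 50 values, starting at 0

readSequence(env, StartupMode.GROUP_OFFSETS, null, readProps, "testTopic", partitionsToValueCountAndStartOffset);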
Use of org.apache.flink.util.Collector in project flink by apache.
Class KafkaConsumerTestBase, method runProduceConsumeMultipleTopics.
/**
 * Tests producing into and consuming from multiple topics.
 *
 * @throws java.lang.Exception
 */
public void runProduceConsumeMultipleTopics() throws java.lang.Exception {
    final int NUM_TOPICS = 5;
    final int NUM_ELEMENTS = 20;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    // create topics with content
    final List<String> topics = new ArrayList<>();
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        topics.add(topic);
        // create topic
        createTestTopic(topic, i + 1 /* partitions */, 1);
    }

    // run first job, producing into all topics
    DataStream<Tuple3<Integer, Integer, String>> stream = env.addSource(new RichParallelSourceFunction<Tuple3<Integer, Integer, String>>() {

        @Override
        public void run(SourceContext<Tuple3<Integer, Integer, String>> ctx) throws Exception {
            int partition = getRuntimeContext().getIndexOfThisSubtask();

            for (int topicId = 0; topicId < NUM_TOPICS; topicId++) {
                for (int i = 0; i < NUM_ELEMENTS; i++) {
                    ctx.collect(new Tuple3<>(partition, i, "topic-" + topicId));
                }
            }
        }

        @Override
        public void cancel() {
        }
    });

    Tuple2WithTopicSchema schema = new Tuple2WithTopicSchema(env.getConfig());

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);

    // the schema picks the actual target topic per record; "dummy" only serves as the default topic name
    kafkaServer.produceIntoKafka(stream, "dummy", schema, props, null);

    env.execute("Write to topics");

    // run second job consuming from multiple topics
    env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", flinkPort);
    env.getConfig().disableSysoutLogging();

    stream = env.addSource(kafkaServer.getConsumer(topics, schema, props));

    stream.flatMap(new FlatMapFunction<Tuple3<Integer, Integer, String>, Integer>() {

        Map<String, Integer> countPerTopic = new HashMap<>(NUM_TOPICS);

        @Override
        public void flatMap(Tuple3<Integer, Integer, String> value, Collector<Integer> out) throws Exception {
            Integer count = countPerTopic.get(value.f2);
            if (count == null) {
                count = 1;
            } else {
                count++;
            }
            countPerTopic.put(value.f2, count);

            // check map: succeed only once every topic has delivered exactly NUM_ELEMENTS records
            if (countPerTopic.size() < NUM_TOPICS) {
                // not all topics seen yet
                return;
            }
            for (Map.Entry<String, Integer> el : countPerTopic.entrySet()) {
                if (el.getValue() < NUM_ELEMENTS) {
                    // not enough yet
                    return;
                }
                if (el.getValue() > NUM_ELEMENTS) {
                    throw new RuntimeException("There is a failure in the test. I've read " + el.getValue() + " from topic " + el.getKey());
                }
            }
            // we've seen all messages from all topics
            throw new SuccessException();
        }
    }).setParallelism(1);

    tryExecute(env, "Count elements from the topics");

    // delete all topics again
    for (int i = 0; i < NUM_TOPICS; i++) {
        final String topic = "topic-" + i;
        deleteTestTopic(topic);
    }
}
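Tuple2WithTopicSchema itself is not shown in this snippet. Because the producer is handed the placeholder topic "dummy" while the verification groups counts by the topic name carried in field f2, the schema evidently routes each record to the topic named in f2. Below is a minimal sketch of such a topic-routing serializer, assuming the KeyedSerializationSchema interface of the Flink Kafka connector; the class name, field layout, and the null message key are illustrative, and imports depend on the Flink version.

// Illustrative sketch only: routes each Tuple3<Integer, Integer, String> record to the
// topic named in field f2 and serializes the remaining (f0, f1) pair as the value.
public class TopicRoutingSchema implements KeyedSerializationSchema<Tuple3<Integer, Integer, String>> {

    private final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> valueSchema;

    public TopicRoutingSchema(ExecutionConfig config) {
        this.valueSchema = new TypeInformationSerializationSchema<>(
                TypeInfoParser.<Tuple2<Integer, Integer>>parse("Tuple2<Integer, Integer>"), config);
    }

    @Override
    public byte[] serializeKey(Tuple3<Integer, Integer, String> element) {
        return null; // no message key
    }

    @Override
    public byte[] serializeValue(Tuple3<Integer, Integer, String> element) {
        return valueSchema.serialize(new Tuple2<>(element.f0, element.f1));
    }

    @Override
    public String getTargetTopic(Tuple3<Integer, Integer, String> element) {
        return element.f2; // overrides the default topic passed to produceIntoKafka(...)
    }
}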
Use of org.apache.flink.util.Collector in project flink by apache.
Class InnerJoinOperatorBaseTest, method testJoinRich.
@Test
public void testJoinRich() {
    final AtomicBoolean opened = new AtomicBoolean(false);
    final AtomicBoolean closed = new AtomicBoolean(false);
    final String taskName = "Test rich join function";

    final RichFlatJoinFunction<String, String, Integer> joiner = new RichFlatJoinFunction<String, String, Integer>() {

        @Override
        public void open(Configuration parameters) throws Exception {
            opened.compareAndSet(false, true);
            assertEquals(0, getRuntimeContext().getIndexOfThisSubtask());
            assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
        }

        @Override
        public void close() throws Exception {
            closed.compareAndSet(false, true);
        }

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };

    InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>> base =
            new InnerJoinOperatorBase<String, String, Integer, RichFlatJoinFunction<String, String, Integer>>(
                    joiner,
                    new BinaryOperatorInformation<String, String, Integer>(
                            BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                    new int[0], new int[0], taskName);

    final List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    final List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    // "foo" joins with "foo" (lengths 3, 3) and "foobar" with "foobar" (lengths 6, 6); "bar" has no match
    final List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

    try {
        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);
        final HashMap<String, Accumulator<?, ?>> accumulatorMap = new HashMap<String, Accumulator<?, ?>>();
        final HashMap<String, Future<Path>> cpTasks = new HashMap<>();

        ExecutionConfig executionConfig = new ExecutionConfig();

        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()),
                executionConfig);

        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2,
                new RuntimeUDFContext(taskInfo, null, executionConfig, cpTasks, accumulatorMap, new UnregisteredMetricsGroup()),
                executionConfig);

        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    assertTrue(opened.get());
    assertTrue(closed.get());
}
Use of org.apache.flink.util.Collector in project flink by apache.
Class InnerJoinOperatorBaseTest, method testJoinPlain.
@Test
public void testJoinPlain() {
    final FlatJoinFunction<String, String, Integer> joiner = new FlatJoinFunction<String, String, Integer>() {

        @Override
        public void join(String first, String second, Collector<Integer> out) throws Exception {
            out.collect(first.length());
            out.collect(second.length());
        }
    };

    @SuppressWarnings({ "rawtypes", "unchecked" })
    InnerJoinOperatorBase<String, String, Integer, FlatJoinFunction<String, String, Integer>> base =
            new InnerJoinOperatorBase(
                    joiner,
                    new BinaryOperatorInformation(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                    new int[0], new int[0], "TestJoiner");

    List<String> inputData1 = new ArrayList<String>(Arrays.asList("foo", "bar", "foobar"));
    List<String> inputData2 = new ArrayList<String>(Arrays.asList("foobar", "foo"));
    List<Integer> expected = new ArrayList<Integer>(Arrays.asList(3, 3, 6, 6));

    try {
        ExecutionConfig executionConfig = new ExecutionConfig();

        executionConfig.disableObjectReuse();
        List<Integer> resultSafe = base.executeOnCollections(inputData1, inputData2, null, executionConfig);

        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = base.executeOnCollections(inputData1, inputData2, null, executionConfig);

        assertEquals(expected, resultSafe);
        assertEquals(expected, resultRegular);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
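Both join tests drive the user function through executeOnCollections. In a regular program the same kind of flat join function is attached to a DataSet join; the sketch below shows that usage with illustrative inputs and identity key selectors, none of which are taken from the test above.

// Illustrative sketch: using a FlatJoinFunction in the DataSet API.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<String> left = env.fromElements("foo", "bar", "foobar");
DataSet<String> right = env.fromElements("foobar", "foo");

DataSet<Integer> lengths = left.join(right)
        .where(new KeySelector<String, String>() {
            @Override
            public String getKey(String value) {
                return value; // join on the full string
            }
        })
        .equalTo(new KeySelector<String, String>() {
            @Override
            public String getKey(String value) {
                return value;
            }
        })
        .with(new FlatJoinFunction<String, String, Integer>() {
            @Override
            public void join(String first, String second, Collector<Integer> out) {
                out.collect(first.length());
                out.collect(second.length());
            }
        });

lengths.print(); // 3, 3, 6, 6 (order depends on execution)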
Use of org.apache.flink.util.Collector in project flink by apache.
Class PartitionMapOperatorTest, method testMapPartitionWithRuntimeContext.
@Test
public void testMapPartitionWithRuntimeContext() {
    try {
        final String taskName = "Test Task";
        final AtomicBoolean opened = new AtomicBoolean();
        final AtomicBoolean closed = new AtomicBoolean();

        final MapPartitionFunction<String, Integer> parser = new RichMapPartitionFunction<String, Integer>() {

            @Override
            public void open(Configuration parameters) throws Exception {
                opened.set(true);
                RuntimeContext ctx = getRuntimeContext();
                assertEquals(0, ctx.getIndexOfThisSubtask());
                assertEquals(1, ctx.getNumberOfParallelSubtasks());
                assertEquals(taskName, ctx.getTaskName());
            }

            @Override
            public void mapPartition(Iterable<String> values, Collector<Integer> out) {
                for (String s : values) {
                    out.collect(Integer.parseInt(s));
                }
            }

            @Override
            public void close() throws Exception {
                closed.set(true);
            }
        };

        MapPartitionOperatorBase<String, Integer, MapPartitionFunction<String, Integer>> op =
                new MapPartitionOperatorBase<String, Integer, MapPartitionFunction<String, Integer>>(
                        parser,
                        new UnaryOperatorInformation<String, Integer>(BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
                        taskName);

        List<String> input = new ArrayList<String>(asList("1", "2", "3", "4", "5", "6"));

        final TaskInfo taskInfo = new TaskInfo(taskName, 1, 0, 1, 0);

        ExecutionConfig executionConfig = new ExecutionConfig();

        executionConfig.disableObjectReuse();
        List<Integer> resultMutableSafe = op.executeOnCollections(input,
                new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(),
                        new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()),
                executionConfig);

        executionConfig.enableObjectReuse();
        List<Integer> resultRegular = op.executeOnCollections(input,
                new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<String, Future<Path>>(),
                        new HashMap<String, Accumulator<?, ?>>(), new UnregisteredMetricsGroup()),
                executionConfig);

        assertEquals(asList(1, 2, 3, 4, 5, 6), resultMutableSafe);
        assertEquals(asList(1, 2, 3, 4, 5, 6), resultRegular);

        assertTrue(opened.get());
        assertTrue(closed.get());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
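For comparison, the same parsing logic attached to a DataSet via mapPartition would look roughly like the sketch below; the input elements are illustrative and not taken from the test.

// Illustrative sketch: the parser applied through the DataSet API.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

DataSet<Integer> parsed = env.fromElements("1", "2", "3", "4", "5", "6")
        .mapPartition(new RichMapPartitionFunction<String, Integer>() {
            @Override
            public void mapPartition(Iterable<String> values, Collector<Integer> out) {
                for (String s : values) {
                    out.collect(Integer.parseInt(s));
                }
            }
        });

parsed.print(); // 1, 2, 3, 4, 5, 6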