
Example 31 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

From the class FlinkKafkaConsumerBase, the method logPartitionInfo.

/**
 * Logs the partition information at INFO level.
 *
 * @param logger The logger to log to.
 * @param partitionInfos List of subscribed partitions
 */
protected static void logPartitionInfo(Logger logger, List<KafkaTopicPartition> partitionInfos) {
    Map<String, Integer> countPerTopic = new HashMap<>();
    for (KafkaTopicPartition partition : partitionInfos) {
        Integer count = countPerTopic.get(partition.getTopic());
        if (count == null) {
            count = 1;
        } else {
            count++;
        }
        countPerTopic.put(partition.getTopic(), count);
    }
    StringBuilder sb = new StringBuilder("Consumer is going to read the following topics (with number of partitions): ");
    for (Map.Entry<String, Integer> e : countPerTopic.entrySet()) {
        sb.append(e.getKey()).append(" (").append(e.getValue()).append("), ");
    }
    logger.info(sb.toString());
}
Also used : HashMap(java.util.HashMap) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) HashMap(java.util.HashMap) Map(java.util.Map) LinkedMap(org.apache.commons.collections.map.LinkedMap)
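To make the output of this helper concrete, here is a small standalone sketch that re-implements the same per-topic counting for an illustrative partition list. The class name, topics, and partition numbers are invented for the example; logPartitionInfo itself is protected, so in Flink it is invoked from within the consumer classes themselves rather than from user code.

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;

// Hypothetical standalone sketch, not part of the Flink sources.
public class PartitionInfoSketch {

    public static void main(String[] args) {
        // Illustrative subscription: two partitions of "orders", one of "payments".
        List<KafkaTopicPartition> partitions = Arrays.asList(
                new KafkaTopicPartition("orders", 0),
                new KafkaTopicPartition("orders", 1),
                new KafkaTopicPartition("payments", 0));

        // merge() is the idiomatic equivalent of the null-check-and-increment loop above.
        Map<String, Integer> countPerTopic = new HashMap<>();
        for (KafkaTopicPartition partition : partitions) {
            countPerTopic.merge(partition.getTopic(), 1, Integer::sum);
        }

        // Prints roughly what logPartitionInfo logs at INFO level:
        // "Consumer is going to read the following topics (with number of partitions): orders (2), payments (1), "
        StringBuilder sb = new StringBuilder(
                "Consumer is going to read the following topics (with number of partitions): ");
        countPerTopic.forEach((topic, count) -> sb.append(topic).append(" (").append(count).append("), "));
        System.out.println(sb);
    }
}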

Example 32 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

From the class KafkaConsumerTestBase, the method readSequence.

// ------------------------------------------------------------------------
// Reading writing test data sets
// ------------------------------------------------------------------------
/**
 * Runs a job using the provided environment to read a sequence of records from a single Kafka
 * topic. The expected starting offset and the total number of values to read can be specified
 * individually for each partition. The job is considered successful only if every partition's
 * read results match its start offset and value count criteria.
 */
protected void readSequence(final StreamExecutionEnvironment env, final StartupMode startupMode, final Map<KafkaTopicPartition, Long> specificStartupOffsets, final Long startupTimestamp, final Properties cc, final String topicName, final Map<Integer, Tuple2<Integer, Integer>> partitionsToValuesCountAndStartOffset) throws Exception {
    final int sourceParallelism = partitionsToValuesCountAndStartOffset.keySet().size();
    int finalCountTmp = 0;
    for (Map.Entry<Integer, Tuple2<Integer, Integer>> valuesCountAndStartOffset : partitionsToValuesCountAndStartOffset.entrySet()) {
        finalCountTmp += valuesCountAndStartOffset.getValue().f0;
    }
    final int finalCount = finalCountTmp;
    final TypeInformation<Tuple2<Integer, Integer>> intIntTupleType = TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {
    });
    final TypeInformationSerializationSchema<Tuple2<Integer, Integer>> deser = new TypeInformationSerializationSchema<>(intIntTupleType, env.getConfig());
    // create the consumer
    cc.putAll(secureProps);
    DataStreamSource<Tuple2<Integer, Integer>> source;
    if (useNewSource) {
        KafkaSourceBuilder<Tuple2<Integer, Integer>> sourceBuilder = kafkaServer.getSourceBuilder(topicName, deser, cc);
        Map<TopicPartition, Long> startOffsets = new HashMap<>();
        if (specificStartupOffsets != null) {
            specificStartupOffsets.forEach((ktp, offset) -> startOffsets.put(new TopicPartition(ktp.getTopic(), ktp.getPartition()), offset));
        }
        setKafkaSourceOffset(startupMode, sourceBuilder, startOffsets, startupTimestamp);
        source = env.fromSource(sourceBuilder.build(), WatermarkStrategy.noWatermarks(), "KafkaSource");
    } else {
        FlinkKafkaConsumerBase<Tuple2<Integer, Integer>> consumer = kafkaServer.getConsumer(topicName, deser, cc);
        setKafkaConsumerOffset(startupMode, consumer, specificStartupOffsets, startupTimestamp);
        source = env.addSource(consumer);
    }
    source.setParallelism(sourceParallelism).map(new ThrottledMapper<>(20)).setParallelism(sourceParallelism).flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {

        private HashMap<Integer, BitSet> partitionsToValueCheck;

        private int count = 0;

        @Override
        public void open(Configuration parameters) throws Exception {
            partitionsToValueCheck = new HashMap<>();
            for (Integer partition : partitionsToValuesCountAndStartOffset.keySet()) {
                partitionsToValueCheck.put(partition, new BitSet());
            }
        }

        @Override
        public void flatMap(Tuple2<Integer, Integer> value, Collector<Integer> out) throws Exception {
            int partition = value.f0;
            int val = value.f1;
            BitSet bitSet = partitionsToValueCheck.get(partition);
            if (bitSet == null) {
                throw new RuntimeException("Got a record from an unknown partition");
            } else {
                bitSet.set(val - partitionsToValuesCountAndStartOffset.get(partition).f1);
            }
            count++;
            LOG.info("Received message {}, total {} messages", value, count);
            // verify if we've seen everything
            if (count == finalCount) {
                for (Map.Entry<Integer, BitSet> partitionsToValueCheck : this.partitionsToValueCheck.entrySet()) {
                    BitSet check = partitionsToValueCheck.getValue();
                    int expectedValueCount = partitionsToValuesCountAndStartOffset.get(partitionsToValueCheck.getKey()).f0;
                    if (check.cardinality() != expectedValueCount) {
                        throw new RuntimeException("Expected cardinality to be " + expectedValueCount + ", but was " + check.cardinality());
                    } else if (check.nextClearBit(0) != expectedValueCount) {
                        throw new RuntimeException("Expected next clear bit to be " + expectedValueCount + ", but was " + check.cardinality());
                    }
                }
                // test has passed
                throw new SuccessException();
            }
        }
    }).setParallelism(1);
    tryExecute(env, "Read data from Kafka");
    LOG.info("Successfully read sequence for verification");
}
Also used : Configuration(org.apache.flink.configuration.Configuration) HashMap(java.util.HashMap) ThrottledMapper(org.apache.flink.streaming.connectors.kafka.testutils.ThrottledMapper) Collector(org.apache.flink.util.Collector) BitSet(java.util.BitSet) TypeHint(org.apache.flink.api.common.typeinfo.TypeHint) TypeInformationSerializationSchema(org.apache.flink.api.common.serialization.TypeInformationSerializationSchema) Tuple2(org.apache.flink.api.java.tuple.Tuple2) TopicPartition(org.apache.kafka.common.TopicPartition) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) RichFlatMapFunction(org.apache.flink.api.common.functions.RichFlatMapFunction) SuccessException(org.apache.flink.test.util.SuccessException) Map(java.util.Map) HashMap(java.util.HashMap)
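One detail worth isolating from readSequence is the bridge between Flink's KafkaTopicPartition and the Kafka client's TopicPartition, which is needed whenever specific startup offsets are handed to the new KafkaSource. A minimal self-contained sketch of that conversion, with an invented topic name and offsets:

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.kafka.common.TopicPartition;

// Hypothetical sketch of the conversion performed in readSequence on the new-source path.
public class StartupOffsetConversionSketch {

    public static void main(String[] args) {
        // Flink-side specific startup offsets, keyed by the connector's own partition type.
        Map<KafkaTopicPartition, Long> specificStartupOffsets = new HashMap<>();
        specificStartupOffsets.put(new KafkaTopicPartition("testTopic", 0), 23L);
        specificStartupOffsets.put(new KafkaTopicPartition("testTopic", 1), 31L);

        // Same forEach as above: re-key by the Kafka client's TopicPartition so the offsets
        // can be passed to a KafkaSourceBuilder.
        Map<TopicPartition, Long> startOffsets = new HashMap<>();
        specificStartupOffsets.forEach(
                (ktp, offset) ->
                        startOffsets.put(new TopicPartition(ktp.getTopic(), ktp.getPartition()), offset));

        startOffsets.forEach((tp, offset) -> System.out.println(tp + " -> " + offset));
    }
}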

Example 33 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

From the class KafkaConnectorOptionsUtil, the method getStartupOptions.

public static StartupOptions getStartupOptions(ReadableConfig tableOptions) {
    final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    final StartupMode startupMode = tableOptions.getOptional(SCAN_STARTUP_MODE).map(KafkaConnectorOptionsUtil::fromOption).orElse(StartupMode.GROUP_OFFSETS);
    if (startupMode == StartupMode.SPECIFIC_OFFSETS) {
        // This will be refactored once specifying offsets for multiple topics is supported
        // (FLINK-18602). validateScanStartupMode() has already checked that
        // tableOptions.get(TOPIC) contains exactly one topic.
        buildSpecificOffsets(tableOptions, tableOptions.get(TOPIC).get(0), specificOffsets);
    }
    final StartupOptions options = new StartupOptions();
    options.startupMode = startupMode;
    options.specificOffsets = specificOffsets;
    if (startupMode == StartupMode.TIMESTAMP) {
        options.startupTimestampMillis = tableOptions.get(SCAN_STARTUP_TIMESTAMP_MILLIS);
    }
    return options;
}
Also used : HashMap(java.util.HashMap) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) ScanStartupMode(org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptions.ScanStartupMode) StartupMode(org.apache.flink.streaming.connectors.kafka.config.StartupMode)
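A rough sketch of how this method might be exercised directly. The option keys follow the documented Kafka SQL connector options ('topic', 'scan.startup.mode', 'scan.startup.specific-offsets'); treat the exact keys, the offset string format, and the visibility of KafkaConnectorOptionsUtil.StartupOptions as assumptions of the sketch rather than guarantees.

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.connectors.kafka.table.KafkaConnectorOptionsUtil;

// Hypothetical sketch; in Flink this path is normally driven by a table factory, not called directly.
public class StartupOptionsSketch {

    public static void main(String[] args) {
        // Configuration implements ReadableConfig, so it can stand in for the table options here.
        Configuration tableOptions = new Configuration();
        tableOptions.setString("topic", "testTopic");
        tableOptions.setString("scan.startup.mode", "specific-offsets");
        tableOptions.setString("scan.startup.specific-offsets", "partition:0,offset:42;partition:1,offset:300");

        KafkaConnectorOptionsUtil.StartupOptions options =
                KafkaConnectorOptionsUtil.getStartupOptions(tableOptions);

        // Expected: startupMode == SPECIFIC_OFFSETS, specificOffsets mapping
        // testTopic partition 0 -> 42 and partition 1 -> 300.
        System.out.println(options.startupMode + " " + options.specificOffsets);
    }
}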

Example 34 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

From the class FlinkKafkaConsumerBaseTest, the method testSnapshotStateWithCommitOnCheckpointsEnabled.

@Test
@SuppressWarnings("unchecked")
public void testSnapshotStateWithCommitOnCheckpointsEnabled() throws Exception {
    // --------------------------------------------------------------------
    // prepare fake states
    // --------------------------------------------------------------------
    final HashMap<KafkaTopicPartition, Long> state1 = new HashMap<>();
    state1.put(new KafkaTopicPartition("abc", 13), 16768L);
    state1.put(new KafkaTopicPartition("def", 7), 987654321L);
    final HashMap<KafkaTopicPartition, Long> state2 = new HashMap<>();
    state2.put(new KafkaTopicPartition("abc", 13), 16770L);
    state2.put(new KafkaTopicPartition("def", 7), 987654329L);
    final HashMap<KafkaTopicPartition, Long> state3 = new HashMap<>();
    state3.put(new KafkaTopicPartition("abc", 13), 16780L);
    state3.put(new KafkaTopicPartition("def", 7), 987654377L);
    // --------------------------------------------------------------------
    final MockFetcher<String> fetcher = new MockFetcher<>(state1, state2, state3);
    final FlinkKafkaConsumerBase<String> consumer = new DummyFlinkKafkaConsumer<>(fetcher, mock(AbstractPartitionDiscoverer.class), false);
    final TestingListState<Serializable> listState = new TestingListState<>();
    // set up and run the consumer; wait until the consumer reaches the main fetch loop
    // before continuing the test
    setupConsumer(consumer, false, listState, true, 0, 1);
    final CheckedThread runThread = new CheckedThread() {

        @Override
        public void go() throws Exception {
            consumer.run(new TestSourceContext<>());
        }
    };
    runThread.start();
    fetcher.waitUntilRun();
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    // checkpoint 1
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(138, 138));
    HashMap<KafkaTopicPartition, Long> snapshot1 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot1.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }
    assertEquals(state1, snapshot1);
    assertEquals(1, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state1, consumer.getPendingOffsetsToCommit().get(138L));
    // checkpoint 2
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(140, 140));
    HashMap<KafkaTopicPartition, Long> snapshot2 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot2.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }
    assertEquals(state2, snapshot2);
    assertEquals(2, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state2, consumer.getPendingOffsetsToCommit().get(140L));
    // ack checkpoint 1
    consumer.notifyCheckpointComplete(138L);
    assertEquals(1, consumer.getPendingOffsetsToCommit().size());
    assertTrue(consumer.getPendingOffsetsToCommit().containsKey(140L));
    assertEquals(state1, fetcher.getAndClearLastCommittedOffsets());
    assertEquals(1, fetcher.getCommitCount());
    // checkpoint 3
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(141, 141));
    HashMap<KafkaTopicPartition, Long> snapshot3 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot3.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }
    assertEquals(state3, snapshot3);
    assertEquals(2, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state3, consumer.getPendingOffsetsToCommit().get(141L));
    // ack checkpoint 3, subsumes number 2
    consumer.notifyCheckpointComplete(141L);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    assertEquals(state3, fetcher.getAndClearLastCommittedOffsets());
    assertEquals(2, fetcher.getCommitCount());
    // invalid checkpoint
    consumer.notifyCheckpointComplete(666);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    assertNull(fetcher.getAndClearLastCommittedOffsets());
    assertEquals(2, fetcher.getCommitCount());
    consumer.cancel();
    runThread.sync();
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) AbstractPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer) CheckedThread(org.apache.flink.core.testutils.CheckedThread) Tuple2(org.apache.flink.api.java.tuple.Tuple2) OptionalLong(java.util.OptionalLong) Test(org.junit.Test)
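The three snapshot-to-map loops in this test are identical; a small helper along these lines (hypothetical, not part of the Flink test class, and assuming the test's own TestingListState type) could factor them out, so each block collapses to assertEquals(stateN, snapshotToMap(listState)):

// Hypothetical helper assuming the test's TestingListState<Serializable> field type.
@SuppressWarnings("unchecked")
private static HashMap<KafkaTopicPartition, Long> snapshotToMap(
        TestingListState<Serializable> listState) throws Exception {
    HashMap<KafkaTopicPartition, Long> snapshot = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> entry = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot.put(entry.f0, entry.f1);
    }
    return snapshot;
}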

Example 35 with KafkaTopicPartition

Use of org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition in project flink by apache.

From the class FlinkKafkaConsumerBaseTest, the method testSnapshotStateWithCommitOnCheckpointsDisabled.

@Test
@SuppressWarnings("unchecked")
public void testSnapshotStateWithCommitOnCheckpointsDisabled() throws Exception {
    // --------------------------------------------------------------------
    // prepare fake states
    // --------------------------------------------------------------------
    final HashMap<KafkaTopicPartition, Long> state1 = new HashMap<>();
    state1.put(new KafkaTopicPartition("abc", 13), 16768L);
    state1.put(new KafkaTopicPartition("def", 7), 987654321L);
    final HashMap<KafkaTopicPartition, Long> state2 = new HashMap<>();
    state2.put(new KafkaTopicPartition("abc", 13), 16770L);
    state2.put(new KafkaTopicPartition("def", 7), 987654329L);
    final HashMap<KafkaTopicPartition, Long> state3 = new HashMap<>();
    state3.put(new KafkaTopicPartition("abc", 13), 16780L);
    state3.put(new KafkaTopicPartition("def", 7), 987654377L);
    // --------------------------------------------------------------------
    final MockFetcher<String> fetcher = new MockFetcher<>(state1, state2, state3);
    final FlinkKafkaConsumerBase<String> consumer = new DummyFlinkKafkaConsumer<>(fetcher, mock(AbstractPartitionDiscoverer.class), false);
    // disable offset committing
    consumer.setCommitOffsetsOnCheckpoints(false);
    final TestingListState<Serializable> listState = new TestingListState<>();
    // set up and run the consumer; wait until the consumer reaches the main fetch loop
    // before continuing the test
    setupConsumer(consumer, false, listState, true, 0, 1);
    final CheckedThread runThread = new CheckedThread() {

        @Override
        public void go() throws Exception {
            consumer.run(new TestSourceContext<>());
        }
    };
    runThread.start();
    fetcher.waitUntilRun();
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    // checkpoint 1
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(138, 138));
    HashMap<KafkaTopicPartition, Long> snapshot1 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot1.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }
    assertEquals(state1, snapshot1);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    // checkpoint 2
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(140, 140));
    HashMap<KafkaTopicPartition, Long> snapshot2 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot2.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }
    assertEquals(state2, snapshot2);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    // ack checkpoint 1
    consumer.notifyCheckpointComplete(138L);
    assertEquals(0, fetcher.getCommitCount());
    // no offsets should be committed
    assertNull(fetcher.getAndClearLastCommittedOffsets());
    // checkpoint 3
    consumer.snapshotState(new StateSnapshotContextSynchronousImpl(141, 141));
    HashMap<KafkaTopicPartition, Long> snapshot3 = new HashMap<>();
    for (Serializable serializable : listState.get()) {
        Tuple2<KafkaTopicPartition, Long> kafkaTopicPartitionLongTuple2 = (Tuple2<KafkaTopicPartition, Long>) serializable;
        snapshot3.put(kafkaTopicPartitionLongTuple2.f0, kafkaTopicPartitionLongTuple2.f1);
    }
    assertEquals(state3, snapshot3);
    assertEquals(0, consumer.getPendingOffsetsToCommit().size());
    // ack checkpoint 3, subsumes number 2
    consumer.notifyCheckpointComplete(141L);
    assertEquals(0, fetcher.getCommitCount());
    // no offsets should be committed
    assertNull(fetcher.getAndClearLastCommittedOffsets());
    // invalid checkpoint
    consumer.notifyCheckpointComplete(666);
    assertEquals(0, fetcher.getCommitCount());
    // no offsets should be committed
    assertNull(fetcher.getAndClearLastCommittedOffsets());
    consumer.cancel();
    runThread.sync();
}
Also used : Serializable(java.io.Serializable) HashMap(java.util.HashMap) StateSnapshotContextSynchronousImpl(org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) AbstractPartitionDiscoverer(org.apache.flink.streaming.connectors.kafka.internals.AbstractPartitionDiscoverer) CheckedThread(org.apache.flink.core.testutils.CheckedThread) Tuple2(org.apache.flink.api.java.tuple.Tuple2) OptionalLong(java.util.OptionalLong) Test(org.junit.Test)
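The flag this test flips is also part of the public consumer API. A minimal sketch of disabling checkpoint-based offset committing in a user job; the topic name, group id, and broker address are invented for the example.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class DisableOffsetCommitSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5_000);

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "offset-commit-example");

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("testTopic", new SimpleStringSchema(), props);
        // Same switch the test exercises: no offsets are committed back to Kafka on checkpoints,
        // so the consumer group's offsets in Kafka will lag behind what Flink has checkpointed.
        consumer.setCommitOffsetsOnCheckpoints(false);

        env.addSource(consumer).print();
        env.execute("Kafka without checkpoint offset committing");
    }
}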

Aggregations

KafkaTopicPartition (org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition): 35 usages
HashMap (java.util.HashMap): 26 usages
Test (org.junit.Test): 18 usages
ArrayList (java.util.ArrayList): 14 usages
Map (java.util.Map): 8 usages
Properties (java.util.Properties): 8 usages
AtomicReference (java.util.concurrent.atomic.AtomicReference): 7 usages
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 7 usages
UnregisteredMetricsGroup (org.apache.flink.metrics.groups.UnregisteredMetricsGroup): 7 usages
TestProcessingTimeService (org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService): 7 usages
TopicPartition (org.apache.kafka.common.TopicPartition): 7 usages
KafkaConsumerThread (org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread): 6 usages
AbstractStreamOperatorTestHarness (org.apache.flink.streaming.util.AbstractStreamOperatorTestHarness): 6 usages
KeyedDeserializationSchemaWrapper (org.apache.flink.streaming.util.serialization.KeyedDeserializationSchemaWrapper): 6 usages
SimpleStringSchema (org.apache.flink.streaming.util.serialization.SimpleStringSchema): 6 usages
ConsumerRecords (org.apache.kafka.clients.consumer.ConsumerRecords): 6 usages
InvocationOnMock (org.mockito.invocation.InvocationOnMock): 6 usages
OptionalLong (java.util.OptionalLong): 5 usages
StreamSource (org.apache.flink.streaming.api.operators.StreamSource): 5 usages
Mockito.anyLong (org.mockito.Mockito.anyLong): 5 usages