Example 21 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project kafka by apache.

the class DefaultPartitionerTest method testRoundRobin.

@Test
public void testRoundRobin() throws InterruptedException {
    final String topicA = "topicA";
    final String topicB = "topicB";
    List<PartitionInfo> allPartitions = asList(
            new PartitionInfo(topicA, 0, node0, nodes, nodes),
            new PartitionInfo(topicA, 1, node1, nodes, nodes),
            new PartitionInfo(topicA, 2, node2, nodes, nodes),
            new PartitionInfo(topicB, 0, node0, nodes, nodes));
    Cluster testCluster = new Cluster("clusterId", asList(node0, node1, node2), allPartitions,
            Collections.<String>emptySet(), Collections.<String>emptySet());
    final Map<Integer, Integer> partitionCount = new HashMap<>();
    for (int i = 0; i < 30; ++i) {
        int partition = partitioner.partition(topicA, null, null, null, null, testCluster);
        Integer count = partitionCount.get(partition);
        if (null == count)
            count = 0;
        partitionCount.put(partition, count + 1);
        if (i % 5 == 0) {
            partitioner.partition(topicB, null, null, null, null, testCluster);
        }
    }
    assertEquals(10, (int) partitionCount.get(0));
    assertEquals(10, (int) partitionCount.get(1));
    assertEquals(10, (int) partitionCount.get(2));
}
Also used : HashMap(java.util.HashMap) Cluster(org.apache.kafka.common.Cluster) PartitionInfo(org.apache.kafka.common.PartitionInfo) Test(org.junit.Test)
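
The test relies on fixtures (partitioner, node0..node2, nodes) defined elsewhere in DefaultPartitionerTest. As a minimal, self-contained sketch of the same round-robin behaviour, the snippet below uses the clients' DefaultPartitioner and the same five-argument Cluster constructor as the test; both have changed across Kafka versions, so treat this as illustrative rather than version-exact.

import static java.util.Arrays.asList;

import java.util.Collections;
import java.util.List;
import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.PartitionInfo;

public class RoundRobinSketch {
    public static void main(String[] args) {
        // Three broker nodes acting as leaders and replicas for the in-memory cluster.
        Node node0 = new Node(0, "localhost", 99);
        Node node1 = new Node(1, "localhost", 100);
        Node node2 = new Node(2, "localhost", 101);
        Node[] nodes = new Node[] { node0, node1, node2 };

        // One topic with three partitions, mirroring the topicA layout in the test.
        List<PartitionInfo> partitions = asList(
                new PartitionInfo("topicA", 0, node0, nodes, nodes),
                new PartitionInfo("topicA", 1, node1, nodes, nodes),
                new PartitionInfo("topicA", 2, node2, nodes, nodes));
        Cluster cluster = new Cluster("clusterId", asList(node0, node1, node2), partitions,
                Collections.<String>emptySet(), Collections.<String>emptySet());

        // With a null key the DefaultPartitioner falls back to round-robin, so over six
        // calls each of the three partitions is returned exactly twice.
        DefaultPartitioner partitioner = new DefaultPartitioner();
        for (int i = 0; i < 6; i++) {
            System.out.println(partitioner.partition("topicA", null, null, null, null, cluster));
        }
    }
}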

Example 22 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project flink by apache.

the class FlinkKafkaProducerBase method open.

// ----------------------------------- Utilities --------------------------
/**
	 * Initializes the connection to Kafka.
	 */
@Override
public void open(Configuration configuration) {
    producer = getKafkaProducer(this.producerConfig);
    RuntimeContext ctx = getRuntimeContext();
    if (partitioner != null) {
        // the fetched list is immutable, so we're creating a mutable copy in order to sort it
        List<PartitionInfo> partitionsList = new ArrayList<>(producer.partitionsFor(defaultTopicId));
        // sort the partitions by partition id to make sure the fetched partition list is the same across subtasks
        Collections.sort(partitionsList, new Comparator<PartitionInfo>() {

            @Override
            public int compare(PartitionInfo o1, PartitionInfo o2) {
                return Integer.compare(o1.partition(), o2.partition());
            }
        });
        partitions = new int[partitionsList.size()];
        for (int i = 0; i < partitions.length; i++) {
            partitions[i] = partitionsList.get(i).partition();
        }
        partitioner.open(ctx.getIndexOfThisSubtask(), ctx.getNumberOfParallelSubtasks(), partitions);
    }
    LOG.info("Starting FlinkKafkaProducer ({}/{}) to produce into topic {}", ctx.getIndexOfThisSubtask() + 1, ctx.getNumberOfParallelSubtasks(), defaultTopicId);
    // register Kafka metrics to Flink accumulators
    if (!Boolean.parseBoolean(producerConfig.getProperty(KEY_DISABLE_METRICS, "false"))) {
        Map<MetricName, ? extends Metric> metrics = this.producer.metrics();
        if (metrics == null) {
            // MapR's Kafka implementation returns null here.
            LOG.info("Producer implementation does not support metrics");
        } else {
            final MetricGroup kafkaMetricGroup = getRuntimeContext().getMetricGroup().addGroup("KafkaProducer");
            for (Map.Entry<MetricName, ? extends Metric> metric : metrics.entrySet()) {
                kafkaMetricGroup.gauge(metric.getKey().name(), new KafkaMetricWrapper(metric.getValue()));
            }
        }
    }
    if (flushOnCheckpoint && !((StreamingRuntimeContext) this.getRuntimeContext()).isCheckpointingEnabled()) {
        LOG.warn("Flushing on checkpoint is enabled, but checkpointing is not enabled. Disabling flushing.");
        flushOnCheckpoint = false;
    }
    if (logFailuresOnly) {
        callback = new Callback() {

            @Override
            public void onCompletion(RecordMetadata metadata, Exception e) {
                if (e != null) {
                    LOG.error("Error while sending record to Kafka: " + e.getMessage(), e);
                }
                acknowledgeMessage();
            }
        };
    } else {
        callback = new Callback() {

            @Override
            public void onCompletion(RecordMetadata metadata, Exception exception) {
                if (exception != null && asyncException == null) {
                    asyncException = exception;
                }
                acknowledgeMessage();
            }
        };
    }
}
Also used : StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) ArrayList(java.util.ArrayList) MetricGroup(org.apache.flink.metrics.MetricGroup) RecordMetadata(org.apache.kafka.clients.producer.RecordMetadata) MetricName(org.apache.kafka.common.MetricName) Callback(org.apache.kafka.clients.producer.Callback) KafkaMetricWrapper(org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper) PartitionInfo(org.apache.kafka.common.PartitionInfo) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) StreamingRuntimeContext(org.apache.flink.streaming.api.operators.StreamingRuntimeContext) Map(java.util.Map)
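
Sorting the fetched partitions matters because every parallel subtask must see the same int[] before partitioner.open(...) is called; otherwise two subtasks could map records to overlapping partition sets. The class below is a hypothetical sketch (the names are not Flink's API) of a partitioner that relies on that guarantee to give each subtask a fixed slice of the sorted partition array.

// Hypothetical partitioner sketch, not Flink's API: it mirrors the
// open(subtaskIndex, parallelism, partitions) call made by FlinkKafkaProducerBase above
// and assumes every subtask receives the identical, sorted partition array.
public class SubtaskSlicePartitioner<T> {

    private int[] myPartitions;
    private int next;

    public void open(int parallelInstanceId, int parallelInstances, int[] partitions) {
        // Take every parallelInstances-th partition, starting at this subtask's index.
        // Because the array is sorted identically everywhere, the slices never overlap.
        int count = 0;
        for (int i = parallelInstanceId; i < partitions.length; i += parallelInstances) {
            count++;
        }
        if (count == 0) {
            // more subtasks than partitions: fall back to sharing a partition
            myPartitions = new int[] { partitions[parallelInstanceId % partitions.length] };
        } else {
            myPartitions = new int[count];
            int j = 0;
            for (int i = parallelInstanceId; i < partitions.length; i += parallelInstances) {
                myPartitions[j++] = partitions[i];
            }
        }
    }

    public int partition(T record) {
        // Round-robin over the partitions owned by this subtask.
        int p = myPartitions[next];
        next = (next + 1) % myPartitions.length;
        return p;
    }
}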

Example 23 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project beam by apache.

the class KafkaIOTest method mkMockConsumer.

// Update mock consumer with records distributed among the given topics, each with given number
// of partitions. Records are assigned in round-robin order among the partitions.
private static MockConsumer<byte[], byte[]> mkMockConsumer(List<String> topics, int partitionsPerTopic, int numElements, OffsetResetStrategy offsetResetStrategy) {
    final List<TopicPartition> partitions = new ArrayList<>();
    final Map<TopicPartition, List<ConsumerRecord<byte[], byte[]>>> records = new HashMap<>();
    Map<String, List<PartitionInfo>> partitionMap = new HashMap<>();
    for (String topic : topics) {
        List<PartitionInfo> partIds = new ArrayList<>(partitionsPerTopic);
        for (int i = 0; i < partitionsPerTopic; i++) {
            TopicPartition tp = new TopicPartition(topic, i);
            partitions.add(tp);
            partIds.add(new PartitionInfo(topic, i, null, null, null));
            records.put(tp, new ArrayList<ConsumerRecord<byte[], byte[]>>());
        }
        partitionMap.put(topic, partIds);
    }
    int numPartitions = partitions.size();
    final long[] offsets = new long[numPartitions];
    for (int i = 0; i < numElements; i++) {
        int pIdx = i % numPartitions;
        TopicPartition tp = partitions.get(pIdx);
        records.get(tp).add(new ConsumerRecord<>(
                tp.topic(),
                tp.partition(),
                offsets[pIdx]++,
                // key is a 4 byte record id
                ByteBuffer.wrap(new byte[4]).putInt(i).array(),
                // value is an 8 byte record id
                ByteBuffer.wrap(new byte[8]).putLong(i).array()));
    }
    // This is updated when reader assigns partitions.
    final AtomicReference<List<TopicPartition>> assignedPartitions = new AtomicReference<>(Collections.<TopicPartition>emptyList());
    final MockConsumer<byte[], byte[]> consumer = new MockConsumer<byte[], byte[]>(offsetResetStrategy) {

        // Override assign() in order to set offset limits and to save the assigned partitions.
        // The '@Override' keyword is deliberately omitted so this compiles against both
        // Kafka client 0.9 and 0.10:
        // 1. the SpEL-based reflective call in KafkaIO finds this method whether the
        //    parameter is declared as List or as Collection;
        // 2. List extends Collection, so super.assign() resolves to either assign(List)
        //    or assign(Collection).
        public void assign(final List<TopicPartition> assigned) {
            super.assign(assigned);
            assignedPartitions.set(ImmutableList.copyOf(assigned));
            for (TopicPartition tp : assigned) {
                updateBeginningOffsets(ImmutableMap.of(tp, 0L));
                updateEndOffsets(ImmutableMap.of(tp, (long) records.get(tp).size()));
            }
        }

        // Override offsetsForTimes() in order to look up offsets by timestamp.
        // The '@Override' keyword is omitted because Kafka client versions before 0.10.1.0
        // do not have this method.
        // It should return Map<TopicPartition, OffsetAndTimestamp>, but versions before
        // 0.10.1.0 do not have the OffsetAndTimestamp class, so return a raw Map and
        // construct the instances via reflection.
        @SuppressWarnings("unchecked")
        public Map offsetsForTimes(Map<TopicPartition, Long> timestampsToSearch) {
            HashMap<TopicPartition, Object> result = new HashMap<>();
            try {
                Class<?> cls = Class.forName("org.apache.kafka.clients.consumer.OffsetAndTimestamp");
                // OffsetAndTimestamp(long offset, long timestamp)
                Constructor constructor = cls.getDeclaredConstructor(long.class, long.class);
                // In test scope, timestamp == offset.
                for (Map.Entry<TopicPartition, Long> entry : timestampsToSearch.entrySet()) {
                    long maxOffset = offsets[partitions.indexOf(entry.getKey())];
                    Long offset = entry.getValue();
                    if (offset >= maxOffset) {
                        // no record at or after the requested timestamp in this partition
                        offset = null;
                    }
                    result.put(entry.getKey(),
                            offset == null ? null : constructor.newInstance(offset, offset));
                }
                return result;
            } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
                throw new RuntimeException(e);
            }
        }
    };
    for (String topic : topics) {
        consumer.updatePartitions(topic, partitionMap.get(topic));
    }
    // MockConsumer does not maintain any relationship between partition seek position and the
    // records added. e.g. if we add 10 records to a partition and then seek to end of the
    // partition, MockConsumer is still going to return the 10 records in next poll. It is
    // our responsibility to make sure currently enqueued records sync with partition offsets.
    // The following task will be called inside each invocation to MockConsumer.poll().
    // We enqueue only the records with the offset >= partition's current position.
    Runnable recordEnqueueTask = new Runnable() {

        @Override
        public void run() {
            // add all the records with offset >= current partition position.
            for (TopicPartition tp : assignedPartitions.get()) {
                long curPos = consumer.position(tp);
                for (ConsumerRecord<byte[], byte[]> r : records.get(tp)) {
                    if (r.offset() >= curPos) {
                        consumer.addRecord(r);
                    }
                }
            }
            consumer.schedulePollTask(this);
        }
    };
    consumer.schedulePollTask(recordEnqueueTask);
    return consumer;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) PCollectionList(org.apache.beam.sdk.values.PCollectionList) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) PartitionInfo(org.apache.kafka.common.PartitionInfo) MockConsumer(org.apache.kafka.clients.consumer.MockConsumer) Constructor(java.lang.reflect.Constructor) AtomicReference(java.util.concurrent.atomic.AtomicReference) ConsumerRecord(org.apache.kafka.clients.consumer.ConsumerRecord) InvocationTargetException(java.lang.reflect.InvocationTargetException) TopicPartition(org.apache.kafka.common.TopicPartition) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)
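
Within the test the returned MockConsumer is driven by KafkaIO's reader, which invokes assign() reflectively (hence the SpEL comments above). Purely for illustration, here is a hedged sketch of how the same mock could be drained by hand; the topic name, partition count, and record count are arbitrary, and the calls used (assign, seek, poll) are the standard MockConsumer API from the kafka-clients test classes.

// Hypothetical manual use of the mock built by mkMockConsumer(...) above.
MockConsumer<byte[], byte[]> consumer =
        mkMockConsumer(ImmutableList.of("test"), 2, 10, OffsetResetStrategy.EARLIEST);

List<TopicPartition> tps = ImmutableList.of(
        new TopicPartition("test", 0), new TopicPartition("test", 1));
consumer.assign(tps);
for (TopicPartition tp : tps) {
    consumer.seek(tp, 0L);  // explicit positions, so no reset-strategy lookup is needed
}

int read = 0;
while (read < 10) {
    // Each poll first runs the scheduled recordEnqueueTask, which re-adds every record
    // at or beyond the current position, then returns those records to the caller.
    for (ConsumerRecord<byte[], byte[]> ignored : consumer.poll(10L)) {
        read++;
    }
}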

Example 24 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project distributedlog by twitter.

the class KafkaDistributedLogProducer method partitionsFor.

@Override
public List<PartitionInfo> partitionsFor(String s) {
    String[] streams = getStreamsForTopic(s);
    List<PartitionInfo> partitions = Lists.newArrayListWithExpectedSize(streams.length);
    for (int i = 0; i < streams.length; i++) {
        // TODO: maybe add getOwner from dl write proxy to return the owner of the partition
        partitions.add(new PartitionInfo(s, i, null, null, null));
    }
    return partitions;
}
Also used : PartitionInfo(org.apache.kafka.common.PartitionInfo)
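
Callers typically only need the size of the returned list, for example to spread keys across the underlying DistributedLog streams. A hypothetical fragment (the topic name and key are made up; producer is assumed to be a KafkaDistributedLogProducer):

// Hypothetical caller: pick a partition for a key by hashing over the number of
// PartitionInfos the producer reports for the topic.
List<PartitionInfo> infos = producer.partitionsFor("orders");
byte[] key = "order-42".getBytes(StandardCharsets.UTF_8);
int partition = (Arrays.hashCode(key) & 0x7fffffff) % infos.size();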

Example 25 with PartitionInfo

use of org.apache.kafka.common.PartitionInfo in project flink by apache.

the class FlinkKafkaConsumer09 method convertToFlinkKafkaTopicPartition.

// ------------------------------------------------------------------------
//  Utilities 
// ------------------------------------------------------------------------
/**
	 * Converts a list of Kafka PartitionInfos into Flink's serializable KafkaTopicPartitions.
	 *
	 * @param partitions A list of Kafka PartitionInfos.
	 * @return A list of KafkaTopicPartitions
	 */
private static List<KafkaTopicPartition> convertToFlinkKafkaTopicPartition(List<PartitionInfo> partitions) {
    checkNotNull(partitions);
    List<KafkaTopicPartition> ret = new ArrayList<>(partitions.size());
    for (PartitionInfo pi : partitions) {
        ret.add(new KafkaTopicPartition(pi.topic(), pi.partition()));
    }
    return ret;
}
Also used : ArrayList(java.util.ArrayList) KafkaTopicPartition(org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition) PartitionInfo(org.apache.kafka.common.PartitionInfo)
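
In the connector this helper is fed by the metadata that KafkaConsumer#partitionsFor returns during startup. A rough sketch of that flow is shown below; it assumes access to the private helper purely for illustration and uses made-up topic names and broker address.

// Sketch, not the connector's exact startup code: fetch partition metadata for the
// subscribed topics with a plain KafkaConsumer, then convert it to the serializable
// KafkaTopicPartition form used in Flink's state.
Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");
props.setProperty("key.deserializer", ByteArrayDeserializer.class.getName());
props.setProperty("value.deserializer", ByteArrayDeserializer.class.getName());

List<KafkaTopicPartition> allPartitions = new ArrayList<>();
try (KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props)) {
    for (String topic : Arrays.asList("topicA", "topicB")) {
        allPartitions.addAll(convertToFlinkKafkaTopicPartition(consumer.partitionsFor(topic)));
    }
}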

Aggregations

PartitionInfo (org.apache.kafka.common.PartitionInfo): 49 usages
TopicPartition (org.apache.kafka.common.TopicPartition): 30 usages
Test (org.junit.Test): 23 usages
HashMap (java.util.HashMap): 17 usages
ArrayList (java.util.ArrayList): 15 usages
Node (org.apache.kafka.common.Node): 12 usages
Map (java.util.Map): 11 usages
Cluster (org.apache.kafka.common.Cluster): 11 usages
HashSet (java.util.HashSet): 10 usages
Set (java.util.Set): 7 usages
TaskId (org.apache.kafka.streams.processor.TaskId): 7 usages
StreamsConfig (org.apache.kafka.streams.StreamsConfig): 6 usages
MockTime (org.apache.kafka.common.utils.MockTime): 5 usages
List (java.util.List): 4 usages
Properties (java.util.Properties): 4 usages
KStreamBuilder (org.apache.kafka.streams.kstream.KStreamBuilder): 4 usages
HostInfo (org.apache.kafka.streams.state.HostInfo): 4 usages
StreamsMetadata (org.apache.kafka.streams.state.StreamsMetadata): 4 usages
File (java.io.File): 3 usages
MockConsumer (org.apache.kafka.clients.consumer.MockConsumer): 3 usages