
Example 6 with KafkaStreamMetadata

Use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

The class PinotLLCRealtimeSegmentManager, method createConsumingSegment.

/**
   * Create a consuming segment for the kafka partitions that are missing one.
   *
   * @param realtimeTableName is the name of the realtime table (e.g. "table_REALTIME")
   * @param nonConsumingPartitions is a set of integers (kafka partitions that do not have a consuming segment)
   * @param llcSegments is a list of segment names in the ideal state as was observed last.
   * @param tableConfig is the table configuration, from which the kafka stream metadata is read.
   */
public void createConsumingSegment(final String realtimeTableName, final Set<Integer> nonConsumingPartitions, final List<String> llcSegments, final AbstractTableConfig tableConfig) {
    final KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
    final ZNRecord partitionAssignment = getKafkaPartitionAssignment(realtimeTableName);
    final HashMap<Integer, LLCSegmentName> ncPartitionToLatestSegment = new HashMap<>(nonConsumingPartitions.size());
    // Number of replicas (should be same for all partitions)
    final int nReplicas = partitionAssignment.getListField("0").size();
    // For each non-consuming partition, find the latest segment (highest sequence number) currently in the ideal state
    // (null if there is none).
    for (String segmentId : llcSegments) {
        LLCSegmentName segmentName = new LLCSegmentName(segmentId);
        int partitionId = segmentName.getPartitionId();
        if (nonConsumingPartitions.contains(partitionId)) {
            LLCSegmentName hashedSegName = ncPartitionToLatestSegment.get(partitionId);
            if (hashedSegName == null || hashedSegName.getSequenceNumber() < segmentName.getSequenceNumber()) {
                ncPartitionToLatestSegment.put(partitionId, segmentName);
            }
        }
    }
    // For each non-consuming partition, create a new consuming segment, taking the start offset from the latest
    // segment in that partition (if present and completed), or the table configuration (smallest/largest).
    for (int partition : nonConsumingPartitions) {
        try {
            LLCSegmentName latestSegment = ncPartitionToLatestSegment.get(partition);
            long startOffset;
            int nextSeqNum;
            List<String> instances = partitionAssignment.getListField(Integer.toString(partition));
            if (latestSegment == null) {
                // No segment yet in this partition; create a new one with a starting offset as per the table config specification.
                nextSeqNum = STARTING_SEQUENCE_NUMBER;
                LOGGER.info("Creating CONSUMING segment for {} partition {} with seq {}", realtimeTableName, partition, nextSeqNum);
                String consumerStartOffsetSpec = kafkaStreamMetadata.getKafkaConsumerProperties().get(CommonConstants.Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET);
                startOffset = getKafkaPartitionOffset(kafkaStreamMetadata, consumerStartOffsetSpec, partition);
                LOGGER.info("Found kafka offset {} for table {} for partition {}", startOffset, realtimeTableName, partition);
            } else {
                nextSeqNum = latestSegment.getSequenceNumber() + 1;
                LOGGER.info("Creating CONSUMING segment for {} partition {} with seq {}", realtimeTableName, partition, nextSeqNum);
                // To begin with, set startOffset to the oldest available offset in kafka. Fix it to be the one we want,
                // depending on what the prev segment had.
                startOffset = getKafkaPartitionOffset(kafkaStreamMetadata, "smallest", partition);
                LOGGER.info("Found kafka offset {} for table {} for partition {}", startOffset, realtimeTableName, partition);
                startOffset = getBetterStartOffsetIfNeeded(realtimeTableName, partition, latestSegment, startOffset, nextSeqNum);
            }
            createSegment(realtimeTableName, nReplicas, partition, nextSeqNum, instances, startOffset, partitionAssignment);
        } catch (Exception e) {
            LOGGER.error("Exception creating CONSUMING segment for {} partition {}", realtimeTableName, partition, e);
        }
    }
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) Object2IntLinkedOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntLinkedOpenHashMap) HashMap(java.util.HashMap) LLCSegmentName(com.linkedin.pinot.common.utils.LLCSegmentName) ZNRecord(org.apache.helix.ZNRecord) TimeoutException(java.util.concurrent.TimeoutException)
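
For orientation, a minimal sketch (not from the Pinot source) of how LLCSegmentName round-trips the partition id and sequence number that createConsumingSegment keys on, using the same constructors that appear in these examples; the table name, partition and timestamp below are made-up values:

// Hypothetical, self-contained example; not part of the Pinot code base.
import com.linkedin.pinot.common.utils.LLCSegmentName;

public class LLCSegmentNameSketch {
    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        // Compose a name for partition 3, starting sequence number 0, of "table_REALTIME".
        LLCSegmentName created = new LLCSegmentName("table_REALTIME", 3, 0, now);
        String segmentId = created.getSegmentName();
        // Parsing the name back recovers the fields createConsumingSegment uses to find
        // the latest segment per kafka partition.
        LLCSegmentName parsed = new LLCSegmentName(segmentId);
        System.out.println(parsed.getPartitionId());    // 3
        System.out.println(parsed.getSequenceNumber()); // 0
    }
}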

Example 7 with KafkaStreamMetadata

Use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

The class PinotLLCRealtimeSegmentManagerTest, method testAutoReplaceConsumingSegment.

public void testAutoReplaceConsumingSegment(final String tableConfigStartOffset) throws Exception {
    FakePinotLLCRealtimeSegmentManager segmentManager = new FakePinotLLCRealtimeSegmentManager(true, null);
    final int nPartitions = 8;
    final int nInstances = 3;
    final int nReplicas = 2;
    final String topic = "someTopic";
    final String rtTableName = "table_REALTIME";
    List<String> instances = getInstanceList(nInstances);
    final String startOffset = KAFKA_OFFSET;
    IdealState idealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(rtTableName, nReplicas);
    segmentManager.setupHelixEntries(topic, rtTableName, nPartitions, instances, nReplicas, startOffset, DUMMY_HOST, idealState, false, 10000);
    // Add another segment for each partition
    long now = System.currentTimeMillis();
    List<String> existingSegments = new ArrayList<>(segmentManager._idealStateEntries.keySet());
    final int partitionToBeFixed = 3;
    final int partitionWithHigherOffset = 4;
    final int emptyPartition = 5;
    final long smallestPartitionOffset = 0x259080984568L;
    final long largestPartitionOffset = smallestPartitionOffset + 100000;
    final long higherOffset = smallestPartitionOffset + 100;
    for (String segmentNameStr : existingSegments) {
        LLCSegmentName segmentName = new LLCSegmentName(segmentNameStr);
        switch(segmentName.getPartitionId()) {
            case partitionToBeFixed:
                // Do nothing; we will test adding a new segment for this partition when there is only one segment in it.
                break;
            case emptyPartition:
                // Remove existing segment, so we can test adding a new segment for this partition when none exists
                segmentManager._idealStateEntries.remove(segmentNameStr);
                break;
            case partitionWithHigherOffset:
                // Set segment metadata for this segment such that its end offset is higher than the startOffset we get from kafka.
                // In that case, the new segment's start offset should be this end offset rather than the one kafka hands us.
                LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata();
                metadata.setSegmentName(segmentName.getSegmentName());
                metadata.setEndOffset(higherOffset);
                metadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
                segmentManager._metadataMap.put(segmentName.getSegmentName(), metadata);
                break;
            default:
                // Add a second segment for this partition. It will not be repaired.
                LLCSegmentName newSegmentName = new LLCSegmentName(segmentName.getTableName(), segmentName.getPartitionId(), segmentName.getSequenceNumber() + 1, now);
                List<String> hosts = segmentManager._idealStateEntries.get(segmentNameStr);
                segmentManager._idealStateEntries.put(newSegmentName.getSegmentName(), hosts);
                break;
        }
    }
    Map<String, String> streamPropMap = new HashMap<>(1);
    streamPropMap.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.CONSUMER_TYPE), "simple");
    streamPropMap.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.KAFKA_CONSUMER_PROPS_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET), tableConfigStartOffset);
    KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(streamPropMap);
    AbstractTableConfig tableConfig = mock(AbstractTableConfig.class);
    IndexingConfig indexingConfig = mock(IndexingConfig.class);
    when(indexingConfig.getStreamConfigs()).thenReturn(streamPropMap);
    when(tableConfig.getIndexingConfig()).thenReturn(indexingConfig);
    Set<Integer> nonConsumingPartitions = new HashSet<>(1);
    nonConsumingPartitions.add(partitionToBeFixed);
    nonConsumingPartitions.add(partitionWithHigherOffset);
    nonConsumingPartitions.add(emptyPartition);
    segmentManager._kafkaSmallestOffsetToReturn = smallestPartitionOffset;
    segmentManager._kafkaLargestOffsetToReturn = largestPartitionOffset;
    existingSegments = new ArrayList<>(segmentManager._idealStateEntries.keySet());
    segmentManager._paths.clear();
    segmentManager._records.clear();
    segmentManager.createConsumingSegment(rtTableName, nonConsumingPartitions, existingSegments, tableConfig);
    Assert.assertEquals(segmentManager._paths.size(), 3);
    Assert.assertEquals(segmentManager._records.size(), 3);
    Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 3);
    Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 3);
    int found = 0;
    int index = 0;
    while (index < segmentManager._paths.size()) {
        String znodePath = segmentManager._paths.get(index);
        int slash = znodePath.lastIndexOf('/');
        String segmentNameStr = znodePath.substring(slash + 1);
        LLCSegmentName segmentName = new LLCSegmentName(segmentNameStr);
        ZNRecord znRecord;
        LLCRealtimeSegmentZKMetadata metadata;
        switch(segmentName.getPartitionId()) {
            case partitionToBeFixed:
                // We had left this partition with one segment. So, a second one should be created with a sequence number
                // one higher than the starting sequence number. Its start offset should be what kafka returns.
                found++;
                Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 1);
                znRecord = segmentManager._records.get(index);
                metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
                Assert.assertEquals(metadata.getNumReplicas(), 2);
                Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
                break;
            case emptyPartition:
                // We had removed the existing segment in this partition. A new one should be created with the offset as returned
                // by kafka and with the starting sequence number.
                found++;
                Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER);
                znRecord = segmentManager._records.get(index);
                metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
                Assert.assertEquals(metadata.getNumReplicas(), 2);
                if (tableConfigStartOffset.equals("smallest")) {
                    Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
                } else {
                    Assert.assertEquals(metadata.getStartOffset(), largestPartitionOffset);
                }
                break;
            case partitionWithHigherOffset:
                // We had left this partition with one segment, and had set the end offset of that segment to a value higher
                // than what kafka returns. So, a second one should be created with a sequence number one higher than the
                // starting sequence number, and its start offset should equal the end offset of the first segment.
                found++;
                Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 1);
                znRecord = segmentManager._records.get(index);
                metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
                Assert.assertEquals(metadata.getNumReplicas(), 2);
                Assert.assertEquals(metadata.getStartOffset(), higherOffset);
                break;
        }
        index++;
    }
    // We should see all three cases here.
    Assert.assertEquals(3, found);
    // Now, if we again mark 'partitionToBeFixed' as a non-consuming partition, another segment should get added with the
    // same start offset as the first one, since the kafka offset to return has not changed.
    Set<Integer> ncPartitions = new HashSet<>(1);
    ncPartitions.add(partitionToBeFixed);
    segmentManager.createConsumingSegment(rtTableName, ncPartitions, segmentManager.getExistingSegments(rtTableName), tableConfig);
    Assert.assertEquals(segmentManager._paths.size(), 4);
    Assert.assertEquals(segmentManager._records.size(), 4);
    Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 4);
    Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 4);
    // The latest zn record should be that of the new one we added.
    ZNRecord znRecord = segmentManager._records.get(3);
    LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
    Assert.assertEquals(metadata.getNumReplicas(), 2);
    Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
    LLCSegmentName llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
    Assert.assertEquals(llcSegmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 2);
    Assert.assertEquals(llcSegmentName.getPartitionId(), partitionToBeFixed);
    // Now pretend the prev segment ended successfully, and set the end offset
    metadata.setEndOffset(metadata.getStartOffset() + 10);
    metadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
    segmentManager._records.remove(3);
    segmentManager._records.add(metadata.toZNRecord());
    segmentManager._metadataMap.put(metadata.getSegmentName(), metadata);
    segmentManager._kafkaLargestOffsetToReturn *= 2;
    segmentManager._kafkaSmallestOffsetToReturn *= 2;
    ncPartitions.clear();
    ncPartitions.add(partitionToBeFixed);
    segmentManager.createConsumingSegment(rtTableName, ncPartitions, segmentManager.getExistingSegments(rtTableName), tableConfig);
    Assert.assertEquals(segmentManager._paths.size(), 5);
    Assert.assertEquals(segmentManager._records.size(), 5);
    Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 5);
    Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 5);
    znRecord = segmentManager._records.get(4);
    metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
    Assert.assertEquals(metadata.getNumReplicas(), 2);
    // In this case, since we have data loss, we will always use the smallest available kafka offset.
    Assert.assertEquals(metadata.getStartOffset(), segmentManager.getKafkaPartitionOffset(null, "smallest", partitionToBeFixed));
    llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
    Assert.assertEquals(llcSegmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 3);
    Assert.assertEquals(llcSegmentName.getPartitionId(), partitionToBeFixed);
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) IndexingConfig(com.linkedin.pinot.common.config.IndexingConfig) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LLCSegmentName(com.linkedin.pinot.common.utils.LLCSegmentName) IdealState(org.apache.helix.model.IdealState) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) ZNRecord(org.apache.helix.ZNRecord) HashSet(java.util.HashSet)
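
The helper above is parameterized by the table-config offset spec, but the actual test entry points are not part of this excerpt. A plausible pair of drivers, assuming the method lives in a TestNG test class (org.testng.annotations.Test), might look like:

    // Hypothetical drivers (not from the source); they exercise both table-config offset settings.
    @Test
    public void testAutoReplaceConsumingSegmentWithSmallestOffset() throws Exception {
        testAutoReplaceConsumingSegment("smallest");
    }

    @Test
    public void testAutoReplaceConsumingSegmentWithLargestOffset() throws Exception {
        testAutoReplaceConsumingSegment("largest");
    }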

Example 8 with KafkaStreamMetadata

Use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

The class PinotTableIdealStateBuilder, method buildLowLevelRealtimeIdealStateFor.

public static void buildLowLevelRealtimeIdealStateFor(String realtimeTableName, AbstractTableConfig realtimeTableConfig, HelixAdmin helixAdmin, String helixClusterName, IdealState idealState) {
    String realtimeServerTenant = ControllerTenantNameBuilder.getRealtimeTenantNameForTenant(realtimeTableConfig.getTenantConfig().getServer());
    final List<String> realtimeInstances = helixAdmin.getInstancesInClusterWithTag(helixClusterName, realtimeServerTenant);
    boolean create = false;
    final String replicasPerPartitionStr = realtimeTableConfig.getValidationConfig().getReplicasPerPartition();
    if (replicasPerPartitionStr == null || replicasPerPartitionStr.isEmpty()) {
        throw new RuntimeException("Null or empty value for replicasPerPartition, expected a number");
    }
    final int nReplicas;
    try {
        nReplicas = Integer.valueOf(replicasPerPartitionStr);
    } catch (NumberFormatException e) {
        throw new RuntimeException("Invalid value for replicasPerPartition, expected a number: " + replicasPerPartitionStr, e);
    }
    if (idealState == null) {
        idealState = buildEmptyKafkaConsumerRealtimeIdealStateFor(realtimeTableName, nReplicas);
        create = true;
    }
    LOGGER.info("Assigning partitions to instances for simple consumer for table {}", realtimeTableName);
    final KafkaStreamMetadata kafkaMetadata = new KafkaStreamMetadata(realtimeTableConfig.getIndexingConfig().getStreamConfigs());
    final String topicName = kafkaMetadata.getKafkaTopicName();
    final PinotLLCRealtimeSegmentManager segmentManager = PinotLLCRealtimeSegmentManager.getInstance();
    final int nPartitions = getPartitionCount(kafkaMetadata);
    LOGGER.info("Assigning {} partitions to instances for simple consumer for table {}", nPartitions, realtimeTableName);
    segmentManager.setupHelixEntries(topicName, realtimeTableName, nPartitions, realtimeInstances, nReplicas, kafkaMetadata.getKafkaConsumerProperties().get(Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET), kafkaMetadata.getBootstrapHosts(), idealState, create, PinotLLCRealtimeSegmentManager.getRealtimeTableFlushSize(realtimeTableConfig));
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) PinotLLCRealtimeSegmentManager(com.linkedin.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager)
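
For reference, a minimal sketch of the stream-config map that KafkaStreamMetadata is built from in this method, using only the two property keys that appear in Example 7 (consumer type and auto offset reset). A real table config would also carry the kafka topic name and broker list; their constant names do not appear in this excerpt, so they are omitted, and the values below are illustrative:

    // Hypothetical snippet; assumes java.util.HashMap plus the Pinot StringUtil and CommonConstants imports.
    Map<String, String> streamConfigs = new HashMap<>();
    streamConfigs.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX,
        CommonConstants.Helix.DataSource.Realtime.Kafka.CONSUMER_TYPE), "simple");
    streamConfigs.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX,
        CommonConstants.Helix.DataSource.Realtime.Kafka.KAFKA_CONSUMER_PROPS_PREFIX,
        CommonConstants.Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET), "smallest");
    KafkaStreamMetadata kafkaMetadata = new KafkaStreamMetadata(streamConfigs);
    // This is the offset spec that buildLowLevelRealtimeIdealStateFor passes to setupHelixEntries above.
    String offsetSpec = kafkaMetadata.getKafkaConsumerProperties()
        .get(CommonConstants.Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET);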

Example 9 with KafkaStreamMetadata

Use of com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata in project pinot by linkedin.

The class KafkaHighLevelStreamProviderConfig, method init.

@Override
public void init(AbstractTableConfig tableConfig, InstanceZKMetadata instanceMetadata, Schema schema) {
    this.indexingSchema = schema;
    if (instanceMetadata != null) {
        // For LL segments, instanceZkMetadata will be null
        this.groupId = instanceMetadata.getGroupId(tableConfig.getTableName());
    }
    KafkaStreamMetadata kafkaMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
    this.kafkaTopicName = kafkaMetadata.getKafkaTopicName();
    this.decodeKlass = kafkaMetadata.getDecoderClass();
    this.decoderProps = kafkaMetadata.getDecoderProperties();
    this.kafkaConsumerProps = kafkaMetadata.getKafkaConsumerProperties();
    this.zkString = kafkaMetadata.getZkBrokerUrl();
    if (tableConfig.getIndexingConfig().getStreamConfigs().containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE)) {
        realtimeRecordsThreshold = Integer.parseInt(tableConfig.getIndexingConfig().getStreamConfigs().get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE));
    }
    if (tableConfig.getIndexingConfig().getStreamConfigs().containsKey(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME)) {
        segmentTimeInMillis = convertToMs(tableConfig.getIndexingConfig().getStreamConfigs().get(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME));
    }
}
Also used : KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata)
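
Note that the two flush thresholds are read straight out of the stream-config map rather than through KafkaStreamMetadata. A minimal sketch of those entries, assuming the same CommonConstants.Helix import as above (the values, and the "6h" time format accepted by convertToMs, are assumptions):

    // Hypothetical flush-threshold entries; the keys are the same constants init() checks above.
    Map<String, String> streamConfigs = new HashMap<>();
    // Record-count threshold for flushing the consuming segment.
    streamConfigs.put(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_SIZE, "500000");
    // Time threshold for flushing the consuming segment; the value is parsed by convertToMs.
    streamConfigs.put(Helix.DataSource.Realtime.REALTIME_SEGMENT_FLUSH_TIME, "6h");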

Aggregations

KafkaStreamMetadata (com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata): 9 usages
ArrayList (java.util.ArrayList): 5 usages
ZNRecord (org.apache.helix.ZNRecord): 5 usages
AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig): 4 usages
RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata): 3 usages
HashMap (java.util.HashMap): 3 usages
HashSet (java.util.HashSet): 3 usages
IdealState (org.apache.helix.model.IdealState): 3 usages
LLCSegmentName (com.linkedin.pinot.common.utils.LLCSegmentName): 2 usages
IOException (java.io.IOException): 2 usages
List (java.util.List): 2 usages
TimeoutException (java.util.concurrent.TimeoutException): 2 usages
JSONException (org.json.JSONException): 2 usages
IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig): 1 usage
InstanceZKMetadata (com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata): 1 usage
LLCRealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata): 1 usage
SegmentMetadata (com.linkedin.pinot.common.segment.SegmentMetadata): 1 usage
TableType (com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType): 1 usage
HLCSegmentName (com.linkedin.pinot.common.utils.HLCSegmentName): 1 usage
PinotLLCRealtimeSegmentManager (com.linkedin.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager): 1 usage