
Example 1 with LLCRealtimeSegmentZKMetadata

Use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.

In class ZKMetadataProvider, the method getRealtimeSegmentZKMetadata:

@Nullable
public static RealtimeSegmentZKMetadata getRealtimeSegmentZKMetadata(ZkHelixPropertyStore<ZNRecord> propertyStore, String tableName, String segmentName) {
    String realtimeTableName = TableNameBuilder.REALTIME_TABLE_NAME_BUILDER.forTable(tableName);
    ZNRecord znRecord = propertyStore.get(constructPropertyStorePathForSegment(realtimeTableName, segmentName), null, AccessOption.PERSISTENT);
    // It is possible that the segment metadata has just been deleted due to retention.
    if (znRecord == null) {
        return null;
    }
    if (SegmentName.isHighLevelConsumerSegmentName(segmentName)) {
        return new RealtimeSegmentZKMetadata(znRecord);
    } else {
        return new LLCRealtimeSegmentZKMetadata(znRecord);
    }
}
Also used: RealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) ZNRecord(org.apache.helix.ZNRecord) Nullable(javax.annotation.Nullable)
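
A hedged caller sketch for the provider above: fetch the metadata, tolerate the retention race, and branch on the concrete type before reading LLC-specific fields. The helper name getCommittedEndOffset and the return-null-for-high-level-consumer convention are illustrative assumptions, not Pinot API.

@Nullable
private static Long getCommittedEndOffset(ZkHelixPropertyStore<ZNRecord> propertyStore, String tableName, String segmentName) {
    RealtimeSegmentZKMetadata metadata = ZKMetadataProvider.getRealtimeSegmentZKMetadata(propertyStore, tableName, segmentName);
    if (metadata == null) {
        // Retention may have just deleted the metadata; treat the segment as gone.
        return null;
    }
    if (metadata instanceof LLCRealtimeSegmentZKMetadata) {
        // Only low-level consumer segments carry a committed Kafka end offset.
        return ((LLCRealtimeSegmentZKMetadata) metadata).getEndOffset();
    }
    return null;
}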

Example 2 with LLCRealtimeSegmentZKMetadata

Use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.

In class PinotLLCRealtimeSegmentManagerTest, the method testAutoReplaceConsumingSegment:

public void testAutoReplaceConsumingSegment(final String tableConfigStartOffset) throws Exception {
    FakePinotLLCRealtimeSegmentManager segmentManager = new FakePinotLLCRealtimeSegmentManager(true, null);
    final int nPartitions = 8;
    final int nInstances = 3;
    final int nReplicas = 2;
    final String topic = "someTopic";
    final String rtTableName = "table_REALTIME";
    List<String> instances = getInstanceList(nInstances);
    final String startOffset = KAFKA_OFFSET;
    IdealState idealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(rtTableName, nReplicas);
    segmentManager.setupHelixEntries(topic, rtTableName, nPartitions, instances, nReplicas, startOffset, DUMMY_HOST, idealState, false, 10000);
    // Add another segment for each partition
    long now = System.currentTimeMillis();
    List<String> existingSegments = new ArrayList<>(segmentManager._idealStateEntries.keySet());
    final int partitionToBeFixed = 3;
    final int partitionWithHigherOffset = 4;
    final int emptyPartition = 5;
    final long smallestPartitionOffset = 0x259080984568L;
    final long largestPartitionOffset = smallestPartitionOffset + 100000;
    final long higherOffset = smallestPartitionOffset + 100;
    for (String segmentNameStr : existingSegments) {
        LLCSegmentName segmentName = new LLCSegmentName(segmentNameStr);
        switch(segmentName.getPartitionId()) {
            case partitionToBeFixed:
                // Do nothing; we will test adding a new segment for this partition while it has only one segment.
                break;
            case emptyPartition:
                // Remove the existing segment so we can test adding a new segment for this partition when none exists.
                segmentManager._idealStateEntries.remove(segmentNameStr);
                break;
            case partitionWithHigherOffset:
                // Set segment metadata for this segment such that its end offset is higher than the start offset we get from Kafka.
                // In that case, the new segment's start offset should be this one rather than the one Kafka hands us.
                LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata();
                metadata.setSegmentName(segmentName.getSegmentName());
                metadata.setEndOffset(higherOffset);
                metadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
                segmentManager._metadataMap.put(segmentName.getSegmentName(), metadata);
                break;
            default:
                // Add a second segment for this partition. It will not be repaired.
                LLCSegmentName newSegmentName = new LLCSegmentName(segmentName.getTableName(), segmentName.getPartitionId(), segmentName.getSequenceNumber() + 1, now);
                List<String> hosts = segmentManager._idealStateEntries.get(segmentNameStr);
                segmentManager._idealStateEntries.put(newSegmentName.getSegmentName(), hosts);
                break;
        }
    }
    Map<String, String> streamPropMap = new HashMap<>(1);
    streamPropMap.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.CONSUMER_TYPE), "simple");
    streamPropMap.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.KAFKA_CONSUMER_PROPS_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET), tableConfigStartOffset);
    KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(streamPropMap);
    AbstractTableConfig tableConfig = mock(AbstractTableConfig.class);
    IndexingConfig indexingConfig = mock(IndexingConfig.class);
    when(indexingConfig.getStreamConfigs()).thenReturn(streamPropMap);
    when(tableConfig.getIndexingConfig()).thenReturn(indexingConfig);
    Set<Integer> nonConsumingPartitions = new HashSet<>(1);
    nonConsumingPartitions.add(partitionToBeFixed);
    nonConsumingPartitions.add(partitionWithHigherOffset);
    nonConsumingPartitions.add(emptyPartition);
    segmentManager._kafkaSmallestOffsetToReturn = smallestPartitionOffset;
    segmentManager._kafkaLargestOffsetToReturn = largestPartitionOffset;
    existingSegments = new ArrayList<>(segmentManager._idealStateEntries.keySet());
    segmentManager._paths.clear();
    segmentManager._records.clear();
    segmentManager.createConsumingSegment(rtTableName, nonConsumingPartitions, existingSegments, tableConfig);
    Assert.assertEquals(segmentManager._paths.size(), 3);
    Assert.assertEquals(segmentManager._records.size(), 3);
    Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 3);
    Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 3);
    int found = 0;
    int index = 0;
    while (index < segmentManager._paths.size()) {
        String znodePath = segmentManager._paths.get(index);
        int slash = znodePath.lastIndexOf('/');
        String segmentNameStr = znodePath.substring(slash + 1);
        LLCSegmentName segmentName = new LLCSegmentName(segmentNameStr);
        ZNRecord znRecord;
        LLCRealtimeSegmentZKMetadata metadata;
        switch(segmentName.getPartitionId()) {
            case partitionToBeFixed:
                // We had left this partition with one segment, so a second one should be created with a sequence number
                // one higher than the starting sequence number. Its start offset should be what Kafka returns.
                found++;
                Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 1);
                znRecord = segmentManager._records.get(index);
                metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
                Assert.assertEquals(metadata.getNumReplicas(), 2);
                Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
                break;
            case emptyPartition:
                // We had removed all segments from this partition, so a new one should be created with the starting
                // sequence number and the start offset returned by Kafka.
                found++;
                Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER);
                znRecord = segmentManager._records.get(index);
                metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
                Assert.assertEquals(metadata.getNumReplicas(), 2);
                if (tableConfigStartOffset.equals("smallest")) {
                    Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
                } else {
                    Assert.assertEquals(metadata.getStartOffset(), largestPartitionOffset);
                }
                break;
            case partitionWithHigherOffset:
                // We had left this partition with one segment whose end offset was set higher than the offset returned
                // by Kafka. So, a second one should be created with a sequence number one higher than the starting one,
                // and its start offset should equal the end offset of the first segment.
                found++;
                Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 1);
                znRecord = segmentManager._records.get(index);
                metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
                Assert.assertEquals(metadata.getNumReplicas(), 2);
                Assert.assertEquals(metadata.getStartOffset(), higherOffset);
                break;
        }
        index++;
    }
    // We should see all three cases here.
    Assert.assertEquals(3, found);
    // Now, if we make 'partitionToBeFixed' a non-consuming partition, a second segment should get added with the same
    // start offset as the first one, since the kafka offset to return has not changed.
    Set<Integer> ncPartitions = new HashSet<>(1);
    ncPartitions.add(partitionToBeFixed);
    segmentManager.createConsumingSegment(rtTableName, ncPartitions, segmentManager.getExistingSegments(rtTableName), tableConfig);
    Assert.assertEquals(segmentManager._paths.size(), 4);
    Assert.assertEquals(segmentManager._records.size(), 4);
    Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 4);
    Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 4);
    // The latest ZNRecord should be the one for the new segment we just added.
    ZNRecord znRecord = segmentManager._records.get(3);
    LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
    Assert.assertEquals(metadata.getNumReplicas(), 2);
    Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
    LLCSegmentName llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
    Assert.assertEquals(llcSegmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 2);
    Assert.assertEquals(llcSegmentName.getPartitionId(), partitionToBeFixed);
    // Now pretend the previous segment completed successfully, and set its end offset.
    metadata.setEndOffset(metadata.getStartOffset() + 10);
    metadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
    segmentManager._records.remove(3);
    segmentManager._records.add(metadata.toZNRecord());
    segmentManager._metadataMap.put(metadata.getSegmentName(), metadata);
    segmentManager._kafkaLargestOffsetToReturn *= 2;
    segmentManager._kafkaSmallestOffsetToReturn *= 2;
    ncPartitions.clear();
    ncPartitions.add(partitionToBeFixed);
    segmentManager.createConsumingSegment(rtTableName, ncPartitions, segmentManager.getExistingSegments(rtTableName), tableConfig);
    Assert.assertEquals(segmentManager._paths.size(), 5);
    Assert.assertEquals(segmentManager._records.size(), 5);
    Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 5);
    Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 5);
    znRecord = segmentManager._records.get(4);
    metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
    Assert.assertEquals(metadata.getNumReplicas(), 2);
    // In this case, since we have data loss, we always use the smallest kafka offset available.
    Assert.assertEquals(metadata.getStartOffset(), segmentManager.getKafkaPartitionOffset(null, "smallest", partitionToBeFixed));
    llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
    Assert.assertEquals(llcSegmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 3);
    Assert.assertEquals(llcSegmentName.getPartitionId(), partitionToBeFixed);
}
Also used: KafkaStreamMetadata(com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) IndexingConfig(com.linkedin.pinot.common.config.IndexingConfig) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LLCSegmentName(com.linkedin.pinot.common.utils.LLCSegmentName) IdealState(org.apache.helix.model.IdealState) AbstractTableConfig(com.linkedin.pinot.common.config.AbstractTableConfig) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) ZNRecord(org.apache.helix.ZNRecord) HashSet(java.util.HashSet)
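
The test above leans on LLCSegmentName encoding the table name, partition id, sequence number, and creation time into a single segment name string. A minimal round-trip sketch using only the constructor and accessors the test itself calls; the concrete values are arbitrary illustrations (run with -ea for the asserts):

private static void llcSegmentNameRoundTrip() {
    long now = System.currentTimeMillis();
    LLCSegmentName original = new LLCSegmentName("someTable", 3, 20, now);
    // Parsing the string form recovers the components used to build it.
    LLCSegmentName parsed = new LLCSegmentName(original.getSegmentName());
    assert parsed.getTableName().equals("someTable");
    assert parsed.getPartitionId() == 3;
    assert parsed.getSequenceNumber() == 20;
}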

Example 3 with LLCRealtimeSegmentZKMetadata

Use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.

In class PinotLLCRealtimeSegmentManagerTest, the method testInitialSegmentAssignments:

private void testInitialSegmentAssignments(final int nPartitions, final int nInstances, final int nReplicas, boolean existingIS) {
    FakePinotLLCRealtimeSegmentManager segmentManager = new FakePinotLLCRealtimeSegmentManager(true, null);
    final String topic = "someTopic";
    final String rtTableName = "table_REALTIME";
    List<String> instances = getInstanceList(nInstances);
    final String startOffset = KAFKA_OFFSET;
    IdealState idealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(rtTableName, nReplicas);
    segmentManager.setupHelixEntries(topic, rtTableName, nPartitions, instances, nReplicas, startOffset, DUMMY_HOST, idealState, !existingIS, 1000000);
    final String actualRtTableName = segmentManager._realtimeTableName;
    final Map<String, List<String>> idealStateEntries = segmentManager._idealStateEntries;
    final int idealStateNReplicas = segmentManager._nReplicas;
    final List<String> propStorePaths = segmentManager._paths;
    final List<ZNRecord> propStoreEntries = segmentManager._records;
    final boolean createNew = segmentManager._createNew;
    Assert.assertEquals(propStorePaths.size(), nPartitions);
    Assert.assertEquals(propStoreEntries.size(), nPartitions);
    Assert.assertEquals(idealStateEntries.size(), nPartitions);
    Assert.assertEquals(actualRtTableName, rtTableName);
    Assert.assertEquals(createNew, !existingIS);
    Assert.assertEquals(idealStateNReplicas, nReplicas);
    Map<Integer, ZNRecord> segmentPropStoreMap = new HashMap<>(propStorePaths.size());
    Map<Integer, String> segmentPathsMap = new HashMap<>(propStorePaths.size());
    for (String path : propStorePaths) {
        String segNameStr = path.split("/")[3];
        int partition = new LLCSegmentName(segNameStr).getPartitionId();
        segmentPathsMap.put(partition, path);
    }
    for (ZNRecord znRecord : propStoreEntries) {
        LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
        segmentPropStoreMap.put(new LLCSegmentName(metadata.getSegmentName()).getPartitionId(), znRecord);
    }
    Assert.assertEquals(segmentPathsMap.size(), nPartitions);
    Assert.assertEquals(segmentPropStoreMap.size(), nPartitions);
    for (int partition = 0; partition < nPartitions; partition++) {
        final LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata(segmentPropStoreMap.get(partition));
        // Just for coverage
        metadata.toString();
        ZNRecord znRecord = metadata.toZNRecord();
        LLCRealtimeSegmentZKMetadata metadataCopy = new LLCRealtimeSegmentZKMetadata(znRecord);
        Assert.assertEquals(metadata, metadataCopy);
        final String path = segmentPathsMap.get(partition);
        final String segmentName = metadata.getSegmentName();
        Assert.assertEquals(metadata.getStartOffset(), -1L);
        Assert.assertEquals(path, "/SEGMENTS/" + rtTableName + "/" + segmentName);
        LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName);
        Assert.assertEquals(llcSegmentName.getPartitionId(), partition);
        Assert.assertEquals(llcSegmentName.getTableName(), TableNameBuilder.extractRawTableName(rtTableName));
        Assert.assertEquals(metadata.getNumReplicas(), nReplicas);
    }
}
Also used: HashMap(java.util.HashMap) LLCSegmentName(com.linkedin.pinot.common.utils.LLCSegmentName) IdealState(org.apache.helix.model.IdealState) ArrayList(java.util.ArrayList) List(java.util.List) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) ZNRecord(org.apache.helix.ZNRecord)
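
The metadata/metadataCopy assertion above is really a serialization contract: an LLCRealtimeSegmentZKMetadata must survive a round trip through ZNRecord, Helix's property-store representation. A standalone sketch of that contract, using only setters that appear elsewhere in these examples; the field values are arbitrary:

private static void znRecordRoundTrip() {
    LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata();
    metadata.setSegmentName(new LLCSegmentName("someTable", 3, 20, System.currentTimeMillis()).getSegmentName());
    metadata.setNumReplicas(2);
    metadata.setStatus(CommonConstants.Segment.Realtime.Status.IN_PROGRESS);
    // Serialize to the property-store representation and back; the copy must compare equal.
    ZNRecord record = metadata.toZNRecord();
    LLCRealtimeSegmentZKMetadata copy = new LLCRealtimeSegmentZKMetadata(record);
    assert metadata.equals(copy);
}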

Example 4 with LLCRealtimeSegmentZKMetadata

Use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.

In class SegmentCompletionTest, the method testCaseSetup:

public void testCaseSetup(boolean isLeader) throws Exception {
    segmentManager = new MockPinotLLCRealtimeSegmentManager();
    final int partitionId = 23;
    final int seqId = 12;
    final long now = System.currentTimeMillis();
    final String tableName = "someTable";
    final LLCSegmentName segmentName = new LLCSegmentName(tableName, partitionId, seqId, now);
    segmentNameStr = segmentName.getSegmentName();
    final LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata();
    metadata.setStatus(CommonConstants.Segment.Realtime.Status.IN_PROGRESS);
    metadata.setNumReplicas(3);
    segmentManager._segmentMetadata = metadata;
    segmentCompletionMgr = new MockSegmentCompletionManager(segmentManager, isLeader);
    segmentManager._segmentCompletionMgr = segmentCompletionMgr;
    Field fsmMapField = SegmentCompletionManager.class.getDeclaredField("_fsmMap");
    fsmMapField.setAccessible(true);
    fsmMap = (Map<String, Object>) fsmMapField.get(segmentCompletionMgr);
    Field ctMapField = SegmentCompletionManager.class.getDeclaredField("_commitTimeMap");
    ctMapField.setAccessible(true);
    commitTimeMap = (Map<String, Long>) ctMapField.get(segmentCompletionMgr);
}
Also used: Field(java.lang.reflect.Field) LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) LLCSegmentName(com.linkedin.pinot.common.utils.LLCSegmentName)
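
The Field lookups in testCaseSetup are the standard reflection idiom for inspecting private test state. A generic, hypothetical restatement of that pattern; note that getDeclaredField only finds fields declared on the class itself, not on its superclasses:

@SuppressWarnings("unchecked")
private static Map<String, Object> readPrivateMap(Object target, String fieldName) throws Exception {
    // Look up the declared (possibly private) field on the runtime class,
    // make it accessible, and read its value from the given instance.
    Field field = target.getClass().getDeclaredField(fieldName);
    field.setAccessible(true);
    return (Map<String, Object>) field.get(target);
}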

Example 5 with LLCRealtimeSegmentZKMetadata

Use of com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata in project pinot by linkedin.

In class LLRealtimeSegmentDataManager, the method goOnlineFromConsuming:

public void goOnlineFromConsuming(RealtimeSegmentZKMetadata metadata) throws InterruptedException {
    LLCRealtimeSegmentZKMetadata llcMetadata = (LLCRealtimeSegmentZKMetadata) metadata;
    final long endOffset = llcMetadata.getEndOffset();
    segmentLogger.info("State: {}, transitioning from CONSUMING to ONLINE (startOffset: {}, endOffset: {})", _state.toString(), _startOffset, endOffset);
    stop();
    segmentLogger.info("Consumer thread stopped in state {}", _state.toString());
    switch(_state) {
        case COMMITTED:
        case RETAINED:
            // Nothing to do. We already built the local segment and swapped it in for the in-memory data.
            segmentLogger.info("State {}. Nothing to do", _state.toString());
            break;
        case DISCARDED:
        case ERROR:
            segmentLogger.info("State {}. Downloading to replace", _state.toString());
            downloadSegmentAndReplace(llcMetadata);
            break;
        case CATCHING_UP:
        case HOLDING:
        case INITIAL_CONSUMING:
            // Allow the consumer to catch up to the final offset, then replace the segment.
            if (_currentOffset > endOffset) {
                // We moved ahead of the offset that is committed in ZK.
                segmentLogger.warn("Current offset {} ahead of the offset in zk {}. Downloading to replace", _currentOffset, endOffset);
                downloadSegmentAndReplace(llcMetadata);
            } else if (_currentOffset == endOffset) {
                segmentLogger.info("Current offset {} matches offset in zk {}. Replacing segment", _currentOffset, endOffset);
                buildSegmentAndReplace();
            } else {
                segmentLogger.info("Attempting to catch up from offset {} to {} ", _currentOffset, endOffset);
                boolean success = catchupToFinalOffset(endOffset, TimeUnit.MILLISECONDS.convert(_maxTimeForConsumingToOnlineSec, TimeUnit.SECONDS));
                if (success) {
                    segmentLogger.info("Caught up to offset {}", _currentOffset);
                    buildSegmentAndReplace();
                } else {
                    segmentLogger.info("Could not catch up to offset (current = {}). Downloading to replace", _currentOffset);
                    downloadSegmentAndReplace(llcMetadata);
                }
            }
            break;
        default:
            segmentLogger.info("Downloading to replace segment while in state {}", _state.toString());
            downloadSegmentAndReplace(llcMetadata);
            break;
    }
}
Also used: LLCRealtimeSegmentZKMetadata(com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata)
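
The CATCHING_UP/HOLDING/INITIAL_CONSUMING branch above reduces to a three-way comparison between the local consuming offset and the end offset committed in ZK. A compact, hypothetical restatement of that decision; the returned labels are for illustration only, not Pinot API:

private static String decideConsumingToOnlineAction(long currentOffset, long committedEndOffset) {
    if (currentOffset > committedEndOffset) {
        // Consumed past the committed end offset: local rows may diverge, so download the committed segment.
        return "download";
    } else if (currentOffset == committedEndOffset) {
        // Exactly caught up: build the segment from in-memory rows and swap it in.
        return "build";
    } else {
        // Behind: try to catch up within the deadline; the caller downloads on failure.
        return "catchup";
    }
}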

Aggregations

LLCRealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata) - 20 usages
ZNRecord (org.apache.helix.ZNRecord) - 11 usages
LLCSegmentName (com.linkedin.pinot.common.utils.LLCSegmentName) - 9 usages
ArrayList (java.util.ArrayList) - 8 usages
IdealState (org.apache.helix.model.IdealState) - 5 usages
Test (org.testng.annotations.Test) - 4 usages
HashMap (java.util.HashMap) - 3 usages
BeforeTest (org.testng.annotations.BeforeTest) - 3 usages
AbstractTableConfig (com.linkedin.pinot.common.config.AbstractTableConfig) - 2 usages
Schema (com.linkedin.pinot.common.data.Schema) - 2 usages
RealtimeSegmentZKMetadata (com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata) - 2 usages
CommonConstants (com.linkedin.pinot.common.utils.CommonConstants) - 2 usages
HashSet (java.util.HashSet) - 2 usages
List (java.util.List) - 2 usages
IndexingConfig (com.linkedin.pinot.common.config.IndexingConfig) - 1 usage
InstanceZKMetadata (com.linkedin.pinot.common.metadata.instance.InstanceZKMetadata) - 1 usage
KafkaStreamMetadata (com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata) - 1 usage
ServerMetrics (com.linkedin.pinot.common.metrics.ServerMetrics) - 1 usage
PinotHelixPropertyStoreZnRecordProvider (com.linkedin.pinot.common.utils.helix.PinotHelixPropertyStoreZnRecordProvider) - 1 usage
SegmentDataManager (com.linkedin.pinot.core.data.manager.offline.SegmentDataManager) - 1 usage