use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class PinotHelixResourceManagerTest method testRebuildBrokerResourceFromHelixTags.
/**
 * Verifies that {@code rebuildBrokerResourceFromHelixTags} resizes the broker resource entry for
 * {@code faketable_OFFLINE} to follow the instances currently tagged {@code brokerTenant_BROKER}:
 * 3 after the tenant and table are created, 0 once every tenant broker has been retagged as untagged,
 * and 5 after five broker instances have been tagged again.
 *
 * <p>The cleanup (untagging every broker instance and deleting the table) runs in a {@code finally}
 * block so that a failed assertion does not leave tags or the table behind for other tests.
 *
 * @throws Exception if any resource manager or Helix admin call fails
 */
@Test
public void testRebuildBrokerResourceFromHelixTags() throws Exception {
  AbstractTableConfig tableConfig = AbstractTableConfig.init(ControllerRequestBuilderUtil.buildCreateOfflineTableJSON("faketable", "serverTenant", "brokerTenant", 3).toString());
  Tenant tenant = new Tenant();
  tenant.setTenantName("brokerTenant");
  tenant.setTenantRole("BROKER");
  tenant.setNumberOfInstances(3);
  pinotHelixResourceManager.createBrokerTenant(tenant);
  pinotHelixResourceManager.addTable(tableConfig);
  try {
    // Check that the broker ideal state has 3 brokers assigned to it for faketable_OFFLINE
    IdealState idealState = pinotHelixResourceManager.getHelixAdmin().getResourceIdealState(HELIX_CLUSTER_NAME, CommonConstants.Helix.BROKER_RESOURCE_INSTANCE);
    Assert.assertEquals(idealState.getInstanceStateMap("faketable_OFFLINE").size(), 3);

    // Retag all instances currently assigned to brokerTenant to be unassigned
    Set<String> brokerInstances = pinotHelixResourceManager.getAllInstancesForBrokerTenant("brokerTenant");
    for (String brokerInstance : brokerInstances) {
      pinotHelixResourceManager.getHelixAdmin().removeInstanceTag(HELIX_CLUSTER_NAME, brokerInstance, "brokerTenant_BROKER");
      pinotHelixResourceManager.getHelixAdmin().addInstanceTag(HELIX_CLUSTER_NAME, brokerInstance, CommonConstants.Helix.UNTAGGED_BROKER_INSTANCE);
    }

    // Rebuilding the broker resource should shrink the ideal state entry to zero brokers
    pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags("faketable_OFFLINE");
    idealState = pinotHelixResourceManager.getHelixAdmin().getResourceIdealState(HELIX_CLUSTER_NAME, CommonConstants.Helix.BROKER_RESOURCE_INSTANCE);
    Assert.assertEquals(idealState.getInstanceStateMap("faketable_OFFLINE").size(), 0);

    // Tag five broker instances with the tenant tag
    int instancesRemainingToTag = 5;
    List<String> instances = pinotHelixResourceManager.getAllInstanceNames();
    for (String instance : instances) {
      if (instance.startsWith(CommonConstants.Helix.PREFIX_OF_BROKER_INSTANCE)) {
        pinotHelixResourceManager.getHelixAdmin().removeInstanceTag(HELIX_CLUSTER_NAME, instance, CommonConstants.Helix.UNTAGGED_BROKER_INSTANCE);
        pinotHelixResourceManager.getHelixAdmin().addInstanceTag(HELIX_CLUSTER_NAME, instance, "brokerTenant_BROKER");
        instancesRemainingToTag--;
        if (instancesRemainingToTag == 0) {
          break;
        }
      }
    }

    // Rebuilding the broker resource should grow the ideal state entry to the five tagged brokers
    pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags("faketable_OFFLINE");
    idealState = pinotHelixResourceManager.getHelixAdmin().getResourceIdealState(HELIX_CLUSTER_NAME, CommonConstants.Helix.BROKER_RESOURCE_INSTANCE);
    Assert.assertEquals(idealState.getInstanceStateMap("faketable_OFFLINE").size(), 5);
  } finally {
    try {
      // Untag all broker instances for other tests, whether or not the assertions above passed
      for (String instance : pinotHelixResourceManager.getAllInstanceNames()) {
        if (instance.startsWith(CommonConstants.Helix.PREFIX_OF_BROKER_INSTANCE)) {
          pinotHelixResourceManager.getHelixAdmin().removeInstanceTag(HELIX_CLUSTER_NAME, instance, "brokerTenant_BROKER");
          pinotHelixResourceManager.getHelixAdmin().addInstanceTag(HELIX_CLUSTER_NAME, instance, CommonConstants.Helix.UNTAGGED_BROKER_INSTANCE);
        }
      }
    } finally {
      // Delete table even if untagging failed
      pinotHelixResourceManager.deleteOfflineTable("faketable");
    }
  }
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class PinotLLCRealtimeSegmentManagerTest method testAutoReplaceConsumingSegment.
/**
 * Exercises {@code createConsumingSegment} on a {@code FakePinotLLCRealtimeSegmentManager} for partitions
 * that are reported as non-consuming.
 *
 * <p>Three partitions are prepared differently before the first call: one keeps its single initial segment,
 * one has its only segment removed from the ideal state entries, and one has a DONE metadata entry whose end
 * offset is higher than the smallest offset the fake Kafka returns. Every other partition gets a second
 * segment and is not passed in as non-consuming. The test then checks the sequence number, replica count and
 * start offset of each newly created segment, and repeats the repair twice more for one partition.
 *
 * @param tableConfigStartOffset value stored under the Kafka auto-offset-reset stream property of the mocked
 *     table config; when it equals {@code "smallest"} the partition with no prior segment is expected to start
 *     at the smallest offset, otherwise at the largest
 * @throws Exception if any segment manager call fails
 */
public void testAutoReplaceConsumingSegment(final String tableConfigStartOffset) throws Exception {
  FakePinotLLCRealtimeSegmentManager segmentManager = new FakePinotLLCRealtimeSegmentManager(true, null);
  final int nPartitions = 8;
  final int nInstances = 3;
  final int nReplicas = 2;
  final String topic = "someTopic";
  final String rtTableName = "table_REALTIME";
  List<String> instances = getInstanceList(nInstances);
  final String startOffset = KAFKA_OFFSET;
  IdealState idealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(rtTableName, nReplicas);
  segmentManager.setupHelixEntries(topic, rtTableName, nPartitions, instances, nReplicas, startOffset, DUMMY_HOST, idealState, false, 10000);

  // Add another segment for each partition, except for the three partitions handled specially below
  long now = System.currentTimeMillis();
  List<String> existingSegments = new ArrayList<>(segmentManager._idealStateEntries.keySet());
  final int partitionToBeFixed = 3;
  final int partitionWithHigherOffset = 4;
  final int emptyPartition = 5;
  final long smallestPartitionOffset = 0x259080984568L;
  final long largestPartitionOffset = smallestPartitionOffset + 100000;
  final long higherOffset = smallestPartitionOffset + 100;
  for (String segmentNameStr : existingSegments) {
    LLCSegmentName segmentName = new LLCSegmentName(segmentNameStr);
    switch (segmentName.getPartitionId()) {
      case partitionToBeFixed:
        // Do nothing, we will test adding a new segment for this partition when there is only one segment in there.
        break;
      case emptyPartition:
        // Remove existing segment, so we can test adding a new segment for this partition when none exists
        segmentManager._idealStateEntries.remove(segmentNameStr);
        break;
      case partitionWithHigherOffset:
        // Set segment metadata for this segment such that its offset is higher than startOffset we get from kafka.
        // In that case, we should choose the new segment offset as this one rather than the one kafka hands us.
        LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata();
        metadata.setSegmentName(segmentName.getSegmentName());
        metadata.setEndOffset(higherOffset);
        metadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
        segmentManager._metadataMap.put(segmentName.getSegmentName(), metadata);
        break;
      default:
        // Add a second segment for this partition. It will not be repaired.
        LLCSegmentName newSegmentName = new LLCSegmentName(segmentName.getTableName(), segmentName.getPartitionId(), segmentName.getSequenceNumber() + 1, now);
        List<String> hosts = segmentManager._idealStateEntries.get(segmentNameStr);
        segmentManager._idealStateEntries.put(newSegmentName.getSegmentName(), hosts);
        break;
    }
  }

  // Stream properties served by the mocked table config: consumer type and the auto-offset-reset under test
  Map<String, String> streamPropMap = new HashMap<>();
  streamPropMap.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.CONSUMER_TYPE), "simple");
  streamPropMap.put(StringUtil.join(".", CommonConstants.Helix.DataSource.STREAM_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.KAFKA_CONSUMER_PROPS_PREFIX, CommonConstants.Helix.DataSource.Realtime.Kafka.AUTO_OFFSET_RESET), tableConfigStartOffset);
  // NOTE(review): this instance is not used below; the construction is kept in case the constructor is
  // relied upon to validate the stream properties -- confirm before removing.
  KafkaStreamMetadata kafkaStreamMetadata = new KafkaStreamMetadata(streamPropMap);
  AbstractTableConfig tableConfig = mock(AbstractTableConfig.class);
  IndexingConfig indexingConfig = mock(IndexingConfig.class);
  when(indexingConfig.getStreamConfigs()).thenReturn(streamPropMap);
  when(tableConfig.getIndexingConfig()).thenReturn(indexingConfig);

  Set<Integer> nonConsumingPartitions = new HashSet<>();
  nonConsumingPartitions.add(partitionToBeFixed);
  nonConsumingPartitions.add(partitionWithHigherOffset);
  nonConsumingPartitions.add(emptyPartition);
  segmentManager._kafkaSmallestOffsetToReturn = smallestPartitionOffset;
  segmentManager._kafkaLargestOffsetToReturn = largestPartitionOffset;
  existingSegments = new ArrayList<>(segmentManager._idealStateEntries.keySet());
  // Start from empty path/record lists so that only the segments created below are counted
  segmentManager._paths.clear();
  segmentManager._records.clear();
  segmentManager.createConsumingSegment(rtTableName, nonConsumingPartitions, existingSegments, tableConfig);
  Assert.assertEquals(segmentManager._paths.size(), 3);
  Assert.assertEquals(segmentManager._records.size(), 3);
  Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 3);
  Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 3);

  int found = 0;
  for (int index = 0; index < segmentManager._paths.size(); index++) {
    String znodePath = segmentManager._paths.get(index);
    int slash = znodePath.lastIndexOf('/');
    String segmentNameStr = znodePath.substring(slash + 1);
    LLCSegmentName segmentName = new LLCSegmentName(segmentNameStr);
    ZNRecord znRecord;
    LLCRealtimeSegmentZKMetadata metadata;
    switch (segmentName.getPartitionId()) {
      case partitionToBeFixed:
        // We had left this partition with one segment. So, a second one should be created with a sequence number one
        // higher than starting. Its start offset should be what kafka returns.
        found++;
        Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 1);
        znRecord = segmentManager._records.get(index);
        metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
        Assert.assertEquals(metadata.getNumReplicas(), 2);
        Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
        break;
      case emptyPartition:
        // We had removed any segments in this partition. A new one should be created with the offset as returned
        // by kafka and with the starting sequence number.
        found++;
        Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER);
        znRecord = segmentManager._records.get(index);
        metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
        Assert.assertEquals(metadata.getNumReplicas(), 2);
        if (tableConfigStartOffset.equals("smallest")) {
          Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
        } else {
          Assert.assertEquals(metadata.getStartOffset(), largestPartitionOffset);
        }
        break;
      case partitionWithHigherOffset:
        // We had left this partition with one segment. In addition, we had the end-offset of the first segment set to
        // a value higher than that returned by kafka. So, a second one should be created with a sequence number one
        // higher than starting, and a start offset equal to the end offset of the first one.
        found++;
        Assert.assertEquals(segmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 1);
        znRecord = segmentManager._records.get(index);
        metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
        Assert.assertEquals(metadata.getNumReplicas(), 2);
        Assert.assertEquals(metadata.getStartOffset(), higherOffset);
        break;
    }
  }
  // We should see all three cases here.
  Assert.assertEquals(found, 3);

  // Now, if we make 'partitionToBeFixed' a non-consuming partition again, another segment should get added with the
  // same start offset as the previous one, since the kafka offset to return has not changed.
  Set<Integer> ncPartitions = new HashSet<>(1);
  ncPartitions.add(partitionToBeFixed);
  segmentManager.createConsumingSegment(rtTableName, ncPartitions, segmentManager.getExistingSegments(rtTableName), tableConfig);
  Assert.assertEquals(segmentManager._paths.size(), 4);
  Assert.assertEquals(segmentManager._records.size(), 4);
  Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 4);
  Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 4);
  // The latest zn record should be that of the new one we added.
  ZNRecord znRecord = segmentManager._records.get(3);
  LLCRealtimeSegmentZKMetadata metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
  Assert.assertEquals(metadata.getNumReplicas(), 2);
  Assert.assertEquals(metadata.getStartOffset(), smallestPartitionOffset);
  LLCSegmentName llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
  Assert.assertEquals(llcSegmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 2);
  Assert.assertEquals(llcSegmentName.getPartitionId(), partitionToBeFixed);

  // Now pretend the prev segment ended successfully, and set the end offset
  metadata.setEndOffset(metadata.getStartOffset() + 10);
  metadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
  segmentManager._records.remove(3);
  segmentManager._records.add(metadata.toZNRecord());
  segmentManager._metadataMap.put(metadata.getSegmentName(), metadata);
  // Double both offsets the fake kafka returns, so the smallest one is now past the end offset set above
  segmentManager._kafkaLargestOffsetToReturn *= 2;
  segmentManager._kafkaSmallestOffsetToReturn *= 2;
  ncPartitions.clear();
  ncPartitions.add(partitionToBeFixed);
  segmentManager.createConsumingSegment(rtTableName, ncPartitions, segmentManager.getExistingSegments(rtTableName), tableConfig);
  Assert.assertEquals(segmentManager._paths.size(), 5);
  Assert.assertEquals(segmentManager._records.size(), 5);
  Assert.assertEquals(segmentManager._oldSegmentNameStr.size(), 5);
  Assert.assertEquals(segmentManager._newSegmentNameStr.size(), 5);
  znRecord = segmentManager._records.get(4);
  metadata = new LLCRealtimeSegmentZKMetadata(znRecord);
  Assert.assertEquals(metadata.getNumReplicas(), 2);
  // In this case, since we have data loss, we will always start from the smallest kafka offset available.
  Assert.assertEquals(metadata.getStartOffset(), segmentManager.getKafkaPartitionOffset(null, "smallest", partitionToBeFixed));
  llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
  Assert.assertEquals(llcSegmentName.getSequenceNumber(), PinotLLCRealtimeSegmentManager.STARTING_SEQUENCE_NUMBER + 3);
  Assert.assertEquals(llcSegmentName.getPartitionId(), partitionToBeFixed);
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class PinotLLCRealtimeSegmentManagerTest method testInitialSegmentAssignments.
/**
 * Runs {@code setupHelixEntries} on a fresh {@code FakePinotLLCRealtimeSegmentManager} and checks what it
 * recorded: one property store path, one ZK record and one ideal state entry per partition, plus the table
 * name, replica count and create-new flag it was handed. Each per-partition metadata record must survive a
 * round trip through {@code ZNRecord}, have a start offset of -1, the expected replica count, and sit at
 * {@code /SEGMENTS/<table>/<segment>}.
 *
 * @param nPartitions number of Kafka partitions to set up
 * @param nInstances number of instances to request from {@code getInstanceList}
 * @param nReplicas number of replicas per segment
 * @param existingIS negated and passed as the flag argument of {@code setupHelixEntries}; the fake manager's
 *     {@code _createNew} is expected to equal {@code !existingIS}
 */
private void testInitialSegmentAssignments(final int nPartitions, final int nInstances, final int nReplicas, boolean existingIS) {
  final String kafkaTopic = "someTopic";
  final String realtimeTable = "table_REALTIME";
  FakePinotLLCRealtimeSegmentManager fakeManager = new FakePinotLLCRealtimeSegmentManager(true, null);
  List<String> serverInstances = getInstanceList(nInstances);
  IdealState emptyIdealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(realtimeTable, nReplicas);
  fakeManager.setupHelixEntries(kafkaTopic, realtimeTable, nPartitions, serverInstances, nReplicas, KAFKA_OFFSET, DUMMY_HOST, emptyIdealState, !existingIS, 1000000);

  // Snapshot everything the fake manager captured during setup
  final List<String> recordedPaths = fakeManager._paths;
  final List<ZNRecord> recordedRecords = fakeManager._records;
  final Map<String, List<String>> recordedIdealState = fakeManager._idealStateEntries;
  final String recordedTableName = fakeManager._realtimeTableName;
  final boolean recordedCreateNew = fakeManager._createNew;
  final int recordedReplicas = fakeManager._nReplicas;

  Assert.assertEquals(recordedPaths.size(), nPartitions);
  Assert.assertEquals(recordedRecords.size(), nPartitions);
  Assert.assertEquals(recordedIdealState.size(), nPartitions);
  Assert.assertEquals(recordedTableName, realtimeTable);
  Assert.assertEquals(recordedCreateNew, !existingIS);
  Assert.assertEquals(recordedReplicas, nReplicas);

  // Index the paths and the records by partition id
  Map<Integer, ZNRecord> recordByPartition = new HashMap<>(recordedPaths.size());
  Map<Integer, String> pathByPartition = new HashMap<>(recordedPaths.size());
  for (String propStorePath : recordedPaths) {
    // The segment name is the fourth token when the path is split on '/'
    String[] pathTokens = propStorePath.split("/");
    LLCSegmentName nameFromPath = new LLCSegmentName(pathTokens[3]);
    pathByPartition.put(nameFromPath.getPartitionId(), propStorePath);
  }
  for (ZNRecord record : recordedRecords) {
    String recordSegmentName = new LLCRealtimeSegmentZKMetadata(record).getSegmentName();
    recordByPartition.put(new LLCSegmentName(recordSegmentName).getPartitionId(), record);
  }
  Assert.assertEquals(pathByPartition.size(), nPartitions);
  Assert.assertEquals(recordByPartition.size(), nPartitions);

  for (int partitionId = 0; partitionId < nPartitions; partitionId++) {
    final LLCRealtimeSegmentZKMetadata zkMetadata = new LLCRealtimeSegmentZKMetadata(recordByPartition.get(partitionId));
    // Just for coverage
    zkMetadata.toString();
    // Metadata must be unchanged after a round trip through its ZNRecord form
    LLCRealtimeSegmentZKMetadata roundTripped = new LLCRealtimeSegmentZKMetadata(zkMetadata.toZNRecord());
    Assert.assertEquals(zkMetadata, roundTripped);

    final String propStorePath = pathByPartition.get(partitionId);
    final String segmentNameStr = zkMetadata.getSegmentName();
    Assert.assertEquals(zkMetadata.getStartOffset(), -1L);
    Assert.assertEquals(propStorePath, "/SEGMENTS/" + realtimeTable + "/" + segmentNameStr);

    LLCSegmentName parsedName = new LLCSegmentName(segmentNameStr);
    Assert.assertEquals(parsedName.getPartitionId(), partitionId);
    Assert.assertEquals(parsedName.getTableName(), TableNameBuilder.extractRawTableName(realtimeTable));
    Assert.assertEquals(zkMetadata.getNumReplicas(), nReplicas);
  }
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class PinotLLCRealtimeSegmentManagerTest method testPreExistingSegments.
/**
 * Checks that {@code setupHelixEntries} throws a {@link RuntimeException} when the fake segment manager was
 * constructed with an already existing segment, for both values of the boolean flag argument.
 *
 * @throws Exception declared for parity with the other tests; a missing exception is reported via
 *     {@code Assert.fail}
 */
@Test
public void testPreExistingSegments() throws Exception {
  final String kafkaTopic = "someTopic";
  final String realtimeTable = "table_REALTIME";
  LLCSegmentName preExistingSegment = new LLCSegmentName("someTable", 1, 31, 12355L);
  List<String> preExistingNames = Arrays.asList(preExistingSegment.getSegmentName());
  FakePinotLLCRealtimeSegmentManager fakeManager = new FakePinotLLCRealtimeSegmentManager(true, preExistingNames);
  List<String> serverInstances = getInstanceList(3);
  IdealState emptyIdealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(realtimeTable, 10);

  // Same expectation with the flag off and then on; Assert.fail raises an Error, so the catch cannot hide it
  for (boolean flag : new boolean[] {false, true}) {
    try {
      fakeManager.setupHelixEntries(kafkaTopic, realtimeTable, 8, serverInstances, 3, KAFKA_OFFSET, DUMMY_HOST, emptyIdealState, flag, 10000);
      Assert.fail("Did not get expected exception when setting up helix with existing segments in propertystore");
    } catch (RuntimeException expected) {
      // Expected
    }
  }
}
use of org.apache.helix.model.IdealState in project pinot by linkedin.
the class RetentionManagerTest method setupRealtimeTable.
/**
 * Creates the realtime table config, writes metadata for {@code nSegments} LLC segments of one Kafka
 * partition to the property store, and registers the matching ideal state with Helix.
 *
 * <p>Segments are numbered 1..{@code nSegments} with creation times one day apart; the newest one is half a
 * day older than {@code now}. Their states are:
 * <ul>
 *   <li>the last segment: status IN_PROGRESS, ideal state CONSUMING, always expected to remain;</li>
 *   <li>other even-numbered segments: status DONE, ideal state ONLINE, always expected to remain;</li>
 *   <li>other odd-numbered segments: status IN_PROGRESS, ideal state OFFLINE, expected to remain only while
 *       younger than {@code RetentionManager.getRetentionTimeForOldLLCSegmentsDays()} days.</li>
 * </ul>
 *
 * @param nSegments number of segments to create
 * @param now reference time in milliseconds used to derive the segment creation times
 * @return names of the segments expected to remain, per the rules above
 * @throws Exception if table config creation or a store/Helix call fails
 */
private Set<String> setupRealtimeTable(final int nSegments, final long now) throws Exception {
  final int replicaCount = 1;
  createRealtimeTableConfig(_realtimeTableName, replicaCount);
  Set<String> remainingSegments = new HashSet<>();
  IdealState idealState = PinotTableIdealStateBuilder.buildEmptyKafkaConsumerRealtimeIdealStateFor(_realtimeTableName, replicaCount);
  final int kafkaPartition = 5;
  final long millisPerDay = TimeUnit.DAYS.toMillis(1);
  final String serverName = "Server_localhost_0";
  // If we set the segment creation time to a certain value and compare it as being X ms old,
  // then we could get unpredictable results depending on whether it takes more or less than
  // one millisecond to get to RetentionManager time comparison code. To be safe, set the
  // milliseconds off by 1/2 day.
  long segmentCreationTime = now - (nSegments + 1) * millisPerDay + millisPerDay / 2;
  for (int seq = 1; seq <= nSegments; seq++) {
    segmentCreationTime += millisPerDay;
    LLCRealtimeSegmentZKMetadata segmentMetadata = createSegmentMetadata(replicaCount, segmentCreationTime);
    LLCSegmentName llcSegmentName = new LLCSegmentName(_testTableName, kafkaPartition, seq, segmentCreationTime);
    final String segName = llcSegmentName.getSegmentName();
    segmentMetadata.setSegmentName(segName);
    if (seq == nSegments) {
      // create consuming segment
      segmentMetadata.setStatus(CommonConstants.Segment.Realtime.Status.IN_PROGRESS);
      idealState.setPartitionState(segName, serverName, "CONSUMING");
      remainingSegments.add(segName);
    } else if (seq % 2 == 0) {
      // create ONLINE segment
      segmentMetadata.setStatus(CommonConstants.Segment.Realtime.Status.DONE);
      idealState.setPartitionState(segName, serverName, "ONLINE");
      remainingSegments.add(segName);
    } else {
      // create OFFLINE segment that never completed; it is expected to remain only while it is younger
      // than the retention period for old LLC segments
      segmentMetadata.setStatus(CommonConstants.Segment.Realtime.Status.IN_PROGRESS);
      idealState.setPartitionState(segName, serverName, "OFFLINE");
      if (now - segmentCreationTime < TimeUnit.DAYS.toMillis(RetentionManager.getRetentionTimeForOldLLCSegmentsDays())) {
        remainingSegments.add(segName);
      }
    }
    final String znodePath = ZKMetadataProvider.constructPropertyStorePathForSegment(_realtimeTableName, segName);
    _propertyStore.set(znodePath, segmentMetadata.toZNRecord(), AccessOption.PERSISTENT);
  }
  _helixAdmin.addResource(HELIX_CLUSTER_NAME, _realtimeTableName, idealState);
  return remainingSegments;
}
Aggregations