use of org.apache.helix.HelixManager in project ambry by linkedin.
the class BlobStoreTest method storeErrorTriggerDisableReplicaTest.
/**
* Test that replica is correctly disabled when store is shut down due to disk I/O error.
* <p>
* The test runs three phases against a store wired to a mocked Helix admin:
* <ol>
*   <li>the admin returns a {@code null} InstanceConfig, so the put failure must leave the disabled
*       partition list unset;</li>
*   <li>the admin returns the tracked InstanceConfig, so the put failure must record {@code storeId}
*       as a disabled partition / disabled replica;</li>
*   <li>the store is started again, after which the replica must no longer be listed as disabled.</li>
* </ol>
* @throws Exception
*/
@Test
public void storeErrorTriggerDisableReplicaTest() throws Exception {
final String RESOURCE_NAME = "0";
final String CLUSTER_NAME = "BlobStoreTest";
// setup testing environment
// The shared store is shut down first; it is brought back via reloadStore() at the end of the test.
store.shutdown();
List<TestUtils.ZkInfo> zkInfoList = new ArrayList<>();
zkInfoList.add(new TestUtils.ZkInfo(null, "DC1", (byte) 0, 2199, false));
JSONObject zkJson = constructZkLayoutJSON(zkInfoList);
properties.setProperty("clustermap.cluster.name", CLUSTER_NAME);
properties.setProperty("clustermap.datacenter.name", "DC1");
properties.setProperty("clustermap.host.name", "localhost");
properties.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
// NOTE(review): going by the property names, one I/O error is the shutdown threshold and the replica
// status delegate is switched on - confirm against StoreConfig.
properties.setProperty("store.io.error.count.to.trigger.shutdown", "1");
properties.setProperty("store.replica.status.delegate.enable", "true");
properties.setProperty("store.set.local.partition.state.enabled", "true");
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
// Held in an AtomicReference so the mocked setInstanceConfig() below can swap in the updated config.
AtomicReference<InstanceConfig> instanceConfig = new AtomicReference<>(new InstanceConfig("localhost"));
instanceConfig.get().setPort("2222");
// Ideal state whose list fields contain a single entry keyed by storeId (with a null value).
Map<String, List<String>> listMap = new HashMap<>();
listMap.put(storeId, null);
ZNRecord znRecord = new ZNRecord("localhost");
znRecord.setListFields(listMap);
IdealState idealState = new IdealState(znRecord);
idealState.setRebalanceMode(IdealState.RebalanceMode.SEMI_AUTO);
// mock helix related components
HelixAdmin mockHelixAdmin = mock(HelixAdmin.class);
// Reads always return whatever InstanceConfig is currently tracked by the reference.
when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
when(mockHelixAdmin.getResourcesInCluster(eq(CLUSTER_NAME))).thenReturn(Collections.singletonList(RESOURCE_NAME));
when(mockHelixAdmin.getResourceIdealState(eq(CLUSTER_NAME), eq(RESOURCE_NAME))).thenReturn(idealState);
// Writes capture the third argument (the new InstanceConfig) and report success.
when(mockHelixAdmin.setInstanceConfig(any(), any(), any())).then(invocation -> {
instanceConfig.set(invocation.getArgument(2));
return true;
});
HelixManager mockHelixManager = mock(HelixManager.class);
when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);
// Factory that hands out the mocked manager regardless of the requested cluster/instance/zk address.
HelixFactory mockHelixFactory = new HelixFactory() {
@Override
public HelixManager getZKHelixManager(String clusterName, String instanceName, InstanceType instanceType, String zkAddr) {
return mockHelixManager;
}
};
MockHelixParticipant.metricRegistry = new MetricRegistry();
MockHelixParticipant mockParticipant = new MockHelixParticipant(clusterMapConfig, mockHelixFactory);
// NOTE(review): presumably makes the mock participant run the real disable-replica logic - confirm in MockHelixParticipant.
mockParticipant.overrideDisableReplicaMethod = false;
ReplicaStatusDelegate replicaStatusDelegate = new ReplicaStatusDelegate(mockParticipant);
BlobStore testStore = createBlobStore(getMockAmbryReplica(clusterMapConfig, tempDirStr), new StoreConfig(new VerifiableProperties(properties)), Collections.singletonList(replicaStatusDelegate));
testStore.start();
assertTrue("Store should start successfully", testStore.isStarted());
// create corrupted write set
// The write set is constructed with a StoreException carrying StoreErrorCodes.IOError; the put() calls below
// are expected to fail with that error code.
MessageInfo corruptedInfo = new MessageInfo(getUniqueId(), PUT_RECORD_SIZE, Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), Utils.Infinite_Time);
MessageWriteSet corruptedWriteSet = new MockMessageWriteSet(Collections.singletonList(corruptedInfo), Collections.singletonList(ByteBuffer.allocate(PUT_RECORD_SIZE)), new StoreException(StoreException.IO_ERROR_STR, StoreErrorCodes.IOError));
// 1. mock failure case
// Re-stub the admin so no InstanceConfig can be fetched.
when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).thenReturn(null);
// trigger store exception when calling store.put()
try {
testStore.put(corruptedWriteSet);
fail("should throw exception");
} catch (StoreException e) {
assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
}
assertNull("Disabled partition list should be null as disabling replica didn't succeed", instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
// 2. mock success case
// Restore the stub that returns the tracked InstanceConfig, then start the store again before retrying the put.
when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
testStore.start();
assertTrue("Store should start successfully", testStore.isStarted());
try {
testStore.put(corruptedWriteSet);
fail("should throw exception");
} catch (StoreException e) {
assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
}
assertEquals("Disabled partition name is not expected", storeId, instanceConfig.get().getDisabledPartitions(RESOURCE_NAME).get(0));
// verify "DISABLED" list in InstanceConfig has correct partition id.
assertEquals("Disabled replica list is not expected", Collections.singletonList(storeId), getDisabledReplicas(instanceConfig.get()));
// 3. mock disk is replaced case, restart should succeed
testStore.start();
assertNull("Disabled partition list should be null as restart will enable same replica", instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
assertTrue("Disabled replica list should be empty", getDisabledReplicas(instanceConfig.get()).isEmpty());
// Clean up: stop the test store and restore the shared store for subsequent tests.
testStore.shutdown();
reloadStore();
}
use of org.apache.helix.HelixManager in project ambry by linkedin.
the class HelixClusterManagerTest method listenCrossColoTest.
/**
 * Ensure that effects of the listenCrossColo config is as expected. When it is set to false, the Helix cluster manager
 * initializes fine, but listens to subsequent InstanceConfig changes in the local colo only.
 * <p>
 * The test is skipped when the composite cluster manager is in use. It checks three things:
 * <ol>
 *   <li>the local Helix manager is always connected, while remote managers are connected only when
 *       {@code listenCrossColo} is true;</li>
 *   <li>an instance config change notification bumps the trigger counter once per listened datacenter;</li>
 *   <li>sealing a replica in the remote datacenter removes its partition from the writable set only when
 *       {@code listenCrossColo} is true and the override is not enabled.</li>
 * </ol>
 */
@Test
public void listenCrossColoTest() {
  assumeTrue(!useComposite);
  HelixClusterManager helixClusterManager = (HelixClusterManager) clusterManager;
  Counter instanceTriggerCounter = helixClusterManager.helixClusterManagerMetrics.instanceConfigChangeTriggerCount;
  Map<String, DcInfo> dcInfosMap = helixClusterManager.getDcInfosMap();
  // Only Helix-backed datacenters expose a HelixManager; other DcInfo kinds are filtered out.
  Map<String, HelixManager> helixManagerMap = dcInfosMap.entrySet()
      .stream()
      .filter(e -> e.getValue() instanceof HelixDcInfo)
      .collect(Collectors.toMap(Map.Entry::getKey, e -> ((HelixDcInfo) e.getValue()).helixManager));
  for (Map.Entry<String, HelixManager> entry : helixManagerMap.entrySet()) {
    if (entry.getKey().equals(localDc)) {
      assertTrue("Helix cluster manager should always be connected to the local Helix manager",
          entry.getValue().isConnected());
    } else {
      // The message previously concatenated to "...listenCrossColo isset to true"; the space is restored here.
      assertEquals(
          "Helix cluster manager should be connected to the remote Helix managers if and only if listenCrossColo is "
              + "set to true", listenCrossColo, entry.getValue().isConnected());
    }
  }

  // A notification should reach every datacenter when listening cross-colo, otherwise only the local one.
  long instanceConfigChangeTriggerCount = instanceTriggerCounter.getCount();
  helixCluster.triggerInstanceConfigChangeNotification();
  assertEquals("Number of trigger count should be in accordance to listenCrossColo value",
      instanceConfigChangeTriggerCount + (listenCrossColo ? helixDcs.length : 1), instanceTriggerCounter.getCount());

  // Pick any instance in the remote datacenter and one of its replicas whose partition is currently writable.
  InstanceConfig remoteInstanceConfig = helixCluster.getInstanceConfigsFromDcs(helixDcs)
      .stream()
      .filter(e -> ClusterMapUtils.getDcName(e).equals(remoteDc))
      .findAny()
      .get();
  DataNodeId remote = helixClusterManager.getDataNodeId(remoteInstanceConfig.getHostName(),
      Integer.parseInt(remoteInstanceConfig.getPort()));
  Set<PartitionId> writablePartitions = new HashSet<>(helixClusterManager.getWritablePartitionIds(null));
  PartitionId partitionIdToSealInRemote = helixClusterManager.getReplicaIds(remote)
      .stream()
      .filter(e -> writablePartitions.contains(e.getPartitionId()))
      .findAny()
      .get()
      .getPartitionId();

  // Mark that partition as sealed on the remote instance and notify listeners.
  remoteInstanceConfig.getRecord()
      .setListField(SEALED_STR, Collections.singletonList(partitionIdToSealInRemote.toPathString()));
  helixCluster.triggerInstanceConfigChangeNotification();
  // The partition stays writable exactly when the remote change is not observed or the override is enabled.
  assertEquals("If replica in remote is sealed, partition should be sealed if and only if listenCrossColo is true "
          + "and override is disabled", !listenCrossColo || overrideEnabled,
      helixClusterManager.getWritablePartitionIds(null).contains(partitionIdToSealInRemote));
}
use of org.apache.helix.HelixManager in project ambry by linkedin.
the class AmbryStateModelFactoryTest method testAmbryPartitionStateModel.
/**
 * Test that {@link HelixParticipantMetrics} keeps track of partition during state transition
 * <p>
 * A single partition is walked through OFFLINE -> BOOTSTRAP -> STANDBY -> LEADER -> STANDBY -> INACTIVE -> OFFLINE
 * -> DROPPED, followed by the ERROR -> DROPPED and ERROR -> OFFLINE transitions and two resets. After each step the
 * per-state partition counts (and the dropped counter) are checked. The test only runs for the Ambry state model
 * definition.
 * <p>
 * The static {@code MockHelixManagerFactory.overrideGetHelixManager} flag is restored in a {@code finally} block so
 * that a failing assertion cannot leak the override into other tests.
 */
@Test
public void testAmbryPartitionStateModel() {
  assumeTrue(stateModelDef.equals(ClusterMapConfig.AMBRY_STATE_MODEL_DEF));
  MetricRegistry metricRegistry = new MetricRegistry();
  MockHelixParticipant.metricRegistry = metricRegistry;

  // Data node config whose disabled-replica set is a local, mutable set that the test can inspect.
  DataNodeConfig mockDataNodeConfig = Mockito.mock(DataNodeConfig.class);
  Set<String> disabledPartitionSet = new HashSet<>();
  Set<String> enabledPartitionSet = new HashSet<>();
  when(mockDataNodeConfig.getDisabledReplicas()).thenReturn(disabledPartitionSet);
  DataNodeConfigSource mockConfigSource = Mockito.mock(DataNodeConfigSource.class);
  when(mockConfigSource.get(anyString())).thenReturn(mockDataNodeConfig);

  // Instance config mock that records every partition it is asked to enable.
  HelixAdmin mockHelixAdmin = Mockito.mock(HelixAdmin.class);
  InstanceConfig mockInstanceConfig = Mockito.mock(InstanceConfig.class);
  doAnswer(invocation -> {
    String partitionName = invocation.getArgument(1);
    boolean enable = invocation.getArgument(2);
    if (enable) {
      enabledPartitionSet.add(partitionName);
    }
    return null;
  }).when(mockInstanceConfig).setInstanceEnabledForPartition(any(), any(), anyBoolean());
  when(mockHelixAdmin.getInstanceConfig(anyString(), anyString())).thenReturn(mockInstanceConfig);
  when(mockHelixAdmin.setInstanceConfig(anyString(), anyString(), any())).thenReturn(true);
  HelixManager mockHelixManager = Mockito.mock(HelixManager.class);
  when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);

  MockHelixManagerFactory.overrideGetHelixManager = true;
  try {
    MockHelixParticipant.mockHelixFactory = new MockHelixManagerFactory(mockConfigSource, mockHelixManager);
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(config);
    HelixParticipantMetrics participantMetrics = mockHelixParticipant.getHelixParticipantMetrics();
    String resourceName = "0";
    String partitionName = "1";
    Message mockMessage = Mockito.mock(Message.class);
    when(mockMessage.getPartitionName()).thenReturn(partitionName);
    when(mockMessage.getResourceName()).thenReturn(resourceName);
    AmbryPartitionStateModel stateModel =
        new AmbryPartitionStateModel(resourceName, partitionName, mockHelixParticipant, config);
    mockHelixParticipant.setInitialLocalPartitions(new HashSet<>(Collections.singletonList(partitionName)));
    assertEquals("Offline count is not expected", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    // OFFLINE -> BOOTSTRAP
    stateModel.onBecomeBootstrapFromOffline(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Bootstrap count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "bootstrapPartitionCount"));
    // BOOTSTRAP -> STANDBY
    stateModel.onBecomeStandbyFromBootstrap(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Standby count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
    assertEquals("Bootstrap count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "bootstrapPartitionCount"));
    // STANDBY -> LEADER
    stateModel.onBecomeLeaderFromStandby(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Leader count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "leaderPartitionCount"));
    assertEquals("Standby count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
    // LEADER -> STANDBY
    stateModel.onBecomeStandbyFromLeader(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Standby count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
    assertEquals("Leader count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "leaderPartitionCount"));
    // STANDBY -> INACTIVE
    stateModel.onBecomeInactiveFromStandby(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Inactive count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "inactivePartitionCount"));
    assertEquals("Standby count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
    // INACTIVE -> OFFLINE
    stateModel.onBecomeOfflineFromInactive(mockMessage, null);
    assertEquals("Offline count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Inactive count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "inactivePartitionCount"));
    // OFFLINE -> DROPPED
    // The partition is put in the disabled set first; dropping it is expected to clear it and re-enable the partition.
    disabledPartitionSet.add(partitionName);
    stateModel.onBecomeDroppedFromOffline(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    assertEquals("Dropped count should be updated", 1, participantMetrics.partitionDroppedCount.getCount());
    assertTrue("Partition should be removed from disabled partition set", disabledPartitionSet.isEmpty());
    assertEquals("Mismatch in enabled partition", partitionName, enabledPartitionSet.iterator().next());
    // ERROR -> DROPPED
    stateModel.onBecomeDroppedFromError(mockMessage, null);
    assertEquals("Dropped count should be updated", 2, participantMetrics.partitionDroppedCount.getCount());
    // ERROR -> OFFLINE (this occurs when we use Helix API to reset certain partition in ERROR state)
    stateModel.onBecomeOfflineFromError(mockMessage, null);
    assertEquals("Offline count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    // reset method
    stateModel.reset();
    assertEquals("Offline count should be 1 after reset", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    // call reset method again to mock the case where same partition is reset multiple times during zk disconnection or shutdown
    stateModel.reset();
    assertEquals("Offline count should still be 1 after reset twice", 1,
        getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
  } finally {
    // Always restore the static override, even when an assertion above fails.
    MockHelixManagerFactory.overrideGetHelixManager = false;
  }
}
use of org.apache.helix.HelixManager in project pinot by linkedin.
the class ControllerRequestBuilderUtil method addFakeBrokerInstancesToAutoJoinHelixCluster.
/**
 * Connects {@code numInstances} fake broker participants, named {@code Broker_localhost_<index>}, to the given
 * Helix cluster and tags each of them.
 * <p>
 * Every participant registers an {@link EmptyBrokerOnlineOfflineStateModelFactory} before connecting. Once
 * connected it is tagged with the default tenant's broker tag when {@code isSingleTenant} is true, or with
 * {@code UNTAGGED_BROKER_INSTANCE} otherwise. The method pauses for one second after each instance.
 *
 * @param helixClusterName name of the Helix cluster to join
 * @param zkServer ZooKeeper address handed to the Helix manager factory
 * @param numInstances number of fake brokers to add
 * @param isSingleTenant whether the brokers are tagged for the default tenant
 * @throws Exception if connecting, tagging or sleeping fails
 */
public static void addFakeBrokerInstancesToAutoJoinHelixCluster(String helixClusterName, String zkServer, int numInstances, boolean isSingleTenant) throws Exception {
  int index = 0;
  while (index < numInstances) {
    String fakeBrokerName = "Broker_localhost_" + index;
    HelixManager participant =
        HelixManagerFactory.getZKHelixManager(helixClusterName, fakeBrokerName, InstanceType.PARTICIPANT, zkServer);

    // The state model factory has to be registered before the participant connects.
    StateMachineEngine engine = participant.getStateMachineEngine();
    StateModelFactory<?> brokerFactory = new EmptyBrokerOnlineOfflineStateModelFactory();
    engine.registerStateModelFactory(EmptyBrokerOnlineOfflineStateModelFactory.getStateModelDef(), brokerFactory);
    participant.connect();

    // Single-tenant setups use the default tenant's broker tag; otherwise the broker stays untagged.
    participant.getClusterManagmentTool().addInstanceTag(helixClusterName, fakeBrokerName,
        isSingleTenant
            ? ControllerTenantNameBuilder.getBrokerTenantNameForTenant(ControllerTenantNameBuilder.DEFAULT_TENANT_NAME)
            : UNTAGGED_BROKER_INSTANCE);

    Thread.sleep(1000);
    index++;
  }
}
use of org.apache.helix.HelixManager in project pinot by linkedin.
the class ControllerRequestBuilderUtil method addFakeDataInstancesToAutoJoinHelixCluster.
/**
 * Connects {@code numInstances} fake server participants, named {@code Server_localhost_<index>}, to the given
 * Helix cluster, tags each of them and stores an admin port in its participant config.
 * <p>
 * Every participant registers an {@link EmptySegmentOnlineOfflineStateModelFactory} before connecting. When
 * {@code isSingleTenant} is true the instance gets the default tenant's offline tag followed by its realtime tag;
 * otherwise it gets {@code UNTAGGED_SERVER_INSTANCE}. The admin port written for instance {@code i} is
 * {@code adminPort + i}.
 *
 * @param helixClusterName name of the Helix cluster to join
 * @param zkServer ZooKeeper address handed to the Helix manager factory
 * @param numInstances number of fake servers to add
 * @param isSingleTenant whether the servers are tagged for the default tenant
 * @param adminPort admin port of the first instance; later instances use consecutive ports
 * @throws Exception if connecting, tagging or writing the config fails
 */
public static void addFakeDataInstancesToAutoJoinHelixCluster(String helixClusterName, String zkServer, int numInstances, boolean isSingleTenant, int adminPort) throws Exception {
  for (int serverIndex = 0; serverIndex < numInstances; serverIndex++) {
    String fakeServerName = "Server_localhost_" + serverIndex;
    HelixManager participant =
        HelixManagerFactory.getZKHelixManager(helixClusterName, fakeServerName, InstanceType.PARTICIPANT, zkServer);

    // The state model factory has to be registered before the participant connects.
    StateMachineEngine engine = participant.getStateMachineEngine();
    StateModelFactory<?> segmentFactory = new EmptySegmentOnlineOfflineStateModelFactory();
    engine.registerStateModelFactory(EmptySegmentOnlineOfflineStateModelFactory.getStateModelDef(), segmentFactory);
    participant.connect();

    if (!isSingleTenant) {
      participant.getClusterManagmentTool().addInstanceTag(helixClusterName, fakeServerName, UNTAGGED_SERVER_INSTANCE);
    } else {
      // Default tenant: offline tag first, then realtime tag.
      participant.getClusterManagmentTool()
          .addInstanceTag(helixClusterName, fakeServerName,
              TableNameBuilder.OFFLINE_TABLE_NAME_BUILDER.forTable(ControllerTenantNameBuilder.DEFAULT_TENANT_NAME));
      participant.getClusterManagmentTool()
          .addInstanceTag(helixClusterName, fakeServerName,
              TableNameBuilder.REALTIME_TABLE_NAME_BUILDER.forTable(ControllerTenantNameBuilder.DEFAULT_TENANT_NAME));
    }

    // Publish this instance's admin port under its participant-scoped config.
    HelixConfigScope participantScope =
        new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.PARTICIPANT, helixClusterName)
            .forParticipant(fakeServerName)
            .build();
    Map<String, String> adminPortConfig = new HashMap<>();
    adminPortConfig.put(CommonConstants.Helix.Instance.ADMIN_PORT_KEY, Integer.toString(adminPort + serverIndex));
    participant.getClusterManagmentTool().setConfig(participantScope, adminPortConfig);
  }
}
Aggregations