use of org.apache.helix.mock.participant.ErrTransition in project helix by apache.
the class TestResetInstance method testResetInstance.
/**
 * Exercises the {@code resetInstance} management command through the admin REST endpoint.
 *
 * <p>The test sets up a cluster with one resource (prefix "TestDB", 10 partitions, 3 replicas,
 * "MasterSlave" state model) on {@code n} participants. The first participant,
 * {@code localhost_12918}, is given an {@link ErrTransition} keyed by SLAVE-MASTER for
 * TestDB0_4 and OFFLINE-SLAVE for TestDB0_8. The cluster is first verified with those two
 * partitions listed in the error-state map; then the custom transition is cleared, the reset
 * command is posted, and the cluster is verified again with no error-state map.
 *
 * @throws Exception if cluster setup, the REST call, or verification throws
 */
@Test
public void testResetInstance() throws Exception {
  String className = TestHelper.getTestClassName();
  String methodName = TestHelper.getTestMethodName();
  String clusterName = className + "_" + methodName;
  final int n = 5;

  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));

  TestHelper.setupCluster(clusterName, ZK_ADDR,
      12918, // participant port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      10, // partitions per resource
      n, // number of nodes
      3, // replicas
      "MasterSlave",
      true); // do rebalance

  // start controller
  ClusterControllerManager controller =
      new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();

  // transition name -> partitions handed to ErrTransition for the first participant
  Map<String, Set<String>> errPartitions = new HashMap<String, Set<String>>();
  errPartitions.put("SLAVE-MASTER", TestHelper.setOf("TestDB0_4"));
  errPartitions.put("OFFLINE-SLAVE", TestHelper.setOf("TestDB0_8"));

  // start mock participants; only the first one gets the ErrTransition
  MockParticipantManager[] participants = new MockParticipantManager[n];
  for (int i = 0; i < n; i++) {
    String instanceName = "localhost_" + (12918 + i);
    participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
    if (i == 0) {
      participants[i].setTransition(new ErrTransition(errPartitions));
    }
    participants[i].syncStart();
  }

  // verify cluster, passing the two partitions on localhost_12918 as the error-state map
  Map<String, Map<String, String>> errStateMap = new HashMap<String, Map<String, String>>();
  errStateMap.put("TestDB0", new HashMap<String, String>());
  errStateMap.get("TestDB0").put("TestDB0_4", "localhost_12918");
  errStateMap.get("TestDB0").put("TestDB0_8", "localhost_12918");
  boolean result = ClusterStateVerifier.verifyByZkCallback(
      new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName, errStateMap));
  Assert.assertTrue(result, "Cluster verification fails");

  // reset node "localhost_12918": clear the custom transition first, then post the command
  participants[0].setTransition(null);
  String hostName = "localhost_12918";
  String instanceUrl =
      "http://localhost:" + ADMIN_PORT + "/clusters/" + clusterName + "/instances/" + hostName;
  Map<String, String> paramMap = new HashMap<String, String>();
  paramMap.put(JsonParameters.MANAGEMENT_COMMAND, ClusterSetup.resetInstance);
  TestHelixAdminScenariosRest.assertSuccessPostOperation(instanceUrl, paramMap, false);

  // this time the verifier is built without an error-state map
  result = ClusterStateVerifier.verifyByZkCallback(
      new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
  Assert.assertTrue(result, "Cluster verification fails");

  // clean up: stop the controller, then every participant that was started
  controller.syncStop();
  for (int i = 0; i < n; i++) {
    participants[i].syncStop();
  }

  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.mock.participant.ErrTransition in project helix by apache.
the class TestResetPartitionState method testResetPartitionState.
/**
 * Exercises the {@code resetPartition} management command through the admin REST endpoint.
 *
 * <p>The first participant, {@code localhost_12918}, is started with an {@link ErrTransition}
 * keyed by SLAVE-MASTER for TestDB0_4 and OFFLINE-SLAVE for TestDB0_8, and the cluster is
 * verified with those two partitions in the error-state map. The test then:
 * <ol>
 *   <li>posts a reset for the partition name "TestDB0_nonExist" (final helper argument
 *       {@code true});</li>
 *   <li>swaps in an {@code ErrTransitionWithResetCnt} built from the now-empty map, zeroes
 *       {@code _errToOfflineInvoked}, and posts a reset for both error partitions (final
 *       helper argument {@code false});</li>
 *   <li>polls up to 10 times, re-posting the same reset with final argument {@code true},
 *       until the cluster verifies without an error-state map;</li>
 *   <li>asserts that {@code _errToOfflineInvoked} equals 2.</li>
 * </ol>
 *
 * <p>NOTE(review): the last argument of {@code assertSuccessPostOperation} presumably flags
 * that an error response is expected; confirm against the helper.
 *
 * @throws Exception if cluster setup, a REST call, or verification throws
 */
@Test
public void testResetPartitionState() throws Exception {
  String className = TestHelper.getTestClassName();
  String methodName = TestHelper.getTestMethodName();
  String clusterName = className + "_" + methodName;
  final int n = 5;

  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));

  TestHelper.setupCluster(clusterName, ZK_ADDR,
      12918, // participant port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      10, // partitions per resource
      n, // number of nodes
      3, // replicas
      "MasterSlave",
      true); // do rebalance

  // start controller
  ClusterControllerManager controller =
      new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();

  // transition name -> partitions handed to ErrTransition for the first participant
  Map<String, Set<String>> errPartitions = new HashMap<String, Set<String>>();
  errPartitions.put("SLAVE-MASTER", TestHelper.setOf("TestDB0_4"));
  errPartitions.put("OFFLINE-SLAVE", TestHelper.setOf("TestDB0_8"));

  // start mock participants; only the first one gets the ErrTransition
  MockParticipantManager[] participants = new MockParticipantManager[n];
  for (int i = 0; i < n; i++) {
    String instanceName = "localhost_" + (12918 + i);
    participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
    if (i == 0) {
      participants[i].setTransition(new ErrTransition(errPartitions));
    }
    participants[i].syncStart();
  }

  // verify cluster, passing the two partitions on localhost_12918 as the error-state map
  Map<String, Map<String, String>> errStateMap = new HashMap<String, Map<String, String>>();
  errStateMap.put("TestDB0", new HashMap<String, String>());
  errStateMap.get("TestDB0").put("TestDB0_4", "localhost_12918");
  errStateMap.get("TestDB0").put("TestDB0_8", "localhost_12918");
  boolean result = ClusterStateVerifier.verifyByZkCallback(
      new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName, errStateMap));
  Assert.assertTrue(result, "Cluster verification fails");

  // reset a non-exist partition, should throw exception
  String hostName = "localhost_12918";
  String instanceUrl = getInstanceUrl(clusterName, hostName);
  Map<String, String> paramMap = new HashMap<String, String>();
  paramMap.put(JsonParameters.MANAGEMENT_COMMAND, ClusterSetup.resetPartition);
  paramMap.put(JsonParameters.PARTITION, "TestDB0_nonExist");
  paramMap.put(JsonParameters.RESOURCE, "TestDB0");
  LOG.info("IGNORABLE exception: test reset non-exist partition");
  TestHelixAdminScenariosRest.assertSuccessPostOperation(instanceUrl, paramMap, true);

  // reset 2 error partitions; the replacement transition receives the emptied map
  errPartitions.clear();
  participants[0].setTransition(new ErrTransitionWithResetCnt(errPartitions));
  clearStatusUpdate(clusterName, hostName, "TestDB0", "TestDB0_4");
  _errToOfflineInvoked.set(0);
  paramMap.put(JsonParameters.PARTITION, "TestDB0_4 TestDB0_8");
  TestHelixAdminScenariosRest.assertSuccessPostOperation(instanceUrl, paramMap, false);

  for (int i = 0; i < 10; i++) {
    // wait reset to be done
    Thread.sleep(400);
    LOG.info("IGNORABLE exception: test reset non-error partition");
    TestHelixAdminScenariosRest.assertSuccessPostOperation(instanceUrl, paramMap, true);
    result = ClusterStateVerifier.verifyByZkCallback(
        new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    if (result) {
      break;
    }
  }
  Assert.assertTrue(result);
  Assert.assertEquals(_errToOfflineInvoked.get(), 2, "reset() should be invoked 2 times");

  // clean up: stop the controller, then every participant that was started
  controller.syncStop();
  for (int i = 0; i < n; i++) {
    participants[i].syncStop();
  }

  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.mock.participant.ErrTransition in project helix by apache.
the class TestResetPartitionState method testResetPartitionState.
/**
 * Exercises the {@code --resetPartition} command of {@code ClusterSetup} from the command line.
 *
 * <p>The first participant, {@code localhost_12918}, is started with an {@link ErrTransition}
 * keyed by SLAVE-MASTER for TestDB0_4 and OFFLINE-SLAVE for TestDB0_8, and the cluster is
 * verified with those two partitions in the error-state map. The test then:
 * <ol>
 *   <li>expects an exception when resetting the partition name "TestDB0_nonExist";</li>
 *   <li>resets TestDB0_4 alone, expects an exception when the same reset is issued a second
 *       time, verifies the cluster with only TestDB0_8 left in the error-state map, and
 *       asserts {@code _errToOfflineInvoked} is 1;</li>
 *   <li>resets TestDB0_8, verifies the cluster with no error-state map, and asserts
 *       {@code _errToOfflineInvoked} is 2.</li>
 * </ol>
 *
 * @throws Exception if cluster setup, a command invocation outside the expected-failure
 *     blocks, or verification throws
 */
@Test
public void testResetPartitionState() throws Exception {
  String className = TestHelper.getTestClassName();
  String methodName = TestHelper.getTestMethodName();
  String clusterName = className + "_" + methodName;
  final int n = 5;

  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));

  TestHelper.setupCluster(clusterName, ZK_ADDR,
      12918, // participant port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      10, // partitions per resource
      n, // number of nodes
      3, // replicas
      "MasterSlave",
      true); // do rebalance

  // start controller
  ClusterControllerManager controller =
      new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();

  // transition name -> partitions handed to ErrTransition for the first participant;
  // a plain HashMap is used because the map is mutated later in the test
  Map<String, Set<String>> errPartitions = new HashMap<String, Set<String>>();
  errPartitions.put("SLAVE-MASTER", TestHelper.setOf("TestDB0_4"));
  errPartitions.put("OFFLINE-SLAVE", TestHelper.setOf("TestDB0_8"));

  // start mock participants; only the first one gets the ErrTransition
  MockParticipantManager[] participants = new MockParticipantManager[n];
  for (int i = 0; i < n; i++) {
    String instanceName = "localhost_" + (12918 + i);
    participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
    if (i == 0) {
      participants[i].setTransition(new ErrTransition(errPartitions));
    }
    participants[i].syncStart();
  }

  // verify cluster, passing the two partitions on localhost_12918 as the error-state map
  Map<String, Map<String, String>> errStateMap = new HashMap<String, Map<String, String>>();
  errStateMap.put("TestDB0", new HashMap<String, String>());
  errStateMap.get("TestDB0").put("TestDB0_4", "localhost_12918");
  errStateMap.get("TestDB0").put("TestDB0_8", "localhost_12918");
  boolean result = ClusterStateVerifier.verifyByZkCallback(
      new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName, errStateMap));
  Assert.assertTrue(result, "Cluster verification fails");

  // reset a non-exist partition, should throw exception
  String command = "--zkSvr " + ZK_ADDR + " --resetPartition " + clusterName
      + " localhost_12918 TestDB0 TestDB0_nonExist";
  try {
    ClusterSetup.processCommandLineArgs(command.split("\\s+"));
    Assert.fail("Should throw exception on reset a non-exist partition");
  } catch (Exception expected) {
    // intentionally ignored: the command is required to fail for an unknown partition
  }

  // reset one error partition; the replacement transition keeps only the OFFLINE-SLAVE entry
  errPartitions.remove("SLAVE-MASTER");
  participants[0].setTransition(new ErrTransitionWithResetCnt(errPartitions));
  clearStatusUpdate(clusterName, "localhost_12918", "TestDB0", "TestDB0_4");
  _errToOfflineInvoked = 0;
  command = "--zkSvr " + ZK_ADDR + " --resetPartition " + clusterName
      + " localhost_12918 TestDB0 TestDB0_4";
  ClusterSetup.processCommandLineArgs(command.split("\\s+"));

  // wait reset to be done
  Thread.sleep(200);
  try {
    ClusterSetup.processCommandLineArgs(command.split("\\s+"));
    Assert.fail("Should throw exception on reset a partition not in ERROR state");
  } catch (Exception expected) {
    // intentionally ignored: the second reset of the same partition is required to fail
  }

  // only TestDB0_8 remains in the error-state map for this verification
  errStateMap.get("TestDB0").remove("TestDB0_4");
  result = ClusterStateVerifier.verifyByZkCallback(
      new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName, errStateMap));
  Assert.assertTrue(result, "Cluster verification fails");
  Assert.assertEquals(_errToOfflineInvoked, 1);

  // reset the other error partition
  participants[0].setTransition(new ErrTransitionWithResetCnt(null));
  clearStatusUpdate(clusterName, "localhost_12918", "TestDB0", "TestDB0_8");
  command = "--zkSvr " + ZK_ADDR + " --resetPartition " + clusterName
      + " localhost_12918 TestDB0 TestDB0_8";
  ClusterSetup.processCommandLineArgs(command.split("\\s+"));

  result = ClusterStateVerifier.verifyByPolling(
      new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
  Assert.assertTrue(result, "Cluster verification fails");
  Assert.assertEquals(_errToOfflineInvoked, 2, "Should reset 2 partitions");

  // clean up: stop the controller, then every participant that was started
  controller.syncStop();
  for (int i = 0; i < n; i++) {
    participants[i].syncStop();
  }

  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.mock.participant.ErrTransition in project helix by apache.
the class TestStateTransitionThrottle method testTransitionThrottleOnErrorPartition.
/**
 * Checks how an existing error partition affects partition assignment to a participant that
 * joins late, with CLUSTER-scope throttle configs (limit 100) set for LOAD_BALANCE and
 * RECOVERY_BALANCE.
 *
 * <p>All but the last participant are started, the first one with an {@link ErrTransition}
 * keyed by OFFLINE-SLAVE for partition {@code resourceName + "_0"}. After the last participant
 * joins, the test asserts that polling for a partition assignment on it returns false. It then
 * sets the error-partition threshold for load balance to 1, rebalances the resource with 3
 * replicas, and asserts that polling now returns true.
 *
 * @throws Exception if cluster setup, a participant/controller start, or verification throws
 */
@Test
public void testTransitionThrottleOnErrorPartition() throws Exception {
  final String clusterName = getShortClassName() + "testMaxErrorPartition";
  final MockParticipantManager[] nodes = new MockParticipantManager[participantCount];

  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));

  final ZKHelixDataAccessor accessor =
      new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
  setupCluster(clusterName, accessor);

  // Set throttle config to enable throttling
  PropertyKey.Builder keys = accessor.keyBuilder();
  ClusterConfig config = accessor.getProperty(accessor.keyBuilder().clusterConfig());
  config.setResourcePriorityField("Name");

  StateTransitionThrottleConfig loadBalanceThrottle = new StateTransitionThrottleConfig(
      StateTransitionThrottleConfig.RebalanceType.LOAD_BALANCE,
      StateTransitionThrottleConfig.ThrottleScope.CLUSTER, 100);
  StateTransitionThrottleConfig recoveryThrottle = new StateTransitionThrottleConfig(
      StateTransitionThrottleConfig.RebalanceType.RECOVERY_BALANCE,
      StateTransitionThrottleConfig.ThrottleScope.CLUSTER, 100);
  List<StateTransitionThrottleConfig> throttles = new ArrayList<>();
  throttles.add(loadBalanceThrottle);
  throttles.add(recoveryThrottle);
  config.setStateTransitionThrottleConfigs(throttles);
  accessor.setProperty(keys.clusterConfig(), config);

  // set one partition to be always Error, so load balance won't be triggered
  Map<String, Set<String>> failingTransitions = new HashMap<>();
  failingTransitions.put("OFFLINE-SLAVE", TestHelper.setOf(resourceName + "_0"));

  // start every participant except the last; the first one carries the ErrTransition
  final int lastIndex = participantCount - 1;
  for (int idx = 0; idx < lastIndex; idx++) {
    MockParticipantManager node =
        new MockParticipantManager(ZK_ADDR, clusterName, "localhost_" + (12918 + idx));
    nodes[idx] = node;
    if (idx == 0) {
      node.setTransition(new ErrTransition(failingTransitions));
    }
    node.syncStart();
  }

  ClusterControllerManager controller =
      new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();

  BestPossibleExternalViewVerifier verifier =
      new BestPossibleExternalViewVerifier.Builder(clusterName).setZkClient(_gZkClient).build();
  Assert.assertTrue(verifier.verify(3000));

  // Adding one more participant.
  MockParticipantManager lateJoiner =
      new MockParticipantManager(ZK_ADDR, clusterName, "localhost_" + (12918 + lastIndex));
  nodes[lastIndex] = lateJoiner;
  lateJoiner.syncStart();

  // Since error partition exists, no load balance transition will be done
  boolean assignedBeforeThreshold =
      pollForPartitionAssignment(accessor, lateJoiner, resourceName, 5000);
  Assert.assertFalse(assignedBeforeThreshold);

  // Update cluster config to tolerate error partition, so load balance transition will be done
  config = accessor.getProperty(accessor.keyBuilder().clusterConfig());
  config.setErrorPartitionThresholdForLoadBalance(1);
  accessor.setProperty(keys.clusterConfig(), config);
  _gSetupTool.rebalanceResource(clusterName, resourceName, 3);

  boolean assignedAfterThreshold =
      pollForPartitionAssignment(accessor, lateJoiner, resourceName, 3000);
  Assert.assertTrue(assignedAfterThreshold);

  // clean up: controller first, then the participants in start order
  controller.syncStop();
  for (MockParticipantManager node : nodes) {
    node.syncStop();
  }

  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.mock.participant.ErrTransition in project helix by apache.
the class TestDrop method testDropErrorPartitionFailedAutoIS.
/**
 * Drops a resource while one of its partitions is in ERROR on a participant whose
 * {@link ErrTransition} is also keyed by ERROR-DROPPED for that partition.
 *
 * <p>After the drop, the test asserts that:
 * <ul>
 *   <li>the verifier built with TestDB0_4 on {@code localhost_12918} as the only error state
 *       still passes;</li>
 *   <li>the instance config of {@code localhost_12918} lists exactly TestDB0_4 as disabled;</li>
 *   <li>the external view of TestDB0 holds only TestDB0_4, mapped to {@code localhost_12918}
 *       in ERROR;</li>
 *   <li>the current state of {@code localhost_12918} for TestDB0 holds only TestDB0_4 in
 *       ERROR;</li>
 *   <li>every other participant has no current state for TestDB0.</li>
 * </ul>
 *
 * @throws Exception if cluster setup, the drop command, or verification throws
 */
@Test
public void testDropErrorPartitionFailedAutoIS() throws Exception {
  final String clusterName = TestHelper.getTestClassName() + "_" + TestHelper.getTestMethodName();
  final int n = 5;

  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));

  final MockParticipantManager[] nodes = new MockParticipantManager[n];

  TestHelper.setupCluster(clusterName, ZK_ADDR,
      12918, // participant port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      8, // partitions per resource
      n, // number of nodes
      3, // replicas
      "MasterSlave",
      true); // do rebalance

  // start controller
  ClusterControllerManager controller =
      new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();

  // start participants; the first one gets an ErrTransition for both transitions of TestDB0_4
  Map<String, Set<String>> failingTransitions = new HashMap<>();
  failingTransitions.put("SLAVE-MASTER", TestHelper.setOf("TestDB0_4"));
  failingTransitions.put("ERROR-DROPPED", TestHelper.setOf("TestDB0_4"));
  for (int idx = 0; idx < n; idx++) {
    MockParticipantManager node =
        new MockParticipantManager(ZK_ADDR, clusterName, "localhost_" + (12918 + idx));
    nodes[idx] = node;
    if (idx == 0) {
      node.setTransition(new ErrTransition(failingTransitions));
    }
    node.syncStart();
  }

  Map<String, String> expectedErrors = new HashMap<String, String>();
  Map<String, Map<String, String>> errStateMap = new HashMap<>();
  errStateMap.put("TestDB0", expectedErrors);
  expectedErrors.put("TestDB0_4", "localhost_12918");
  HelixClusterVerifier verifier = new BestPossibleExternalViewVerifier.Builder(clusterName)
      .setZkAddr(ZK_ADDR)
      .setErrStates(errStateMap)
      .build();
  Assert.assertTrue(verifier.verify());

  // drop resource containing error partitions should invoke error->dropped transition
  // if error happens during error->dropped transition, partition should be disabled
  String[] dropArgs = { "--zkSvr", ZK_ADDR, "--dropResource", clusterName, "TestDB0" };
  ClusterSetup.processCommandLineArgs(dropArgs);
  Thread.sleep(100);

  // make sure TestDB0_4 stays in ERROR state and is disabled
  Assert.assertTrue(verifier.verify());
  ZKHelixDataAccessor accessor =
      new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
  PropertyKey.Builder keys = accessor.keyBuilder();
  InstanceConfig firstNodeConfig = accessor.getProperty(keys.instanceConfig("localhost_12918"));
  List<String> disabled = firstNodeConfig.getDisabledPartitions();
  Assert.assertEquals(disabled.size(), 1, "TestDB0_4 should be disabled");
  Assert.assertEquals(disabled.get(0), "TestDB0_4");

  // ExternalView should have TestDB0_4->localhost_12918->ERROR
  Thread.sleep(2000);
  ExternalView view = accessor.getProperty(keys.externalView("TestDB0"));
  Set<String> viewPartitions = view.getPartitionSet();
  Assert.assertEquals(viewPartitions.size(), 1, "Should have TestDB0_4->localhost_12918->ERROR");
  String onlyPartition = viewPartitions.iterator().next();
  Assert.assertEquals(onlyPartition, "TestDB0_4");
  Map<String, String> instanceStates = view.getStateMap(onlyPartition);
  Assert.assertEquals(instanceStates.size(), 1);
  Assert.assertEquals(instanceStates.keySet().iterator().next(), "localhost_12918");
  Assert.assertEquals(instanceStates.get("localhost_12918"), HelixDefinedState.ERROR.name());

  // localhost_12918 should have TestDB0_4 in ERROR state
  MockParticipantManager firstNode = nodes[0];
  CurrentState firstNodeState = accessor.getProperty(
      keys.currentState(firstNode.getInstanceName(), firstNode.getSessionId(), "TestDB0"));
  Map<String, String> partitionStates = firstNodeState.getPartitionStateMap();
  Assert.assertEquals(partitionStates.size(), 1);
  Assert.assertEquals(partitionStates.keySet().iterator().next(), "TestDB0_4");
  Assert.assertEquals(partitionStates.get("TestDB0_4"), HelixDefinedState.ERROR.name());

  // all other participants should have cleaned up empty current state
  for (int idx = 1; idx < n; idx++) {
    String instanceName = nodes[idx].getInstanceName();
    String sessionId = nodes[idx].getSessionId();
    Assert.assertNull(
        accessor.getProperty(keys.currentState(instanceName, sessionId, "TestDB0")));
  }

  // clean up: controller first, then the participants in start order
  controller.syncStop();
  for (MockParticipantManager node : nodes) {
    node.syncStop();
  }

  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Aggregations