use of org.apache.helix.controller.pipeline.Pipeline in project helix by apache.
the class TestRebalancePipeline method testChangeIdealStateWithPendingMsg.
@Test
public void testChangeIdealStateWithPendingMsg() {
String clusterName = "CLUSTER_" + _className + "_pending";
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
HelixManager manager = new DummyClusterManager(clusterName, accessor);
ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
event.addAttribute(AttributeName.helixmanager.name(), manager);
ClusterDataCache cache = new ClusterDataCache();
event.addAttribute(AttributeName.ClusterDataCache.name(), cache);
refreshClusterConfig(clusterName, accessor);
final String resourceName = "testResource_pending";
String[] resourceGroups = new String[] { resourceName };
// ideal state: node0 is MASTER, node1 is SLAVE
// replica=2 means 1 master and 1 slave
setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1);
setupLiveInstances(clusterName, new int[] { 0 });
setupStateModel(clusterName);
// cluster data cache refresh pipeline
Pipeline dataRefresh = new Pipeline();
dataRefresh.addStage(new ReadClusterDataStage());
// rebalance pipeline
Pipeline rebalancePipeline = new Pipeline();
rebalancePipeline.addStage(new ResourceComputationStage());
rebalancePipeline.addStage(new CurrentStateComputationStage());
rebalancePipeline.addStage(new BestPossibleStateCalcStage());
rebalancePipeline.addStage(new IntermediateStateCalcStage());
rebalancePipeline.addStage(new MessageGenerationPhase());
rebalancePipeline.addStage(new MessageSelectionStage());
rebalancePipeline.addStage(new MessageThrottleStage());
rebalancePipeline.addStage(new TaskAssignmentStage());
// round1: set node0 currentState to OFFLINE and node1 currentState to SLAVE
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "OFFLINE");
runPipeline(event, dataRefresh);
runPipeline(event, rebalancePipeline);
MessageSelectionStageOutput msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
List<Message> messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
Assert.assertEquals(messages.size(), 1, "Should output 1 message: OFFLINE-SLAVE for node0");
Message message = messages.get(0);
Assert.assertEquals(message.getFromState(), "OFFLINE");
Assert.assertEquals(message.getToState(), "SLAVE");
Assert.assertEquals(message.getTgtName(), "localhost_0");
// round2: drop resource, but keep the
// message, make sure controller should not send O->DROPPED until O->S is done
HelixAdmin admin = new ZKHelixAdmin(_gZkClient);
admin.dropResource(clusterName, resourceName);
List<IdealState> idealStates = accessor.getChildValues(accessor.keyBuilder().idealStates());
cache.setIdealStates(idealStates);
runPipeline(event, dataRefresh);
cache = event.getAttribute(AttributeName.ClusterDataCache.name());
cache.setClusterConfig(new ClusterConfig(clusterName));
runPipeline(event, rebalancePipeline);
msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
Assert.assertEquals(messages.size(), 0, "Should not output only 1 message: OFFLINE->DROPPED for localhost_0");
// round3: remove O->S message for localhost_0, localhost_0 still in OFFLINE
// controller should now send O->DROPPED to localhost_0
Builder keyBuilder = accessor.keyBuilder();
List<String> msgIds = accessor.getChildNames(keyBuilder.messages("localhost_0"));
accessor.removeProperty(keyBuilder.message("localhost_0", msgIds.get(0)));
runPipeline(event, dataRefresh);
runPipeline(event, rebalancePipeline);
msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
Assert.assertEquals(messages.size(), 1, "Should output 1 message: OFFLINE->DROPPED for localhost_0");
message = messages.get(0);
Assert.assertEquals(message.getFromState(), "OFFLINE");
Assert.assertEquals(message.getToState(), "DROPPED");
Assert.assertEquals(message.getTgtName(), "localhost_0");
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.controller.pipeline.Pipeline in project helix by apache.
the class TestRebalancePipeline method testMasterXfer.
@Test
public void testMasterXfer() {
String clusterName = "CLUSTER_" + _className + "_xfer";
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
HelixManager manager = new DummyClusterManager(clusterName, accessor);
ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
event.addAttribute(AttributeName.helixmanager.name(), manager);
refreshClusterConfig(clusterName, accessor);
final String resourceName = "testResource_xfer";
String[] resourceGroups = new String[] { resourceName };
// ideal state: node0 is MASTER, node1 is SLAVE
// replica=2 means 1 master and 1 slave
setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2);
setupLiveInstances(clusterName, new int[] { 1 });
setupStateModel(clusterName);
// cluster data cache refresh pipeline
Pipeline dataRefresh = new Pipeline();
dataRefresh.addStage(new ReadClusterDataStage());
// rebalance pipeline
Pipeline rebalancePipeline = new Pipeline();
rebalancePipeline.addStage(new ResourceComputationStage());
rebalancePipeline.addStage(new CurrentStateComputationStage());
rebalancePipeline.addStage(new BestPossibleStateCalcStage());
rebalancePipeline.addStage(new IntermediateStateCalcStage());
rebalancePipeline.addStage(new MessageGenerationPhase());
rebalancePipeline.addStage(new MessageSelectionStage());
rebalancePipeline.addStage(new MessageThrottleStage());
rebalancePipeline.addStage(new TaskAssignmentStage());
// round1: set node1 currentState to SLAVE
setCurrentState(clusterName, "localhost_1", resourceName, resourceName + "_0", "session_1", "SLAVE");
runPipeline(event, dataRefresh);
runPipeline(event, rebalancePipeline);
MessageSelectionStageOutput msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
List<Message> messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
Assert.assertEquals(messages.size(), 1, "Should output 1 message: SLAVE-MASTER for node1");
Message message = messages.get(0);
Assert.assertEquals(message.getFromState(), "SLAVE");
Assert.assertEquals(message.getToState(), "MASTER");
Assert.assertEquals(message.getTgtName(), "localhost_1");
// round2: updates node0 currentState to SLAVE but keep the
// message, make sure controller should not send S->M until removal is done
setupLiveInstances(clusterName, new int[] { 0 });
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE");
runPipeline(event, dataRefresh);
runPipeline(event, rebalancePipeline);
msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
Assert.assertEquals(messages.size(), 0, "Should NOT output 1 message: SLAVE-MASTER for node0");
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.controller.pipeline.Pipeline in project helix by apache.
the class TestRebalancePipeline method testNoDuplicatedMaster.
@Test
public void testNoDuplicatedMaster() {
String clusterName = "CLUSTER_" + _className + "_no_duplicated_master";
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
HelixManager manager = new DummyClusterManager(clusterName, accessor);
ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
event.addAttribute(AttributeName.helixmanager.name(), manager);
refreshClusterConfig(clusterName, accessor);
final String resourceName = "testResource_no_duplicated_master";
String[] resourceGroups = new String[] { resourceName };
// ideal state: node0 is SLAVE, node1 is MASTER
// replica=2 means 1 master and 1 slave
setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2);
setupLiveInstances(clusterName, new int[] { 0, 1 });
setupStateModel(clusterName);
// cluster data cache refresh pipeline
Pipeline dataRefresh = new Pipeline();
dataRefresh.addStage(new ReadClusterDataStage());
// rebalance pipeline
Pipeline rebalancePipeline = new Pipeline();
rebalancePipeline.addStage(new ResourceComputationStage());
rebalancePipeline.addStage(new CurrentStateComputationStage());
rebalancePipeline.addStage(new BestPossibleStateCalcStage());
rebalancePipeline.addStage(new IntermediateStateCalcStage());
rebalancePipeline.addStage(new MessageGenerationPhase());
rebalancePipeline.addStage(new MessageSelectionStage());
rebalancePipeline.addStage(new MessageThrottleStage());
rebalancePipeline.addStage(new TaskAssignmentStage());
// set node0 currentState to SLAVE, node1 currentState to MASTER
// Helix will try to switch the state of the two instances, but it should not be two MASTER at the same time
// so it should first transit M->S, then transit another instance S->M
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE");
setCurrentState(clusterName, "localhost_1", resourceName, resourceName + "_0", "session_1", "MASTER");
runPipeline(event, dataRefresh);
runPipeline(event, rebalancePipeline);
MessageSelectionStageOutput msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
List<Message> messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
Assert.assertEquals(messages.size(), 1, "Should output 1 message: MASTER-SLAVE for localhost_1");
Message message = messages.get(0);
Assert.assertEquals(message.getFromState(), "MASTER");
Assert.assertEquals(message.getToState(), "SLAVE");
Assert.assertEquals(message.getTgtName(), "localhost_1");
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.controller.pipeline.Pipeline in project helix by apache.
the class GenericHelixController method handleEvent.
/**
* lock-always: caller always needs to obtain an external lock before call, calls to handleEvent()
* should be serialized
* @param event
*/
protected void handleEvent(ClusterEvent event, ClusterDataCache cache) {
HelixManager manager = event.getAttribute(AttributeName.helixmanager.name());
if (manager == null) {
logger.error("No cluster manager in event:" + event.getEventType());
return;
}
if (!manager.isLeader()) {
logger.error("Cluster manager: " + manager.getInstanceName() + " is not leader for " + manager.getClusterName() + ". Pipeline will not be invoked");
return;
}
// will be excuting in un-paused mode. Which might not be the config in ZK.
if (_paused) {
logger.info("Cluster " + manager.getClusterName() + " is paused. Ignoring the event:" + event.getEventType());
return;
}
NotificationContext context = null;
if (event.getAttribute(AttributeName.changeContext.name()) != null) {
context = event.getAttribute(AttributeName.changeContext.name());
}
if (context != null) {
if (context.getType() == Type.FINALIZE) {
stopRebalancingTimers();
logger.info("Get FINALIZE notification, skip the pipeline. Event :" + event.getEventType());
return;
} else {
// TODO: should be in the initialization of controller.
if (_cache != null) {
checkRebalancingTimer(manager, Collections.EMPTY_LIST, _cache.getClusterConfig());
}
if (_isMonitoring) {
event.addAttribute(AttributeName.clusterStatusMonitor.name(), _clusterStatusMonitor);
}
}
}
// add the cache
event.addAttribute(AttributeName.ClusterDataCache.name(), cache);
List<Pipeline> pipelines = cache.isTaskCache() ? _taskRegistry.getPipelinesForEvent(event.getEventType()) : _registry.getPipelinesForEvent(event.getEventType());
if (pipelines == null || pipelines.size() == 0) {
logger.info("No " + getPipelineType(cache.isTaskCache()) + " pipeline to run for event:" + event.getEventType());
return;
}
logger.info(String.format("START: Invoking %s controller pipeline for cluster %s event: %s", manager.getClusterName(), getPipelineType(cache.isTaskCache()), event.getEventType()));
long startTime = System.currentTimeMillis();
boolean rebalanceFail = false;
for (Pipeline pipeline : pipelines) {
try {
pipeline.handle(event);
pipeline.finish();
} catch (Exception e) {
logger.error("Exception while executing " + getPipelineType(cache.isTaskCache()) + "pipeline: " + pipeline + "for cluster ." + _clusterName + ". Will not continue to next pipeline", e);
if (e instanceof HelixMetaDataAccessException) {
rebalanceFail = true;
// If pipeline failed due to read/write fails to zookeeper, retry the pipeline.
cache.requireFullRefresh();
logger.warn("Rebalance pipeline failed due to read failure from zookeeper, cluster: " + _clusterName);
// only push a retry event when there is no pending event in the corresponding event queue.
if (isEventQueueEmpty(cache.isTaskCache())) {
_continousRebalanceFailureCount++;
long delay = getRetryDelay(_continousRebalanceFailureCount);
if (delay == 0) {
forceRebalance(manager, ClusterEventType.RetryRebalance);
} else {
_asyncTasksThreadPool.schedule(new RebalanceTask(manager, ClusterEventType.RetryRebalance), delay, TimeUnit.MILLISECONDS);
}
logger.info("Retry rebalance pipeline with delay " + delay + "ms for cluster: " + _clusterName);
}
}
_clusterStatusMonitor.reportRebalanceFailure();
break;
}
}
if (!rebalanceFail) {
_continousRebalanceFailureCount = 0;
}
long endTime = System.currentTimeMillis();
logger.info(String.format("END: Invoking %s controller pipeline for event: %s for cluster %s, took %d ms", getPipelineType(cache.isTaskCache()), event.getEventType(), manager.getClusterName(), (endTime - startTime)));
if (!cache.isTaskCache()) {
// report event process durations
NotificationContext notificationContext = event.getAttribute(AttributeName.changeContext.name());
long enqueueTime = event.getCreationTime();
long zkCallbackTime;
StringBuilder sb = new StringBuilder();
if (notificationContext != null) {
zkCallbackTime = notificationContext.getCreationTime();
if (_isMonitoring) {
_clusterStatusMonitor.updateClusterEventDuration(ClusterEventMonitor.PhaseName.Callback.name(), enqueueTime - zkCallbackTime);
}
sb.append(String.format("Callback time for event: " + event.getEventType() + " took: " + (enqueueTime - zkCallbackTime) + " ms\n"));
}
if (_isMonitoring) {
_clusterStatusMonitor.updateClusterEventDuration(ClusterEventMonitor.PhaseName.InQueue.name(), startTime - enqueueTime);
_clusterStatusMonitor.updateClusterEventDuration(ClusterEventMonitor.PhaseName.TotalProcessed.name(), endTime - startTime);
}
sb.append(String.format("InQueue time for event: " + event.getEventType() + " took: " + (startTime - enqueueTime) + " ms\n"));
sb.append(String.format("TotalProcessed time for event: " + event.getEventType() + " took: " + (endTime - startTime) + " ms"));
logger.info(sb.toString());
} else if (_isMonitoring) {
// report workflow status
TaskDriver driver = new TaskDriver(manager);
_clusterStatusMonitor.refreshWorkflowsStatus(driver);
_clusterStatusMonitor.refreshJobsStatus(driver);
}
// If event handling happens before controller deactivate, the process may write unnecessary
// MBeans to monitoring after the monitor is disabled.
// So reset ClusterStatusMonitor according to it's status after all event handling.
// TODO remove this once clusterStatusMonitor blocks any MBean register on isMonitoring = false.
resetClusterStatusMonitor();
}
use of org.apache.helix.controller.pipeline.Pipeline in project helix by apache.
the class GenericHelixController method createDefaultRegistry.
private static PipelineRegistry createDefaultRegistry(String pipelineName) {
logger.info("createDefaultRegistry");
synchronized (GenericHelixController.class) {
PipelineRegistry registry = new PipelineRegistry();
// cluster data cache refresh
Pipeline dataRefresh = new Pipeline(pipelineName);
dataRefresh.addStage(new ReadClusterDataStage());
// rebalance pipeline
Pipeline rebalancePipeline = new Pipeline(pipelineName);
rebalancePipeline.addStage(new ResourceComputationStage());
rebalancePipeline.addStage(new ResourceValidationStage());
rebalancePipeline.addStage(new CurrentStateComputationStage());
rebalancePipeline.addStage(new BestPossibleStateCalcStage());
rebalancePipeline.addStage(new IntermediateStateCalcStage());
rebalancePipeline.addStage(new MessageGenerationPhase());
rebalancePipeline.addStage(new MessageSelectionStage());
rebalancePipeline.addStage(new MessageThrottleStage());
rebalancePipeline.addStage(new TaskAssignmentStage());
rebalancePipeline.addStage(new PersistAssignmentStage());
rebalancePipeline.addStage(new TargetExteralViewCalcStage());
// external view generation
Pipeline externalViewPipeline = new Pipeline(pipelineName);
externalViewPipeline.addStage(new ExternalViewComputeStage());
// backward compatibility check
Pipeline liveInstancePipeline = new Pipeline(pipelineName);
liveInstancePipeline.addStage(new CompatibilityCheckStage());
registry.register(ClusterEventType.IdealStateChange, dataRefresh, rebalancePipeline);
registry.register(ClusterEventType.CurrentStateChange, dataRefresh, rebalancePipeline, externalViewPipeline);
registry.register(ClusterEventType.InstanceConfigChange, dataRefresh, rebalancePipeline);
registry.register(ClusterEventType.ResourceConfigChange, dataRefresh, rebalancePipeline);
registry.register(ClusterEventType.ClusterConfigChange, dataRefresh, rebalancePipeline);
registry.register(ClusterEventType.LiveInstanceChange, dataRefresh, liveInstancePipeline, rebalancePipeline, externalViewPipeline);
registry.register(ClusterEventType.MessageChange, dataRefresh, rebalancePipeline);
registry.register(ClusterEventType.ExternalViewChange, dataRefresh);
registry.register(ClusterEventType.Resume, dataRefresh, rebalancePipeline, externalViewPipeline);
registry.register(ClusterEventType.PeriodicalRebalance, dataRefresh, rebalancePipeline, externalViewPipeline);
return registry;
}
}
Aggregations