use of org.apache.helix.HelixManager in project pinot by linkedin.
the class BaseClusterIntegrationTest method setupSegmentCountCountDownLatch.
/**
 * Connects a spectator to the Helix cluster and registers an external view listener that
 * releases a latch once a matching external view lists exactly {@code expectedSegmentCount}
 * partitions and every one of them has at least one replica in the ONLINE state.
 *
 * @param tableName substring matched against each external view id
 * @param expectedSegmentCount number of partitions that must be present and ONLINE
 * @return a latch with an initial count of one, counted down by the listener
 * @throws Exception propagated from connecting the manager or registering the listener
 */
protected CountDownLatch setupSegmentCountCountDownLatch(final String tableName, final int expectedSegmentCount) throws Exception {
  final CountDownLatch segmentsOnlineLatch = new CountDownLatch(1);
  HelixManager spectator = HelixManagerFactory.getZKHelixManager(getHelixClusterName(), "test_instance", InstanceType.SPECTATOR, ZkStarter.DEFAULT_ZK_STR);
  spectator.connect();
  spectator.addExternalViewChangeListener(new ExternalViewChangeListener() {
    // Set once the latch has been released; later notifications are ignored.
    private boolean _latchReleased = false;

    @Override
    public void onExternalViewChange(List<ExternalView> externalViewList, NotificationContext changeContext) {
      if (_latchReleased) {
        return;
      }
      for (ExternalView view : externalViewList) {
        // Only external views whose id mentions the table are of interest.
        if (!view.getId().contains(tableName)) {
          continue;
        }
        Set<String> segments = view.getPartitionSet();
        if (segments.size() != expectedSegmentCount) {
          continue;
        }
        // Count the partitions that have at least one replica reported as ONLINE.
        int onlineSegments = 0;
        for (String segment : segments) {
          if (view.getStateMap(segment).containsValue("ONLINE")) {
            onlineSegments++;
          }
        }
        if (onlineSegments == expectedSegmentCount) {
          segmentsOnlineLatch.countDown();
          _latchReleased = true;
        }
      }
    }
  });
  return segmentsOnlineLatch;
}
use of org.apache.helix.HelixManager in project helix by apache.
the class HelixTaskExecutor method onMessage.
/**
 * Message callback: filters the given messages, creates a handler for each one that can be
 * processed, marks those messages as read and schedules a {@code HelixTask} per handler.
 *
 * <p>Flow, as implemented below:
 * <ol>
 * <li>A FINALIZE notification calls {@code reset()} and returns; an INIT notification calls
 * {@code init()} and then continues.</li>
 * <li>If {@code messages} is null or empty, new messages are read via
 * {@code readNewMessagesFromZK}.</li>
 * <li>If the executor is shutting down, the message ids are logged and nothing is processed.</li>
 * <li>Messages are sorted with {@code Message.CREATE_TIME_COMPARATOR} and examined one by one:
 * NO_OP, expired and session-mismatched messages are reported as DISCARDED and removed;
 * PARTICIPANT_SESSION_CHANGE on a controller triggers a live-instance refresh; messages that
 * are not in NEW state are skipped; the remainder get a handler and are marked read.</li>
 * <li>Current-state meta entries are batch-created for resources that do not have one yet,
 * the read messages are updated in batch and every collected handler is scheduled.</li>
 * </ol>
 *
 * @param instanceName instance whose messages are being handled
 * @param messages messages delivered with the callback; may be null or empty, in which case
 *        they are read from ZK
 * @param changeContext notification context supplying the manager, the notification type and
 *        the change type
 */
@Override
@PreFetch(enabled = false)
public void onMessage(String instanceName, List<Message> messages, NotificationContext changeContext) {
HelixManager manager = changeContext.getManager();
// TODO: see if we should have a separate notification call for resetting
if (changeContext.getType() == Type.FINALIZE) {
reset();
return;
}
if (changeContext.getType() == Type.INIT) {
init();
// continue to process messages
}
// if prefetch is disabled in MessageListenerCallback, we need to read all new messages from zk.
if (messages == null || messages.isEmpty()) {
// If no messages are given, check and read all new messages.
messages = readNewMessagesFromZK(manager, instanceName, changeContext.getChangeType());
}
// While shutting down, only log the ids of the pending messages; none of them is processed
// or removed here.
if (_isShuttingDown) {
StringBuilder sb = new StringBuilder();
for (Message message : messages) {
sb.append(message.getMsgId() + ",");
}
LOG.info("Helix task executor is shutting down, discard unprocessed messages : " + sb.toString());
return;
}
// Update message count
if (_messageQueueMonitor != null) {
_messageQueueMonitor.setMessageQueueBacklog(messages.size());
}
if (messages.isEmpty()) {
LOG.info("No Messages to process");
return;
}
// sort message by creation timestamp, so message created earlier is processed first
Collections.sort(messages, Message.CREATE_TIME_COMPARATOR);
HelixDataAccessor accessor = manager.getHelixDataAccessor();
Builder keyBuilder = accessor.keyBuilder();
// message handlers created
// State transition handlers are keyed by getMessageTarget(resource, partition).
// NOTE(review): a later message for the same target replaces the earlier handler in this
// map while both messages are still marked read - confirm this is intended.
Map<String, MessageHandler> stateTransitionHandlers = new HashMap<>();
List<MessageHandler> nonStateTransitionHandlers = new ArrayList<>();
// message read
List<Message> readMsgs = new ArrayList<>();
String sessionId = manager.getSessionId();
// Resources that already have a current-state node for this instance and session.
List<String> curResourceNames = accessor.getChildNames(keyBuilder.currentStates(instanceName, sessionId));
// Parallel lists (key i <-> meta i) of current-state nodes to create in one batch after
// the loop; createCurStateNames prevents adding the same resource twice.
List<PropertyKey> createCurStateKeys = new ArrayList<>();
List<CurrentState> metaCurStates = new ArrayList<>();
Set<String> createCurStateNames = new HashSet<>();
for (Message message : messages) {
// NO_OP messages carry no work (sent in situations such as registering a new message
// handler factory); report them as discarded and remove them.
if (message.getMsgType().equalsIgnoreCase(MessageType.NO_OP.toString())) {
LOG.info("Dropping NO-OP message. mid: " + message.getId() + ", from: " + message.getMsgSrc());
reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
continue;
}
if (message.isExpired()) {
LOG.info("Dropping expired message. mid: " + message.getId() + ", from: " + message.getMsgSrc() + " relayed from: " + message.getRelaySrcHost());
reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
continue;
}
String tgtSessionId = message.getTgtSessionId();
// sessionId mismatch normally means message comes from expired session, just remove it
// A target session id of "*" is accepted regardless of the current session.
// NOTE(review): if getTgtSessionId() can return null, tgtSessionId.equals("*") below
// throws a NullPointerException - confirm the field is always populated.
if (!sessionId.equals(tgtSessionId) && !tgtSessionId.equals("*")) {
String warningMessage = "SessionId does NOT match. expected sessionId: " + sessionId + ", tgtSessionId in message: " + tgtSessionId + ", messageId: " + message.getMsgId();
LOG.warn(warningMessage);
reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
_statusUpdateUtil.logWarning(message, HelixStateMachineEngine.class, warningMessage, manager);
// On a participant, a mismatching message created after the current session started
// triggers syncSessionToController.
if (manager.getInstanceType() == InstanceType.PARTICIPANT || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) {
if (message.getCreateTimeStamp() > manager.getSessionStartTime()) {
syncSessionToController(manager);
}
}
continue;
}
// On a controller, a PARTICIPANT_SESSION_CHANGE message is handled inline: the live
// instances are re-read and passed to the controller, then the message is completed.
if ((manager.getInstanceType() == InstanceType.CONTROLLER || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) && MessageType.PARTICIPANT_SESSION_CHANGE.name().equals(message.getMsgType())) {
LOG.info(String.format("Controller received PARTICIPANT_SESSION_CHANGE msg from src: %s", message.getMsgSrc()));
PropertyKey key = new Builder(manager.getClusterName()).liveInstances();
List<LiveInstance> liveInstances = manager.getHelixDataAccessor().getChildValues(key);
_controller.onLiveInstanceChange(liveInstances, changeContext);
reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.COMPLETED);
continue;
}
// don't process message that is of READ or UNPROCESSABLE state
if (MessageState.NEW != message.getMsgState()) {
// check for the status and ignore if its already read
if (LOG.isTraceEnabled()) {
LOG.trace("Message already read. msgId: " + message.getMsgId());
}
continue;
}
// State Transition Cancellation
// If the cancellation is handled here, skip the message; otherwise it falls through and
// gets a handler like any other message.
if (message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
boolean success = cancelNotStartedStateTransition(message, stateTransitionHandlers, accessor, instanceName);
if (success) {
continue;
}
}
_monitor.reportReceivedMessage(message);
// create message handlers, if handlers not found, leave its state as NEW
try {
MessageHandler createHandler = createMessageHandler(message, changeContext);
if (createHandler == null) {
continue;
}
if (message.getMsgType().equals(MessageType.STATE_TRANSITION.name()) || message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
stateTransitionHandlers.put(getMessageTarget(message.getResourceName(), message.getPartitionName()), createHandler);
} else {
nonStateTransitionHandlers.add(createHandler);
}
} catch (Exception e) {
// Handler creation failed: mark the message UNPROCESSABLE, remove it from ZK and
// report it as discarded.
LOG.error("Failed to create message handler for " + message.getMsgId(), e);
String error = "Failed to create message handler for " + message.getMsgId() + ", exception: " + e;
_statusUpdateUtil.logError(message, HelixStateMachineEngine.class, e, error, manager);
message.setMsgState(MessageState.UNPROCESSABLE);
removeMessageFromZK(accessor, message, instanceName);
LOG.error("Message cannot be processed: " + message.getRecord(), e);
_monitor.reportProcessedMessage(message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
continue;
}
markReadMessage(message, changeContext, manager);
readMsgs.add(message);
// do it for non-controller and state transition messages only
if (!message.isControlerMsg() && message.getMsgType().equals(Message.MessageType.STATE_TRANSITION.name())) {
String resourceName = message.getResourceName();
// Prepare a current-state meta entry only for resources that neither exist yet nor
// were already queued for creation earlier in this loop.
if (!curResourceNames.contains(resourceName) && !createCurStateNames.contains(resourceName)) {
createCurStateNames.add(resourceName);
createCurStateKeys.add(keyBuilder.currentState(instanceName, sessionId, resourceName));
CurrentState metaCurState = new CurrentState(resourceName);
metaCurState.setBucketSize(message.getBucketSize());
metaCurState.setStateModelDefRef(message.getStateModelDef());
metaCurState.setSessionId(sessionId);
metaCurState.setBatchMessageMode(message.getBatchMessageMode());
String ftyName = message.getStateModelFactoryName();
// Fall back to the default factory name when the message does not specify one.
if (ftyName != null) {
metaCurState.setStateModelFactoryName(ftyName);
} else {
metaCurState.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY);
}
metaCurStates.add(metaCurState);
}
}
}
// batch create curState meta
if (createCurStateKeys.size() > 0) {
try {
accessor.createChildren(createCurStateKeys, metaCurStates);
} catch (Exception e) {
// A failure here is logged only; scheduling below still proceeds.
LOG.error("fail to create cur-state znodes for messages: " + readMsgs, e);
}
}
// update message state to READ in batch and schedule all read messages
if (readMsgs.size() > 0) {
updateMessageState(readMsgs, accessor, instanceName);
for (MessageHandler handler : stateTransitionHandlers.values()) {
HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
scheduleTask(task);
}
for (MessageHandler handler : nonStateTransitionHandlers) {
HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
scheduleTask(task);
}
}
}
use of org.apache.helix.HelixManager in project helix by apache.
the class HelixTaskExecutor method scheduleTask.
/**
 * Submits a message task to the executor service selected for its message and records it in
 * the task map, optionally arming a timeout timer.
 *
 * @param task the message task to schedule
 * @return {@code true} if the task was submitted and recorded; {@code false} if a task with
 *         the same id is already recorded, no executor service was found for the message, or
 *         an exception was raised while scheduling
 */
@Override
public boolean scheduleTask(MessageTask task) {
  final String taskId = task.getTaskId();
  final Message message = task.getMessage();
  final HelixManager manager = task.getNotificationContext().getManager();
  try {
    // Make sure a dedicated state-transition thread pool is in place if one is configured or provided.
    updateStateTransitionMessageThreadPool(message, manager);
    LOG.info("Scheduling message: " + taskId);
    _statusUpdateUtil.logInfo(message, HelixTaskExecutor.class, "Message handling task scheduled", manager);

    synchronized (_lock) {
      // A task id that is already tracked is not submitted a second time.
      if (_taskMap.containsKey(taskId)) {
        _statusUpdateUtil.logWarning(message, HelixTaskExecutor.class, "Message handling task already sheduled for " + taskId, manager);
        return false;
      }

      final ExecutorService pool = findExecutorServiceForMsg(message);
      if (pool == null) {
        LOG.warn(String.format("Threadpool is null for type %s of message %s", message.getMsgType(), message.getMsgId()));
        return false;
      }

      LOG.info("Submit task: " + taskId + " to pool: " + pool);
      final Future<HelixTaskResult> pendingResult = pool.submit(task);
      _messageTaskMap.putIfAbsent(getMessageTarget(message.getResourceName(), message.getPartitionName()), taskId);

      // Arm a timeout timer only when the message carries a positive execution timeout.
      TimerTask timeoutWatcher = null;
      if (message.getExecutionTimeout() > 0) {
        timeoutWatcher = new MessageTimeoutTask(this, task);
        _timer.schedule(timeoutWatcher, message.getExecutionTimeout());
        LOG.info("Message starts with timeout " + message.getExecutionTimeout() + " MsgId: " + task.getTaskId());
      } else {
        LOG.debug("Message does not have timeout. MsgId: " + task.getTaskId());
      }

      _taskMap.put(taskId, new MessageTaskInfo(task, pendingResult, timeoutWatcher));
      LOG.info("Message: " + taskId + " handling task scheduled");
      return true;
    }
  } catch (Exception e) {
    LOG.error("Error while executing task. " + message, e);
    _statusUpdateUtil.logError(message, HelixTaskExecutor.class, e, "Error while executing task " + e, manager);
  }
  return false;
}
use of org.apache.helix.HelixManager in project helix by apache.
the class TestAutoRebalancePartitionLimit method beforeClass.
/**
 * Builds a fresh test cluster: wipes any existing cluster namespace in ZooKeeper, registers the
 * test resource and the storage nodes, starts the controller and then starts the participants
 * one at a time, asserting the external view's partition count after each start.
 *
 * @throws Exception propagated from cluster setup, the sleeps or the verifier
 */
@Override
@BeforeClass
public void beforeClass() throws Exception {
  System.out.println("START " + CLASS_NAME + " at " + new Date(System.currentTimeMillis()));

  // Start from a clean ZooKeeper namespace for this cluster.
  final String clusterRoot = "/" + CLUSTER_NAME;
  if (_gZkClient.exists(clusterRoot)) {
    _gZkClient.deleteRecursively(clusterRoot);
  }

  // Set up the storage cluster. The trailing 25 presumably caps partitions per node
  // (the assertions below expect 25 per started participant) - TODO confirm.
  _setupTool = new ClusterSetup(ZK_ADDR);
  _setupTool.addCluster(CLUSTER_NAME, true);
  _setupTool.addResourceToCluster(CLUSTER_NAME, TEST_DB, 100, "OnlineOffline", RebalanceMode.FULL_AUTO.toString(), 0, 25);
  for (int node = 0; node < NODE_NR; node++) {
    _setupTool.addInstanceToCluster(CLUSTER_NAME, PARTICIPANT_PREFIX + "_" + (START_PORT + node));
  }
  _setupTool.rebalanceStorageCluster(CLUSTER_NAME, TEST_DB, 1);

  // Start the controller.
  _controller = new ClusterControllerManager(ZK_ADDR, CLUSTER_NAME, CONTROLLER_PREFIX + "_0");
  _controller.syncStart();
  final HelixManager controllerManager = _controller;
  final HelixDataAccessor dataAccessor = controllerManager.getHelixDataAccessor();

  // Start the dummy participants one by one and check the external view after each.
  for (int node = 0; node < NODE_NR; node++) {
    _participants[node] = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, PARTICIPANT_PREFIX + "_" + (START_PORT + node));
    _participants[node].syncStart();
    Thread.sleep(2000);

    Assert.assertTrue(ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB)));

    ExternalView view = controllerManager.getHelixDataAccessor().getProperty(dataAccessor.keyBuilder().externalView(TEST_DB));
    System.out.println(view.getPartitionSet().size());

    // The first three participants each add 25 partitions; from the fourth on all 100 are expected.
    final int expectedPartitions = node < 3 ? 25 * (node + 1) : 100;
    Assert.assertEquals(view.getPartitionSet().size(), expectedPartitions);
  }

  Assert.assertTrue(ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB)));
}
use of org.apache.helix.HelixManager in project helix by apache.
the class TestAutoRebalancePartitionLimit method testAutoRebalanceWithMaxPartitionPerNode.
/**
 * Stops two participants in turn, asserting the external view's partition count after each
 * stop (100, then 75), then adds and starts two new participants and verifies that the
 * external view is balanced again.
 *
 * @throws Exception propagated from the participant lifecycle calls, the sleep or the verifier
 */
@Test()
public void testAutoRebalanceWithMaxPartitionPerNode() throws Exception {
  final HelixManager controllerManager = _controller;

  // Stop the first node; all 100 partitions are still expected in the external view.
  _participants[0].syncStop();
  boolean balanced = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
  Assert.assertTrue(balanced);
  final HelixDataAccessor dataAccessor = controllerManager.getHelixDataAccessor();
  ExternalView view = controllerManager.getHelixDataAccessor().getProperty(dataAccessor.keyBuilder().externalView(TEST_DB));
  Assert.assertEquals(view.getPartitionSet().size(), 100);

  // Stop a second node; only 75 partitions are expected afterwards.
  _participants[1].syncStop();
  balanced = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
  Assert.assertTrue(balanced);
  view = controllerManager.getHelixDataAccessor().getProperty(dataAccessor.keyBuilder().externalView(TEST_DB));
  Assert.assertEquals(view.getPartitionSet().size(), 75);

  // Register and start two additional nodes.
  for (int extra = 0; extra < 2; extra++) {
    final String addedNodeName = PARTICIPANT_PREFIX + "_" + (1000 + extra);
    _setupTool.addInstanceToCluster(CLUSTER_NAME, addedNodeName);
    final String addedInstanceName = addedNodeName.replace(':', '_');
    new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, addedInstanceName).syncStart();
  }
  Thread.sleep(1000);

  balanced = ClusterStateVerifier.verifyByZkCallback(new ExternalViewBalancedVerifier(_gZkClient, CLUSTER_NAME, TEST_DB));
  Assert.assertTrue(balanced);
}
Aggregations