use of org.apache.helix.model.LiveInstance in project pinot by linkedin.
the class PinotHelixResourceManager method toogleInstance.
/**
 * Enables or disables an instance in the Helix cluster and waits for the change to take effect.
 *
 * <p>After flipping the enabled flag through the Helix admin, this polls the current states
 * reported under the instance's live session until no partition is left in the pre-toggle state
 * (OFFLINE when enabling, ONLINE when disabling) or until the time-out expires.
 *
 * @param instanceName name of the instance whose status needs to be toggled.
 * @param toggle {@code true} to enable (bring ONLINE), {@code false} to disable (take OFFLINE).
 * @param timeOutInSeconds maximum time, in seconds, to wait for the partitions to leave the
 *     pre-toggle state.
 * @return a successful response once no partition is in the pre-toggle state (or when disabling an
 *     instance that is not live, or when no current states can be read); a failure response when
 *     the instance does not exist, when enabling an instance that is not live, when the waiting
 *     thread is interrupted, or when the time-out expires.
 */
public PinotResourceManagerResponse toogleInstance(String instanceName, boolean toggle, int timeOutInSeconds) {
  if (!instanceExists(instanceName)) {
    return new PinotResourceManagerResponse("Instance " + instanceName + " does not exist.", false);
  }
  _helixAdmin.enableInstance(_helixClusterName, instanceName, toggle);
  // 1000L forces long arithmetic so that a large time-out cannot overflow int.
  long deadline = System.currentTimeMillis() + 1000L * timeOutInSeconds;
  String beforeToggleStates = (toggle) ? SegmentOnlineOfflineStateModel.OFFLINE : SegmentOnlineOfflineStateModel.ONLINE;
  while (System.currentTimeMillis() < deadline) {
    PropertyKey liveInstanceKey = _keyBuilder.liveInstance(instanceName);
    LiveInstance liveInstance = _helixDataAccessor.getProperty(liveInstanceKey);
    if (liveInstance == null) {
      // An instance that is not live cannot be brought online, but is trivially offline.
      return (toggle) ? PinotResourceManagerResponse.FAILURE_RESPONSE : PinotResourceManagerResponse.SUCCESS_RESPONSE;
    }
    PropertyKey instanceCurrentStatesKey = _keyBuilder.currentStates(instanceName, liveInstance.getSessionId());
    List<CurrentState> instanceCurrentStates = _helixDataAccessor.getChildValues(instanceCurrentStatesKey);
    if (instanceCurrentStates == null) {
      return PinotResourceManagerResponse.SUCCESS_RESPONSE;
    }
    boolean toggleSucceed = true;
    scan:
    for (CurrentState currentState : instanceCurrentStates) {
      for (String state : currentState.getPartitionStateMap().values()) {
        if (beforeToggleStates.equals(state)) {
          // One partition still in the old state is enough to know we must keep waiting.
          toggleSucceed = false;
          break scan;
        }
      }
    }
    if (toggleSucceed) {
      return (toggle) ? new PinotResourceManagerResponse("Instance " + instanceName + " enabled.", true) : new PinotResourceManagerResponse("Instance " + instanceName + " disabled.", true);
    }
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      // Restore the interrupt flag and stop waiting; continuing would make every subsequent
      // sleep() fail immediately and turn this loop into a busy spin until the deadline.
      Thread.currentThread().interrupt();
      return new PinotResourceManagerResponse("Instance enable/disable interrupted.", false);
    }
  }
  return new PinotResourceManagerResponse("Instance enable/disable failed, timeout.", false);
}
use of org.apache.helix.model.LiveInstance in project pinot by linkedin.
the class ShowClusterInfoCommand method execute.
/**
 * Collects controller, server, broker, table and segment information for the configured cluster
 * from ZooKeeper/Helix and logs it as YAML.
 *
 * <p>Tables can be restricted through the comma-separated {@code _tables} and {@code _tags}
 * filters; an empty filter matches everything. Both ZooKeeper connections opened here are closed
 * before returning, whether the command succeeds, fails the cluster check, or throws.
 *
 * @return {@code true} when the cluster information was dumped, {@code false} when the cluster
 *     does not exist at the configured ZooKeeper address.
 * @throws Exception if reading from ZooKeeper/Helix fails.
 */
@Override
public boolean execute() throws Exception {
  Set<String> includeTableSet = parseRawNames(_tables);
  Set<String> includeTagSet = parseRawNames(_tags);
  ClusterInfo clusterInfo = new ClusterInfo();
  clusterInfo.clusterName = _clusterName;
  ZKHelixAdmin zkHelixAdmin = new ZKHelixAdmin(_zkAddress);
  ZkClient zkClient = null;
  try {
    if (!zkHelixAdmin.getClusters().contains(_clusterName)) {
      LOGGER.error("Cluster {} not found in {}.", _clusterName, _zkAddress);
      return false;
    }
    List<String> instancesInCluster = zkHelixAdmin.getInstancesInCluster(_clusterName);
    List<String> tables = zkHelixAdmin.getResourcesInCluster(_clusterName);
    zkClient = new ZkClient(_zkAddress);
    zkClient.setZkSerializer(new ZNRecordStreamingSerializer());
    LOGGER.info("Connecting to Zookeeper at: {}", _zkAddress);
    zkClient.waitUntilConnected();
    ZkBaseDataAccessor<ZNRecord> baseDataAccessor = new ZkBaseDataAccessor<>(zkClient);
    ZKHelixDataAccessor zkHelixDataAccessor = new ZKHelixDataAccessor(_clusterName, baseDataAccessor);
    PropertyKey property = zkHelixDataAccessor.keyBuilder().liveInstances();
    List<String> liveInstances = zkHelixDataAccessor.getChildNames(property);
    PropertyKey controllerLeaderKey = zkHelixDataAccessor.keyBuilder().controllerLeader();
    LiveInstance controllerLeaderLiveInstance = zkHelixDataAccessor.getProperty(controllerLeaderKey);
    ControllerInfo controllerInfo = new ControllerInfo();
    if (controllerLeaderLiveInstance != null) {
      controllerInfo.leaderName = controllerLeaderLiveInstance.getId();
    } else {
      // No leader znode (e.g. no controller running): still report the rest of the cluster.
      LOGGER.warn("No controller leader found for cluster {}.", _clusterName);
    }
    clusterInfo.controllerInfo = controllerInfo;
    for (String server : instancesInCluster) {
      if (server.startsWith("Server")) {
        ServerInfo serverInfo = new ServerInfo();
        serverInfo.name = server;
        serverInfo.state = (liveInstances.contains(server)) ? "ONLINE" : "OFFLINE";
        InstanceConfig config = zkHelixAdmin.getInstanceConfig(_clusterName, server);
        serverInfo.tags = config.getRecord().getListField("TAG_LIST");
        clusterInfo.addServerInfo(serverInfo);
      }
      if (server.startsWith("Broker")) {
        BrokerInfo brokerInfo = new BrokerInfo();
        brokerInfo.name = server;
        brokerInfo.state = (liveInstances.contains(server)) ? "ONLINE" : "OFFLINE";
        InstanceConfig config = zkHelixAdmin.getInstanceConfig(_clusterName, server);
        brokerInfo.tags = config.getRecord().getListField("TAG_LIST");
        clusterInfo.addBrokerInfo(brokerInfo);
      }
    }
    for (String table : tables) {
      if ("brokerResource".equalsIgnoreCase(table)) {
        continue;
      }
      TableInfo tableInfo = new TableInfo();
      IdealState idealState = zkHelixAdmin.getResourceIdealState(_clusterName, table);
      ExternalView externalView = zkHelixAdmin.getResourceExternalView(_clusterName, table);
      Set<String> segmentsFromIdealState = idealState.getPartitionSet();
      tableInfo.tableName = table;
      tableInfo.tag = idealState.getRecord().getSimpleField("INSTANCE_GROUP_TAG");
      String rawTableName = stripTypeFromName(tableInfo.tableName);
      String rawTagName = stripTypeFromName(tableInfo.tag);
      if (!includeTableSet.isEmpty() && !includeTableSet.contains(rawTableName)) {
        continue;
      }
      if (!includeTagSet.isEmpty() && !includeTagSet.contains(rawTagName)) {
        continue;
      }
      for (String segment : segmentsFromIdealState) {
        SegmentInfo segmentInfo = new SegmentInfo();
        segmentInfo.name = segment;
        Map<String, String> serverStateMapFromIS = idealState.getInstanceStateMap(segment);
        if (serverStateMapFromIS == null) {
          LOGGER.info("Unassigned segment {} in ideal state", segment);
          serverStateMapFromIS = Collections.emptyMap();
        }
        // A table may have an ideal state but no external view yet; treat that like an
        // unassigned segment instead of failing with a NullPointerException.
        Map<String, String> serverStateMapFromEV = (externalView == null) ? null : externalView.getStateMap(segment);
        if (serverStateMapFromEV == null) {
          LOGGER.info("Unassigned segment {} in external view", segment);
          serverStateMapFromEV = Collections.emptyMap();
        }
        // Report, for every server the ideal state assigns, the state the external view shows
        // (null when the external view has no entry for that server).
        for (String serverName : serverStateMapFromIS.keySet()) {
          segmentInfo.segmentStateMap.put(serverName, serverStateMapFromEV.get(serverName));
        }
        tableInfo.addSegmentInfo(segmentInfo);
      }
      clusterInfo.addTableInfo(tableInfo);
    }
    Yaml yaml = new Yaml();
    StringWriter sw = new StringWriter();
    yaml.dump(clusterInfo, sw);
    LOGGER.info(sw.toString());
    return true;
  } finally {
    // Release both ZooKeeper connections on every exit path.
    if (zkClient != null) {
      zkClient.close();
    }
    zkHelixAdmin.close();
  }
}

/** Splits a comma-separated filter into the set of non-empty names with their type stripped. */
private Set<String> parseRawNames(String commaSeparatedNames) {
  Set<String> rawNames = new HashSet<>();
  for (String entry : commaSeparatedNames.split(",")) {
    String name = stripTypeFromName(entry.trim());
    if (name.length() > 0) {
      rawNames.add(name);
    }
  }
  return rawNames;
}
use of org.apache.helix.model.LiveInstance in project helix by apache.
the class HelixTaskExecutor method onMessage.
/**
 * Processes a batch of Helix messages addressed to {@code instanceName}.
 *
 * <p>Overall flow, as implemented below:
 * <ol>
 *   <li>FINALIZE notifications reset the executor and return; INIT notifications initialize it
 *       and then fall through to normal processing.</li>
 *   <li>When no messages are passed in, new messages are read from ZooKeeper.</li>
 *   <li>While shutting down, the messages are only logged and then dropped.</li>
 *   <li>Messages are sorted with {@code Message.CREATE_TIME_COMPARATOR} and filtered one by one:
 *       NO-OP, expired and session-mismatched messages are removed; a
 *       PARTICIPANT_SESSION_CHANGE received by a controller triggers a live-instance refresh;
 *       messages that are not NEW are skipped.</li>
 *   <li>For the remaining messages a handler is created, the message is marked as read and, for
 *       state-transition messages of resources without a current-state entry, current-state
 *       metadata is prepared.</li>
 *   <li>Finally the prepared current-state znodes are created in one batch, the read messages are
 *       updated in one batch, and one task per handler is scheduled.</li>
 * </ol>
 *
 * @param instanceName name of the instance the messages belong to.
 * @param messages messages delivered with the notification; may be {@code null} or empty, in
 *     which case they are read from ZooKeeper.
 * @param changeContext notification context supplying the manager, notification type and change
 *     type.
 */
@Override
@PreFetch(enabled = false)
public void onMessage(String instanceName, List<Message> messages, NotificationContext changeContext) {
  HelixManager manager = changeContext.getManager();
  // TODO: see if we should have a separate notification call for resetting
  if (changeContext.getType() == Type.FINALIZE) {
    reset();
    return;
  }
  if (changeContext.getType() == Type.INIT) {
    init();
    // continue to process messages
  }
  // if prefetch is disabled in MessageListenerCallback, we need to read all new messages from zk.
  if (messages == null || messages.isEmpty()) {
    // If no messages are given, check and read all new messages.
    messages = readNewMessagesFromZK(manager, instanceName, changeContext.getChangeType());
  }
  if (_isShuttingDown) {
    StringBuilder sb = new StringBuilder();
    for (Message message : messages) {
      sb.append(message.getMsgId()).append(",");
    }
    LOG.info("Helix task executor is shutting down, discard unprocessed messages : " + sb.toString());
    return;
  }
  // Update message count
  if (_messageQueueMonitor != null) {
    _messageQueueMonitor.setMessageQueueBacklog(messages.size());
  }
  if (messages.isEmpty()) {
    LOG.info("No Messages to process");
    return;
  }
  // sort message by creation timestamp, so message created earlier is processed first
  Collections.sort(messages, Message.CREATE_TIME_COMPARATOR);
  HelixDataAccessor accessor = manager.getHelixDataAccessor();
  Builder keyBuilder = accessor.keyBuilder();
  // message handlers created; state-transition handlers are keyed by resource/partition target,
  // so a later message for the same target replaces the earlier handler
  Map<String, MessageHandler> stateTransitionHandlers = new HashMap<>();
  List<MessageHandler> nonStateTransitionHandlers = new ArrayList<>();
  // message read
  List<Message> readMsgs = new ArrayList<>();
  String sessionId = manager.getSessionId();
  List<String> curResourceNames = accessor.getChildNames(keyBuilder.currentStates(instanceName, sessionId));
  // current-state metadata to create in one batch after the loop
  List<PropertyKey> createCurStateKeys = new ArrayList<>();
  List<CurrentState> metaCurStates = new ArrayList<>();
  Set<String> createCurStateNames = new HashSet<>();
  for (Message message : messages) {
    // NO-OP messages carry no work of their own and are simply removed
    if (message.getMsgType().equalsIgnoreCase(MessageType.NO_OP.toString())) {
      LOG.info("Dropping NO-OP message. mid: " + message.getId() + ", from: " + message.getMsgSrc());
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
      continue;
    }
    if (message.isExpired()) {
      LOG.info("Dropping expired message. mid: " + message.getId() + ", from: " + message.getMsgSrc() + " relayed from: " + message.getRelaySrcHost());
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
      continue;
    }
    String tgtSessionId = message.getTgtSessionId();
    // sessionId mismatch normally means message comes from expired session, just remove it.
    // "*".equals(...) keeps a message without a target session id from raising a
    // NullPointerException; such a message is handled as a mismatch.
    if (!sessionId.equals(tgtSessionId) && !"*".equals(tgtSessionId)) {
      String warningMessage = "SessionId does NOT match. expected sessionId: " + sessionId + ", tgtSessionId in message: " + tgtSessionId + ", messageId: " + message.getMsgId();
      LOG.warn(warningMessage);
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
      _statusUpdateUtil.logWarning(message, HelixStateMachineEngine.class, warningMessage, manager);
      // On a participant, sync the session to the controller when the mismatched message was
      // created after the current session started
      if (manager.getInstanceType() == InstanceType.PARTICIPANT || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) {
        if (message.getCreateTimeStamp() > manager.getSessionStartTime()) {
          syncSessionToController(manager);
        }
      }
      continue;
    }
    if ((manager.getInstanceType() == InstanceType.CONTROLLER || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) && MessageType.PARTICIPANT_SESSION_CHANGE.name().equals(message.getMsgType())) {
      LOG.info(String.format("Controller received PARTICIPANT_SESSION_CHANGE msg from src: %s", message.getMsgSrc()));
      PropertyKey key = new Builder(manager.getClusterName()).liveInstances();
      List<LiveInstance> liveInstances = manager.getHelixDataAccessor().getChildValues(key);
      _controller.onLiveInstanceChange(liveInstances, changeContext);
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.COMPLETED);
      continue;
    }
    // don't process message that is of READ or UNPROCESSABLE state
    if (MessageState.NEW != message.getMsgState()) {
      // check for the status and ignore if its already read
      if (LOG.isTraceEnabled()) {
        LOG.trace("Message already read. msgId: " + message.getMsgId());
      }
      continue;
    }
    // State Transition Cancellation: if the targeted transition could be cancelled before it
    // started, nothing more needs to be done for this message
    if (message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
      boolean success = cancelNotStartedStateTransition(message, stateTransitionHandlers, accessor, instanceName);
      if (success) {
        continue;
      }
    }
    _monitor.reportReceivedMessage(message);
    // create message handlers, if handlers not found, leave its state as NEW
    try {
      MessageHandler createHandler = createMessageHandler(message, changeContext);
      if (createHandler == null) {
        continue;
      }
      if (message.getMsgType().equals(MessageType.STATE_TRANSITION.name()) || message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
        stateTransitionHandlers.put(getMessageTarget(message.getResourceName(), message.getPartitionName()), createHandler);
      } else {
        nonStateTransitionHandlers.add(createHandler);
      }
    } catch (Exception e) {
      // A message whose handler cannot be created is marked UNPROCESSABLE, removed from ZK and
      // reported as discarded
      LOG.error("Failed to create message handler for " + message.getMsgId(), e);
      String error = "Failed to create message handler for " + message.getMsgId() + ", exception: " + e;
      _statusUpdateUtil.logError(message, HelixStateMachineEngine.class, e, error, manager);
      message.setMsgState(MessageState.UNPROCESSABLE);
      removeMessageFromZK(accessor, message, instanceName);
      LOG.error("Message cannot be processed: " + message.getRecord(), e);
      _monitor.reportProcessedMessage(message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
      continue;
    }
    markReadMessage(message, changeContext, manager);
    readMsgs.add(message);
    // Prepare current-state metadata for batch creation;
    // do it for non-controller and state transition messages only
    if (!message.isControlerMsg() && message.getMsgType().equals(Message.MessageType.STATE_TRANSITION.name())) {
      String resourceName = message.getResourceName();
      // only once per resource, and only when the resource has no current-state entry yet
      if (!curResourceNames.contains(resourceName) && !createCurStateNames.contains(resourceName)) {
        createCurStateNames.add(resourceName);
        createCurStateKeys.add(keyBuilder.currentState(instanceName, sessionId, resourceName));
        CurrentState metaCurState = new CurrentState(resourceName);
        metaCurState.setBucketSize(message.getBucketSize());
        metaCurState.setStateModelDefRef(message.getStateModelDef());
        metaCurState.setSessionId(sessionId);
        metaCurState.setBatchMessageMode(message.getBatchMessageMode());
        String ftyName = message.getStateModelFactoryName();
        if (ftyName != null) {
          metaCurState.setStateModelFactoryName(ftyName);
        } else {
          metaCurState.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY);
        }
        metaCurStates.add(metaCurState);
      }
    }
  }
  // batch create curState meta
  if (createCurStateKeys.size() > 0) {
    try {
      accessor.createChildren(createCurStateKeys, metaCurStates);
    } catch (Exception e) {
      // a failure here is only logged; the messages are still updated and scheduled below
      LOG.error("fail to create cur-state znodes for messages: " + readMsgs, e);
    }
  }
  // update message state to READ in batch and schedule all read messages
  if (readMsgs.size() > 0) {
    updateMessageState(readMsgs, accessor, instanceName);
    for (MessageHandler handler : stateTransitionHandlers.values()) {
      HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
      scheduleTask(task);
    }
    for (MessageHandler handler : nonStateTransitionHandlers) {
      HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
      scheduleTask(task);
    }
  }
}
use of org.apache.helix.model.LiveInstance in project helix by apache.
the class TestP2PMessageSemiAuto method verifyP2PMessage.
/**
 * Asserts that every partition of {@code dbName} hosted on {@code instance} is in
 * {@code expectedState} and that its transition was triggered by {@code expectedTriggerHost}.
 *
 * @param dbName resource whose current state is checked.
 * @param instance instance whose current state is read; it must currently be live.
 * @param expectedState state every partition is expected to be in.
 * @param expectedTriggerHost trigger host expected to be recorded for every partition.
 */
private void verifyP2PMessage(String dbName, String instance, String expectedState, String expectedTriggerHost) {
  ClusterDataCache dataCache = new ClusterDataCache(CLUSTER_NAME);
  dataCache.refresh(_accessor);
  Map<String, LiveInstance> liveInstanceMap = dataCache.getLiveInstances();
  LiveInstance liveInstance = liveInstanceMap.get(instance);
  // Fail with a readable assertion instead of a NullPointerException when the instance is down.
  Assert.assertNotNull(liveInstance, "Instance " + instance + " is not live!");
  Map<String, CurrentState> currentStateMap = dataCache.getCurrentState(instance, liveInstance.getSessionId());
  Assert.assertNotNull(currentStateMap);
  CurrentState currentState = currentStateMap.get(dbName);
  Assert.assertNotNull(currentState);
  Assert.assertEquals(currentState.getPartitionStateMap().size(), PARTITION_NUMBER);
  for (String partition : currentState.getPartitionStateMap().keySet()) {
    String state = currentState.getState(partition);
    Assert.assertEquals(state, expectedState, dbName + " Partition " + partition + "'s state is different as expected!");
    String triggerHost = currentState.getTriggerHost(partition);
    Assert.assertEquals(triggerHost, expectedTriggerHost, "Partition " + partition + "'s transition to Master was not triggered by expected host!");
  }
}
use of org.apache.helix.model.LiveInstance in project helix by apache.
the class ClusterSetup method dropInstanceFromCluster.
/**
 * Drops an instance from a cluster after verifying that it is safe to do so.
 *
 * <p>The instance must not be live, must have an instance config, and must be disabled, either
 * in its own config or through the cluster config's disabled-instances map.
 *
 * @param clusterName name of the cluster the instance belongs to.
 * @param instanceId identifier of the instance; it is normalized through
 *     {@code InstanceConfig.toInstanceConfig} before being looked up.
 * @throws HelixException if the instance is still live, does not exist, or is still enabled.
 */
public void dropInstanceFromCluster(String clusterName, String instanceId) {
  ZKHelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
  Builder keyBuilder = accessor.keyBuilder();
  InstanceConfig instanceConfig = InstanceConfig.toInstanceConfig(instanceId);
  instanceId = instanceConfig.getInstanceName();
  // ensure node is stopped
  LiveInstance liveInstance = accessor.getProperty(keyBuilder.liveInstance(instanceId));
  if (liveInstance != null) {
    String error = "Can't drop " + instanceId + ", please stop " + instanceId + " before drop it";
    // logged like the other two rejection paths below
    _logger.warn(error);
    throw new HelixException(error);
  }
  InstanceConfig config = accessor.getProperty(keyBuilder.instanceConfig(instanceId));
  if (config == null) {
    String error = "Node " + instanceId + " does not exist, cannot drop";
    _logger.warn(error);
    throw new HelixException(error);
  }
  ClusterConfig clusterConfig = accessor.getProperty(keyBuilder.clusterConfig());
  // A missing cluster config, or one without a disabled-instances map, disables nothing.
  boolean disabledInClusterConfig = clusterConfig != null && clusterConfig.getDisabledInstances() != null && clusterConfig.getDisabledInstances().containsKey(instanceId);
  // ensure node is disabled, otherwise fail
  if (config.getInstanceEnabled() && !disabledInClusterConfig) {
    String error = "Node " + instanceId + " is enabled, cannot drop";
    _logger.warn(error);
    throw new HelixException(error);
  }
  _admin.dropInstance(clusterName, config);
}
Aggregations