use of org.apache.helix.HelixDataAccessor in project helix by apache.
the class HelixStateTransitionHandler method onError.
/**
 * Reacts to a failed state transition for the partition named in {@code _message}.
 *
 * <p>Errors typed {@code ErrorType.INTERNAL} are logged and skipped. For every other type the
 * state model's {@code rollbackOnError} callback is always invoked last; before that, when
 * {@code code} is {@code ErrorCode.ERROR}, the partition is put into the ERROR state in the local
 * state model and the same state is written to the current-state record through the data
 * accessor.
 *
 * @param e the exception describing the failure
 * @param code the error code reported for the transition
 * @param type the category of the error
 */
@Override
public void onError(Exception e, ErrorCode code, ErrorType type) {
  HelixDataAccessor dataAccessor = _manager.getHelixDataAccessor();
  Builder keys = dataAccessor.keyBuilder();
  String participant = _manager.getInstanceName();
  String resource = _message.getResourceName();
  String partitionName = _message.getPartitionName();

  // Internal errors have already been processed, so they are only logged here.
  if (type == ErrorType.INTERNAL) {
    logger.error("Skip internal error. errCode: " + code + ", errMsg: " + e.getMessage());
    return;
  }

  try {
    // Only an ERROR code (i.e. the transition was not cancelled) moves the partition to ERROR.
    if (code == ErrorCode.ERROR) {
      String errorState = HelixDefinedState.ERROR.toString();

      CurrentState errorDelta = new CurrentState(resource);
      errorDelta.setState(partitionName, errorState);
      _stateModel.updateState(errorState);

      // A transition that started from ERROR and failed again disables the partition.
      boolean startedInError = _message.getFromState().equalsIgnoreCase(errorState);
      if (startedInError) {
        disablePartition();
      }

      boolean persisted = dataAccessor.updateProperty(
          keys.currentState(participant, _message.getTgtSessionId(), resource), errorDelta);
      if (!persisted) {
        logger.error("Fails to persist ERROR current state to ZK for resource " + resource
            + " partition: " + partitionName);
      }
    }
  } finally {
    // The rollback callback runs whether or not the ERROR-state update above succeeded.
    StateTransitionError transitionError = new StateTransitionError(type, code, e);
    _stateModel.rollbackOnError(_message, _notificationContext, transitionError);
  }
}
use of org.apache.helix.HelixDataAccessor in project helix by apache.
the class HelixStateTransitionHandler method postHandleMessage.
/**
 * Records the outcome of the executed state transition in {@code _currentStateDelta} and
 * publishes it.
 *
 * <p>The task result is read from the notification context. Nothing is written when the
 * message's target session differs from the manager's current session, when the task was
 * cancelled, or when it was interrupted without a timeout. Otherwise the delta receives the
 * resulting state (the target state on success, the state model's own state on a state mismatch,
 * ERROR on any other failure) and is either persisted through the data accessor (normal message)
 * or stored in the shared current-state-update map (sub-message of a batch message).
 */
void postHandleMessage() {
  HelixTaskResult result =
      (HelixTaskResult) _notificationContext.get(MapKey.HELIX_TASK_RESULT.toString());
  Exception failure = result.getException();
  String partition = _message.getPartitionName();
  String resourceName = _message.getResourceName();
  String targetSession = _message.getTgtSessionId();
  String participant = _manager.getInstanceName();
  HelixDataAccessor dataAccessor = _manager.getHelixDataAccessor();
  Builder keys = dataAccessor.keyBuilder();
  ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(_message.getBucketSize());

  // The message targets a session other than the manager's current one: skip the update.
  if (!_message.getTgtSessionId().equals(_manager.getSessionId())) {
    logger.warn("Session id has changed. Skip postExecutionMessage. Old session "
        + _message.getExecutionSessionId() + " , new session : " + _manager.getSessionId());
    return;
  }

  // Transition metadata: info, completion time and the state the transition started from.
  _currentStateDelta.setInfo(partition, result.getInfo());
  _currentStateDelta.setEndTime(partition, result.getCompleteTime());
  _currentStateDelta.setPreviousState(partition, _message.getFromState());

  // The triggering host is the relay source for relayed messages, the message source otherwise.
  boolean relayed =
      Message.MessageType.RELAYED_MESSAGE.name().equals(_message.getMsgSubType());
  String triggerHost = relayed ? _message.getRelaySrcHost() : _message.getMsgSrc();
  _currentStateDelta.setTriggerHost(partition, triggerHost);

  if (!result.isSuccess()) {
    if (result.isCancelled()) {
      // Cancelled message does not need current state update
      return;
    }

    if (failure instanceof HelixStateMismatchException) {
      // From-state mismatch: make the persisted current state follow the state model's state.
      logger.warn("Force CurrentState on Zk to be stateModel's CurrentState. partitionKey: "
          + partition + ", currentState: " + _stateModel.getCurrentState() + ", message: "
          + _message);
      _currentStateDelta.setState(partition, _stateModel.getCurrentState());
    } else {
      boolean interrupted = failure instanceof InterruptedException;
      if (interrupted && !_isTimeout) {
        // Interrupted for a reason other than timeout: keep the current state untouched.
        logger.error("State transition interrupted but not timeout. Not updating state. Partition : "
            + _message.getPartitionName() + " MsgId : " + _message.getMsgId());
        return;
      }

      // An interruption that reaches this point was caused by a timeout.
      ErrorCode errorCode = interrupted ? ErrorCode.TIMEOUT : ErrorCode.ERROR;
      StateTransitionError transitionError =
          new StateTransitionError(ErrorType.INTERNAL, errorCode, failure);
      _stateModel.rollbackOnError(_message, _notificationContext, transitionError);

      String errorState = HelixDefinedState.ERROR.toString();
      _currentStateDelta.setState(partition, errorState);
      _stateModel.updateState(errorState);

      // A failed transition that started from ERROR disables the partition.
      if (_message.getFromState().equalsIgnoreCase(errorState)) {
        disablePartition();
      }
    }
  } else {
    String targetState = _message.getToState();
    _currentStateDelta.setState(partition, targetState);

    boolean dropped = targetState.equalsIgnoreCase(HelixDefinedState.DROPPED.toString());
    if (!dropped) {
      // The partition stays: bring the state model to the target state.
      _stateModel.updateState(targetState);
    } else {
      // The partition is dropped: attach a SUBTRACT delta for its map field to the current-state
      // delta and discard its state model. The state model itself is not updated on this path.
      ZNRecord removal = new ZNRecord(_currentStateDelta.getId());
      removal.getMapFields().put(partition, null);
      List<ZNRecordDelta> deltas = new ArrayList<ZNRecordDelta>();
      deltas.add(new ZNRecordDelta(removal, MergeOperation.SUBTRACT));
      _currentStateDelta.setDeltaList(deltas);
      _stateModelFactory.removeStateModel(resourceName, partition);
    }
  }

  try {
    // Key of the (possibly bucketized) current-state node for this partition.
    PropertyKey currentStateKey = keys.currentState(participant, targetSession, resourceName,
        bucketizer.getBucketName(partition));

    if (_message.getAttribute(Attributes.PARENT_MSG_ID) != null) {
      // Sub-message of a batch message: hand the update over via the notification context.
      ConcurrentHashMap<String, CurrentStateUpdate> pendingUpdates =
          (ConcurrentHashMap<String, CurrentStateUpdate>) _notificationContext
              .get(MapKey.CURRENT_STATE_UPDATE.toString());
      pendingUpdates.put(partition, new CurrentStateUpdate(currentStateKey, _currentStateDelta));
    } else if (!dataAccessor.updateProperty(currentStateKey, _currentStateDelta)) {
      // Normal message whose update failed; the exception is handled by the catch below.
      throw new HelixException("Fails to persist current state back to ZK for resource "
          + resourceName + " partition: " + _message.getPartitionName());
    }
  } catch (Exception e) {
    logger.error("Error when updating current-state ", e);
    StateTransitionError frameworkError =
        new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
    _stateModel.rollbackOnError(_message, _notificationContext, frameworkError);
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, e,
        "Error when update current-state ", _manager);
  }
}
use of org.apache.helix.HelixDataAccessor in project helix by apache.
the class HelixTaskExecutor method onMessage.
/**
 * Message-change callback: filters the incoming messages, creates a handler for each one that
 * can be processed, marks those messages as read and schedules a task per handler.
 *
 * <p>A {@code FINALIZE} notification resets the executor and returns; an {@code INIT}
 * notification initializes it and then continues. When no messages are supplied they are read
 * via {@code readNewMessagesFromZK}. Nothing is scheduled while the executor is shutting down.
 *
 * <p>NO_OP messages, expired messages and messages addressed to another session are reported
 * as discarded and removed. A message without a target session id is treated as a session
 * mismatch rather than aborting the whole batch with a {@code NullPointerException}.
 *
 * @param instanceName name of the instance whose message queue changed
 * @param messages messages delivered with the notification; may be null or empty
 * @param changeContext notification context supplying the manager and the change type
 */
@Override
@PreFetch(enabled = false)
public void onMessage(String instanceName, List<Message> messages,
    NotificationContext changeContext) {
  HelixManager manager = changeContext.getManager();

  // TODO: see if we should have a separate notification call for resetting
  if (changeContext.getType() == Type.FINALIZE) {
    reset();
    return;
  }

  if (changeContext.getType() == Type.INIT) {
    init();
    // continue to process messages
  }

  // if prefetch is disabled in MessageListenerCallback, we need to read all new messages from zk.
  if (messages == null || messages.isEmpty()) {
    // If no messages are given, check and read all new messages.
    messages = readNewMessagesFromZK(manager, instanceName, changeContext.getChangeType());
  }

  if (_isShuttingDown) {
    StringBuilder sb = new StringBuilder();
    for (Message message : messages) {
      sb.append(message.getMsgId()).append(',');
    }
    LOG.info("Helix task executor is shutting down, discard unprocessed messages : "
        + sb.toString());
    return;
  }

  // Update message count
  if (_messageQueueMonitor != null) {
    _messageQueueMonitor.setMessageQueueBacklog(messages.size());
  }

  if (messages.isEmpty()) {
    LOG.info("No Messages to process");
    return;
  }

  // sort message by creation timestamp, so message created earlier is processed first
  Collections.sort(messages, Message.CREATE_TIME_COMPARATOR);

  HelixDataAccessor accessor = manager.getHelixDataAccessor();
  Builder keyBuilder = accessor.keyBuilder();

  // message handlers created
  // NOTE(review): handlers are keyed by message target, so a later message for the same
  // resource/partition presumably replaces an earlier handler in this map - confirm intended.
  Map<String, MessageHandler> stateTransitionHandlers = new HashMap<>();
  List<MessageHandler> nonStateTransitionHandlers = new ArrayList<>();

  // message read
  List<Message> readMsgs = new ArrayList<>();

  String sessionId = manager.getSessionId();
  List<String> curResourceNames =
      accessor.getChildNames(keyBuilder.currentStates(instanceName, sessionId));
  List<PropertyKey> createCurStateKeys = new ArrayList<>();
  List<CurrentState> metaCurStates = new ArrayList<>();
  Set<String> createCurStateNames = new HashSet<>();

  for (Message message : messages) {
    // situations such as register a new message handler factory
    if (message.getMsgType().equalsIgnoreCase(MessageType.NO_OP.toString())) {
      LOG.info("Dropping NO-OP message. mid: " + message.getId() + ", from: "
          + message.getMsgSrc());
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
      continue;
    }

    if (message.isExpired()) {
      LOG.info("Dropping expired message. mid: " + message.getId() + ", from: "
          + message.getMsgSrc() + " relayed from: " + message.getRelaySrcHost());
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
      continue;
    }

    String tgtSessionId = message.getTgtSessionId();
    // sessionId mismatch normally means message comes from expired session, just remove it.
    // "*" is compared constant-first so a missing target session id counts as a mismatch
    // instead of throwing a NullPointerException that would abort the whole batch.
    if (!sessionId.equals(tgtSessionId) && !"*".equals(tgtSessionId)) {
      String warningMessage = "SessionId does NOT match. expected sessionId: " + sessionId
          + ", tgtSessionId in message: " + tgtSessionId + ", messageId: "
          + message.getMsgId();
      LOG.warn(warningMessage);
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.DISCARDED);
      _statusUpdateUtil.logWarning(message, HelixStateMachineEngine.class, warningMessage,
          manager);

      // upon session mismatch after a new session is established
      if (manager.getInstanceType() == InstanceType.PARTICIPANT
          || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT) {
        if (message.getCreateTimeStamp() > manager.getSessionStartTime()) {
          syncSessionToController(manager);
        }
      }
      continue;
    }

    // A controller handles PARTICIPANT_SESSION_CHANGE inline by re-reading the live instances.
    if ((manager.getInstanceType() == InstanceType.CONTROLLER
        || manager.getInstanceType() == InstanceType.CONTROLLER_PARTICIPANT)
        && MessageType.PARTICIPANT_SESSION_CHANGE.name().equals(message.getMsgType())) {
      LOG.info(String.format("Controller received PARTICIPANT_SESSION_CHANGE msg from src: %s",
          message.getMsgSrc()));
      PropertyKey key = new Builder(manager.getClusterName()).liveInstances();
      List<LiveInstance> liveInstances = manager.getHelixDataAccessor().getChildValues(key);
      _controller.onLiveInstanceChange(liveInstances, changeContext);
      reportAndRemoveMessage(message, accessor, instanceName, ProcessedMessageState.COMPLETED);
      continue;
    }

    // don't process message that is of READ or UNPROCESSABLE state
    if (MessageState.NEW != message.getMsgState()) {
      // check for the status and ignore if its already read
      if (LOG.isTraceEnabled()) {
        LOG.trace("Message already read. msgId: " + message.getMsgId());
      }
      continue;
    }

    // State Transition Cancellation
    if (message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
      boolean success = cancelNotStartedStateTransition(message, stateTransitionHandlers,
          accessor, instanceName);
      if (success) {
        continue;
      }
    }

    _monitor.reportReceivedMessage(message);

    // create message handlers, if handlers not found, leave its state as NEW
    try {
      MessageHandler createHandler = createMessageHandler(message, changeContext);
      if (createHandler == null) {
        continue;
      }
      if (message.getMsgType().equals(MessageType.STATE_TRANSITION.name())
          || message.getMsgType().equals(MessageType.STATE_TRANSITION_CANCELLATION.name())) {
        stateTransitionHandlers.put(
            getMessageTarget(message.getResourceName(), message.getPartitionName()),
            createHandler);
      } else {
        nonStateTransitionHandlers.add(createHandler);
      }
    } catch (Exception e) {
      // Handler creation failed: mark the message unprocessable, remove it and report it.
      LOG.error("Failed to create message handler for " + message.getMsgId(), e);
      String error = "Failed to create message handler for " + message.getMsgId()
          + ", exception: " + e;

      _statusUpdateUtil.logError(message, HelixStateMachineEngine.class, e, error, manager);

      message.setMsgState(MessageState.UNPROCESSABLE);
      removeMessageFromZK(accessor, message, instanceName);
      LOG.error("Message cannot be processed: " + message.getRecord(), e);
      _monitor.reportProcessedMessage(message,
          ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
      continue;
    }

    markReadMessage(message, changeContext, manager);
    readMsgs.add(message);

    // do it for non-controller and state transition messages only
    if (!message.isControlerMsg()
        && message.getMsgType().equals(Message.MessageType.STATE_TRANSITION.name())) {
      String resourceName = message.getResourceName();
      // Prepare a current-state meta record once per resource that has no current state yet.
      if (!curResourceNames.contains(resourceName)
          && !createCurStateNames.contains(resourceName)) {
        createCurStateNames.add(resourceName);
        createCurStateKeys.add(keyBuilder.currentState(instanceName, sessionId, resourceName));

        CurrentState metaCurState = new CurrentState(resourceName);
        metaCurState.setBucketSize(message.getBucketSize());
        metaCurState.setStateModelDefRef(message.getStateModelDef());
        metaCurState.setSessionId(sessionId);
        metaCurState.setBatchMessageMode(message.getBatchMessageMode());
        String ftyName = message.getStateModelFactoryName();
        if (ftyName != null) {
          metaCurState.setStateModelFactoryName(ftyName);
        } else {
          metaCurState.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY);
        }

        metaCurStates.add(metaCurState);
      }
    }
  }

  // batch create curState meta
  if (!createCurStateKeys.isEmpty()) {
    try {
      accessor.createChildren(createCurStateKeys, metaCurStates);
    } catch (Exception e) {
      // Best effort: a failure here is logged and scheduling still proceeds.
      LOG.error("fail to create cur-state znodes for messages: " + readMsgs, e);
    }
  }

  // update message state to READ in batch and schedule all read messages
  if (!readMsgs.isEmpty()) {
    updateMessageState(readMsgs, accessor, instanceName);

    for (MessageHandler handler : stateTransitionHandlers.values()) {
      HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
      scheduleTask(task);
    }

    for (MessageHandler handler : nonStateTransitionHandlers) {
      HelixTask task = new HelixTask(handler._message, changeContext, handler, this);
      scheduleTask(task);
    }
  }
}
use of org.apache.helix.HelixDataAccessor in project helix by apache.
the class HelixTaskExecutor method readNewMessagesFromZK.
/**
 * Reads the messages whose ids are present under the relevant message path but are not yet in
 * {@code _knownMessageIds}.
 *
 * <p>Ids that no longer exist under the path are pruned from {@code _knownMessageIds} as a side
 * effect. Messages that come back as {@code null} from the accessor (removed before they could
 * be read) are dropped from the result.
 *
 * @param manager manager providing the data accessor
 * @param instanceName instance whose message queue is read for {@code ChangeType.MESSAGE}
 * @param changeType {@code MESSAGE} for participant messages or {@code MESSAGES_CONTROLLER}
 *        for controller messages; any other value, including {@code null}, yields an empty list
 * @return the newly discovered messages, or an empty list for an unexpected change type
 */
private List<Message> readNewMessagesFromZK(HelixManager manager, String instanceName,
    HelixConstants.ChangeType changeType) {
  HelixDataAccessor accessor = manager.getHelixDataAccessor();
  Builder keyBuilder = accessor.keyBuilder();

  // The change type is compared with == so that a null value reaches the "unexpected" branch
  // below instead of throwing a NullPointerException; it is also decided once, not per message.
  boolean participantMessages = changeType == HelixConstants.ChangeType.MESSAGE;
  boolean controllerMessages = changeType == HelixConstants.ChangeType.MESSAGES_CONTROLLER;
  if (!participantMessages && !controllerMessages) {
    LOG.warn("Unexpected ChangeType for Message Change CallbackHandler: " + changeType);
    return Collections.emptyList();
  }

  Set<String> messageIds = new HashSet<>();
  if (participantMessages) {
    messageIds.addAll(accessor.getChildNames(keyBuilder.messages(instanceName)));
  } else {
    messageIds.addAll(accessor.getChildNames(keyBuilder.controllerMessages()));
  }

  // In case the cache contains any deleted message Id, clean up
  _knownMessageIds.retainAll(messageIds);
  // What remains afterwards are the ids that have not been seen before.
  messageIds.removeAll(_knownMessageIds);

  List<PropertyKey> keys = new ArrayList<>(messageIds.size());
  for (String messageId : messageIds) {
    if (participantMessages) {
      keys.add(keyBuilder.message(instanceName, messageId));
    } else {
      keys.add(keyBuilder.controllerMessage(messageId));
    }
  }

  List<Message> newMessages = accessor.getProperty(keys);

  // Message may be removed before get read, clean up null messages.
  Iterator<Message> messageIterator = newMessages.iterator();
  while (messageIterator.hasNext()) {
    if (messageIterator.next() == null) {
      messageIterator.remove();
    }
  }

  return newMessages;
}
use of org.apache.helix.HelixDataAccessor in project helix by apache.
the class DefaultMessagingService method send.
/**
 * Generates the messages matching {@code recipientCriteria} from the {@code message} template
 * and writes them to the controller or participant message paths.
 *
 * <p>When a reply callback is supplied it is given the overall timeout, registered under a
 * freshly generated correlation id that is stamped on every message, and its timer is started
 * after all messages have been written.
 *
 * @param recipientCriteria criteria selecting the recipients
 * @param message template from which the per-recipient messages are generated
 * @param callbackOnReply callback for replies; may be {@code null} for fire-and-forget
 * @param timeOut execution timeout set on each message; a negative overall timeout is passed
 *        to the callback as {@code -1}
 * @param retryCount retry count set on each message
 * @return the number of messages generated (0 when no recipient matched)
 */
@Override
public int send(final Criteria recipientCriteria, final Message message,
    AsyncCallback callbackOnReply, int timeOut, int retryCount) {
  Map<InstanceType, List<Message>> generateMessage =
      generateMessage(recipientCriteria, message);

  int totalMessageCount = 0;
  for (List<Message> messages : generateMessage.values()) {
    totalMessageCount += messages.size();
  }
  _logger.info("Send " + totalMessageCount + " messages with criteria " + recipientCriteria);
  if (totalMessageCount == 0) {
    return 0;
  }

  String correlationId = null;
  if (callbackOnReply != null) {
    // Computed in long so that a large timeOut/retryCount cannot wrap around to a bogus value;
    // the result is clamped back into the int range expected by the callback.
    long totalTimeout = (long) timeOut * ((long) retryCount + 1);
    int callbackTimeout;
    if (totalTimeout < 0) {
      callbackTimeout = -1;
    } else if (totalTimeout > Integer.MAX_VALUE) {
      callbackTimeout = Integer.MAX_VALUE;
    } else {
      callbackTimeout = (int) totalTimeout;
    }
    callbackOnReply.setTimeout(callbackTimeout);

    correlationId = UUID.randomUUID().toString();
    // NOTE(review): setMessagesSent is called once per receiver type; if it replaces rather
    // than accumulates, only the last list is retained - confirm against AsyncCallback.
    for (List<Message> messages : generateMessage.values()) {
      callbackOnReply.setMessagesSent(messages);
    }
    _asyncCallbackService.registerAsyncCallback(correlationId, callbackOnReply);
  }

  // The accessor is fetched once and reused for every message.
  HelixDataAccessor accessor = _manager.getHelixDataAccessor();
  Builder keyBuilder = accessor.keyBuilder();

  for (Map.Entry<InstanceType, List<Message>> entry : generateMessage.entrySet()) {
    InstanceType receiverType = entry.getKey();
    for (Message tempMessage : entry.getValue()) {
      tempMessage.setRetryCount(retryCount);
      tempMessage.setExecutionTimeout(timeOut);
      tempMessage.setSrcInstanceType(_manager.getInstanceType());
      if (correlationId != null) {
        tempMessage.setCorrelationId(correlationId);
      }

      // Only CONTROLLER and PARTICIPANT receivers are written; other types are skipped here
      // although their messages are still included in the returned count.
      if (receiverType == InstanceType.CONTROLLER) {
        accessor.setProperty(keyBuilder.controllerMessage(tempMessage.getId()), tempMessage);
      } else if (receiverType == InstanceType.PARTICIPANT) {
        accessor.setProperty(
            keyBuilder.message(tempMessage.getTgtName(), tempMessage.getId()), tempMessage);
      }
    }
  }

  if (callbackOnReply != null) {
    // start timer if timeout is set
    callbackOnReply.startTimer();
  }
  return totalMessageCount;
}
Aggregations