Use of org.apache.helix.participant.statemachine.StateTransitionError in the Apache Helix project.
Class HelixStateTransitionHandler, method preHandleMessage.
/**
 * Validates the incoming message and prepares the bookkeeping needed before the
 * state transition is executed.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>rejects the message if {@code _message.isValid()} is false;</li>
 *   <li>records the transition start time in {@code _currentStateDelta};</li>
 *   <li>checks that the message's from-state (unless it is {@code null} or {@code "*"})
 *       matches, ignoring case, the state held in {@code _currentStateDelta};</li>
 *   <li>tries to subtract the REQUESTED_STATE field of the partition from the persisted
 *       current state. A failure here is logged and reported through
 *       {@code rollbackOnError}, but is not rethrown.</li>
 * </ol>
 *
 * @throws HelixException if the message is not valid
 * @throws HelixStateMismatchException if the from-state does not match the recorded state
 * @throws Exception declared by the signature for callers; nothing else is thrown explicitly here
 */
void preHandleMessage() throws Exception {
  // Guard clause: a message missing required attributes is never processed.
  if (!_message.isValid()) {
    String invalidMessageText = "Invalid Message, ensure that message: " + _message
        + " has all the required fields: " + Arrays.toString(Message.Attributes.values());
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, invalidMessageText,
        _manager);
    logger.error(invalidMessageText);
    throw new HelixException(invalidMessageText);
  }

  logger.info("handling message: " + _message.getMsgId()
      + " transit " + _message.getResourceName() + "." + _message.getPartitionName()
      + "|" + _message.getPartitionNames()
      + " from:" + _message.getFromState()
      + " to:" + _message.getToState()
      + ", relayedFrom: " + _message.getRelaySrcHost());

  HelixDataAccessor dataAccessor = _manager.getHelixDataAccessor();
  String partition = _message.getPartitionName();
  String expectedFromState = _message.getFromState();

  // State currently recorded for this partition; compared against the message below.
  String recordedState = _currentStateDelta.getState(partition);

  // Stamp the start time right before the client logic is invoked.
  _currentStateDelta.setStartTime(_message.getPartitionName(), System.currentTimeMillis());

  // A missing from-state or the "*" wildcard places no constraint on the recorded state.
  boolean fromStateUnconstrained = expectedFromState == null || expectedFromState.equals("*");
  if (!fromStateUnconstrained && !expectedFromState.equalsIgnoreCase(recordedState)) {
    String mismatchText = "Current state of stateModel does not match the fromState in Message"
        + ", Current State:" + recordedState
        + ", message expected:" + expectedFromState
        + ", partition: " + partition
        + ", from: " + _message.getMsgSrc()
        + ", to: " + _message.getTgtName();
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, mismatchText,
        _manager);
    logger.error(mismatchText);
    throw new HelixStateMismatchException(mismatchText);
  }

  // Clear the REQUESTED_STATE property if it exists.
  try {
    String instanceName = _manager.getInstanceName();
    String targetSessionId = _message.getTgtSessionId();
    String resourceName = _message.getResourceName();
    ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(_message.getBucketSize());
    PropertyKey currentStateKey = dataAccessor.keyBuilder().currentState(instanceName,
        targetSessionId, resourceName, bucketizer.getBucketName(partition));

    // Describe the field to remove: partition -> { REQUESTED_STATE : null }.
    ZNRecord removalRecord = new ZNRecord(resourceName);
    Map<String, String> fieldsToRemove = new TreeMap<String, String>();
    fieldsToRemove.put(CurrentState.CurrentStateProperty.REQUESTED_STATE.name(), null);
    removalRecord.getMapFields().put(partition, fieldsToRemove);

    // Wrap the record in a SUBTRACT delta and attach it to a fresh CurrentState update.
    List<ZNRecordDelta> deltas = new ArrayList<ZNRecordDelta>();
    deltas.add(new ZNRecordDelta(removalRecord, ZNRecordDelta.MergeOperation.SUBTRACT));
    CurrentState requestedStateRemoval = new CurrentState(resourceName);
    requestedStateRemoval.setDeltaList(deltas);

    // Update the ZK current state of the node.
    boolean persisted = dataAccessor.updateProperty(currentStateKey, requestedStateRemoval);
    if (!persisted) {
      logger.error("Fails to persist current state back to ZK for resource " + resourceName
          + " partition: " + partition);
    }
  } catch (Exception e) {
    String failureText = "Error when removing "
        + CurrentState.CurrentStateProperty.REQUESTED_STATE.name() + " from current state.";
    logger.error(failureText, e);
    StateTransitionError transitionError =
        new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
    _stateModel.rollbackOnError(_message, _notificationContext, transitionError);
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, e, failureText,
        _manager);
  }
}
Use of org.apache.helix.participant.statemachine.StateTransitionError in the Apache Helix project.
Class HelixStateTransitionHandler, method onError.
/**
 * Handles an error reported for this state transition.
 *
 * <p>Errors of type {@code ErrorType.INTERNAL} are only logged and then skipped. For any
 * other type, when {@code code} is {@code ErrorCode.ERROR} the partition is moved to the
 * ERROR state both in the state model and in the persisted current state; if the message
 * was itself a transition out of ERROR, the partition is additionally disabled. In all
 * non-internal cases {@code rollbackOnError} is invoked on the state model, even if the
 * state update above throws.
 *
 * @param e the exception that caused the failure
 * @param code the error code; only {@code ErrorCode.ERROR} triggers the ERROR-state update
 * @param type the error type; {@code ErrorType.INTERNAL} is skipped
 */
@Override
public void onError(Exception e, ErrorCode code, ErrorType type) {
  HelixDataAccessor accessor = _manager.getHelixDataAccessor();
  Builder keyBuilder = accessor.keyBuilder();
  String instanceName = _manager.getInstanceName();
  String resourceName = _message.getResourceName();
  String partition = _message.getPartitionName();

  // All internal error has been processed already, so we can skip them
  if (type == ErrorType.INTERNAL) {
    logger.error("Skip internal error. errCode: " + code + ", errMsg: " + e.getMessage());
    return;
  }

  try {
    // if the transition is not canceled, it should go into error state
    if (code == ErrorCode.ERROR) {
      String errorState = HelixDefinedState.ERROR.toString();
      CurrentState currentStateDelta = new CurrentState(resourceName);
      currentStateDelta.setState(partition, errorState);
      _stateModel.updateState(errorState);

      // if transit from ERROR state, disable the partition.
      // The comparison is made from the constant side so that a message without a
      // from-state cannot raise a NullPointerException after the state model has already
      // been moved to ERROR but before the ERROR state is persisted.
      if (errorState.equalsIgnoreCase(_message.getFromState())) {
        disablePartition();
      }

      // NOTE(review): this key is built without a bucket name, whereas the other
      // current-state writes in this class go through ZNRecordBucketizer - confirm this
      // is intended for bucketized resources.
      if (!accessor.updateProperty(
          keyBuilder.currentState(instanceName, _message.getTgtSessionId(), resourceName),
          currentStateDelta)) {
        logger.error("Fails to persist ERROR current state to ZK for resource " + resourceName
            + " partition: " + partition);
      }
    }
  } finally {
    // Always give the state model a chance to roll back, whatever happened above.
    StateTransitionError error = new StateTransitionError(type, code, e);
    _stateModel.rollbackOnError(_message, _notificationContext, error);
  }
}
Use of org.apache.helix.participant.statemachine.StateTransitionError in the Apache Helix project.
Class HelixStateTransitionHandler, method postHandleMessage.
/**
 * Finalizes a state transition after the client logic has run, using the
 * {@code HelixTaskResult} stored in {@code _notificationContext}.
 *
 * <p>Behaviour, in order:
 * <ul>
 *   <li>If the message's target session id differs from the manager's current session id,
 *       a warning is logged and nothing else happens.</li>
 *   <li>Info, end time, previous state and trigger host are recorded in
 *       {@code _currentStateDelta}.</li>
 *   <li>On success the partition is set to the message's to-state. For a transition to
 *       DROPPED, the partition entry is subtracted from the current state and the state
 *       model is removed from the factory; otherwise the state model is updated.</li>
 *   <li>On cancellation the method returns without updating the current state.</li>
 *   <li>On failure with a {@code HelixStateMismatchException}, the current state is forced
 *       to the state model's own state. On an interruption that is not a timeout, the
 *       method returns without updating. On any other failure the state model is rolled
 *       back, the partition goes to ERROR, and it is disabled if the transition started
 *       from ERROR.</li>
 *   <li>Finally the delta is either persisted directly (normal message) or put into the
 *       shared update map (sub-message of a batch message). Failures in this step are
 *       logged and reported through {@code rollbackOnError}, not rethrown.</li>
 * </ul>
 */
void postHandleMessage() {
  HelixTaskResult taskResult =
      (HelixTaskResult) _notificationContext.get(MapKey.HELIX_TASK_RESULT.toString());
  Exception exception = taskResult.getException();

  String partitionKey = _message.getPartitionName();
  String resource = _message.getResourceName();
  String sessionId = _message.getTgtSessionId();
  String instanceName = _manager.getInstanceName();

  HelixDataAccessor accessor = _manager.getHelixDataAccessor();
  Builder keyBuilder = accessor.keyBuilder();

  int bucketSize = _message.getBucketSize();
  ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(bucketSize);

  // for zk current state it is OK as we have the per-session current state node
  if (!_message.getTgtSessionId().equals(_manager.getSessionId())) {
    logger.warn("Session id has changed. Skip postExecutionMessage. Old session "
        + _message.getExecutionSessionId() + " , new session : " + _manager.getSessionId());
    return;
  }

  // Set the INFO property and mark the end time, previous state of the state transition
  _currentStateDelta.setInfo(partitionKey, taskResult.getInfo());
  _currentStateDelta.setEndTime(partitionKey, taskResult.getCompleteTime());
  _currentStateDelta.setPreviousState(partitionKey, _message.getFromState());

  // add host name this state transition is triggered by.
  if (Message.MessageType.RELAYED_MESSAGE.name().equals(_message.getMsgSubType())) {
    _currentStateDelta.setTriggerHost(partitionKey, _message.getRelaySrcHost());
  } else {
    _currentStateDelta.setTriggerHost(partitionKey, _message.getMsgSrc());
  }

  if (taskResult.isSuccess()) {
    String toState = _message.getToState();
    _currentStateDelta.setState(partitionKey, toState);

    if (toState.equalsIgnoreCase(HelixDefinedState.DROPPED.toString())) {
      // for "OnOfflineToDROPPED" message, we need to remove the resource key record
      // from the current state of the instance because the resource key is dropped.
      // In the state model it will be stayed as "OFFLINE", which is OK.
      ZNRecord rec = new ZNRecord(_currentStateDelta.getId());
      rec.getMapFields().put(partitionKey, null);
      ZNRecordDelta delta = new ZNRecordDelta(rec, MergeOperation.SUBTRACT);

      List<ZNRecordDelta> deltaList = new ArrayList<ZNRecordDelta>();
      deltaList.add(delta);
      _currentStateDelta.setDeltaList(deltaList);
      _stateModelFactory.removeStateModel(resource, partitionKey);
    } else {
      // if the partition is not to be dropped, update _stateModel to the TO_STATE
      _stateModel.updateState(toState);
    }
  } else if (taskResult.isCancelled()) {
    // Cancelled message does not need current state update
    return;
  } else {
    if (exception instanceof HelixStateMismatchException) {
      // if fromState mismatch, set current state on zk to stateModel's current state
      logger.warn("Force CurrentState on Zk to be stateModel's CurrentState. partitionKey: "
          + partitionKey + ", currentState: " + _stateModel.getCurrentState() + ", message: "
          + _message);
      _currentStateDelta.setState(partitionKey, _stateModel.getCurrentState());
    } else {
      StateTransitionError error =
          new StateTransitionError(ErrorType.INTERNAL, ErrorCode.ERROR, exception);
      if (exception instanceof InterruptedException) {
        if (_isTimeout) {
          error = new StateTransitionError(ErrorType.INTERNAL, ErrorCode.TIMEOUT, exception);
        } else {
          // State transition interrupted but not caused by timeout. Keep the current
          // state in this case
          logger.error(
              "State transition interrupted but not timeout. Not updating state. Partition : "
                  + _message.getPartitionName() + " MsgId : " + _message.getMsgId());
          return;
        }
      }
      _stateModel.rollbackOnError(_message, _notificationContext, error);
      _currentStateDelta.setState(partitionKey, HelixDefinedState.ERROR.toString());
      _stateModel.updateState(HelixDefinedState.ERROR.toString());

      // if we have errors transit from ERROR state, disable the partition.
      // Compared from the constant side so a message without a from-state cannot raise a
      // NullPointerException here, which would skip persisting the ERROR state below.
      if (HelixDefinedState.ERROR.toString().equalsIgnoreCase(_message.getFromState())) {
        disablePartition();
      }
    }
  }

  try {
    // Update the ZK current state of the node
    PropertyKey key = keyBuilder.currentState(instanceName, sessionId, resource,
        bucketizer.getBucketName(partitionKey));
    if (_message.getAttribute(Attributes.PARENT_MSG_ID) == null) {
      // normal message
      if (!accessor.updateProperty(key, _currentStateDelta)) {
        throw new HelixException("Fails to persist current state back to ZK for resource "
            + resource + " partition: " + _message.getPartitionName());
      }
    } else {
      // sub-message of a batch message: hand the update over through the shared map
      // stored in the notification context instead of writing it here.
      ConcurrentHashMap<String, CurrentStateUpdate> csUpdateMap =
          (ConcurrentHashMap<String, CurrentStateUpdate>) _notificationContext
              .get(MapKey.CURRENT_STATE_UPDATE.toString());
      csUpdateMap.put(partitionKey, new CurrentStateUpdate(key, _currentStateDelta));
    }
  } catch (Exception e) {
    logger.error("Error when updating current-state ", e);
    StateTransitionError error =
        new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
    _stateModel.rollbackOnError(_message, _notificationContext, error);
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, e,
        "Error when update current-state ", _manager);
  }
}
Aggregations