Example use of org.apache.helix.HelixDataAccessor in the Apache Helix project.
From class ZKHelixAdmin, method resetPartition.
/**
 * Resets partitions of a resource on one instance by sending, for each partition, a
 * state-transition message from ERROR to the initial state of the resource's state model.
 *
 * <p>All checks run before any message is written. A {@link HelixException} is thrown when:
 * <ul>
 *   <li>the instance has no live-instance entry,</li>
 *   <li>the resource has no ideal state,</li>
 *   <li>a requested partition is missing from the ideal state,</li>
 *   <li>a requested partition is not in ERROR state in the instance's current state
 *       (including the case where no current state can be read at all),</li>
 *   <li>the state model definition referenced by the ideal state is not found, or</li>
 *   <li>a state-transition message for one of the partitions, addressed to the instance's
 *       session, is already pending.</li>
 * </ul>
 *
 * @param clusterName name of the cluster used to build the data accessor
 * @param instanceName instance whose partitions are reset
 * @param resourceName resource the partitions belong to
 * @param partitionNames partitions to reset; duplicates are collapsed
 * @throws HelixException if any of the preconditions listed above fails
 */
@Override
public void resetPartition(String clusterName, String instanceName, String resourceName, List<String> partitionNames) {
  HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
  Builder keyBuilder = accessor.keyBuilder();
  // Common prefix shared by every failure message raised below.
  String errorPrefix = "Can't reset state for " + resourceName + "/" + partitionNames + " on " + instanceName + ", because ";

  // check the instance is alive
  LiveInstance liveInstance = accessor.getProperty(keyBuilder.liveInstance(instanceName));
  if (liveInstance == null) {
    throw new HelixException(errorPrefix + instanceName + " is not alive");
  }

  // check resource group exists
  IdealState idealState = accessor.getProperty(keyBuilder.idealStates(resourceName));
  if (idealState == null) {
    throw new HelixException(errorPrefix + resourceName + " is not added");
  }

  // check partition exists in resource group:
  // in CUSTOMIZED rebalance mode the partition names are the map-field keys of the ideal
  // state record, in every other mode they are the list-field keys.
  Set<String> resetPartitionNames = new HashSet<String>(partitionNames);
  Set<String> knownPartitions = idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED
      ? idealState.getRecord().getMapFields().keySet()
      : idealState.getRecord().getListFields().keySet();
  if (!knownPartitions.containsAll(resetPartitionNames)) {
    throw new HelixException(errorPrefix + "not all " + partitionNames + " exist");
  }

  // check partition is in ERROR state
  String sessionId = liveInstance.getSessionId();
  CurrentState curState = accessor.getProperty(keyBuilder.currentState(instanceName, sessionId, resourceName));
  String errorState = HelixDefinedState.ERROR.toString();
  for (String partitionName : resetPartitionNames) {
    // Null-safe on purpose: a missing current state, or a null state for the partition,
    // means the partition is not in ERROR and must be reported as such rather than
    // surfacing as a NullPointerException.
    if (curState == null || !errorState.equals(curState.getState(partitionName))) {
      throw new HelixException(errorPrefix + "not all " + partitionNames + " are in ERROR state");
    }
  }

  // check stateModelDef exists and get initial state
  String stateModelDef = idealState.getStateModelDefRef();
  StateModelDefinition stateModel = accessor.getProperty(keyBuilder.stateModelDef(stateModelDef));
  if (stateModel == null) {
    throw new HelixException(errorPrefix + stateModelDef + " is NOT found");
  }

  // check that no state-transition message is already pending for any of the partitions
  List<Message> messages = accessor.getChildValues(keyBuilder.messages(instanceName));
  for (Message message : messages) {
    boolean pendingForResetPartition = MessageType.STATE_TRANSITION.name().equalsIgnoreCase(message.getMsgType())
        && sessionId.equals(message.getTgtSessionId())
        && resourceName.equals(message.getResourceName())
        && resetPartitionNames.contains(message.getPartitionName());
    if (pendingForResetPartition) {
      throw new HelixException(errorPrefix + "a pending message exists: " + message);
    }
  }

  // The message source is "<local canonical host name>-ADMIN", or "UNKNOWN" when the local
  // host name cannot be resolved.
  String adminName = null;
  try {
    adminName = InetAddress.getLocalHost().getCanonicalHostName() + "-ADMIN";
  } catch (UnknownHostException e) {
    // can ignore it
    logger.info("Unable to get host name. Will set it to UNKNOWN, mostly ignorable", e);
    adminName = "UNKNOWN";
  }

  List<Message> resetMessages = new ArrayList<Message>();
  List<PropertyKey> messageKeys = new ArrayList<PropertyKey>();
  for (String partitionName : resetPartitionNames) {
    // send ERROR to initialState message
    String msgId = UUID.randomUUID().toString();
    Message message = new Message(MessageType.STATE_TRANSITION, msgId);
    message.setSrcName(adminName);
    message.setTgtName(instanceName);
    message.setMsgState(MessageState.NEW);
    message.setPartitionName(partitionName);
    message.setResourceName(resourceName);
    message.setTgtSessionId(sessionId);
    message.setStateModelDef(stateModelDef);
    message.setFromState(errorState);
    message.setToState(stateModel.getInitialState());
    message.setStateModelFactoryName(idealState.getStateModelFactoryName());
    if (idealState.getResourceGroupName() != null) {
      message.setResourceGroupName(idealState.getResourceGroupName());
    }
    if (idealState.getInstanceGroupTag() != null) {
      message.setResourceTag(idealState.getInstanceGroupTag());
    }
    resetMessages.add(message);
    messageKeys.add(keyBuilder.message(instanceName, message.getId()));
  }

  // Write all reset messages with a single accessor call.
  accessor.setChildren(messageKeys, resetMessages);
}
Example use of org.apache.helix.HelixDataAccessor in the Apache Helix project.
From class ZKHelixAdmin, method setInstanceConfig.
/**
 * Replaces the stored configuration of an existing participant instance.
 *
 * <p>The instance's config node must already exist, and the new config must carry the same
 * host name and port as the stored one; otherwise the update is rejected.
 *
 * @param clusterName cluster the instance belongs to
 * @param instanceName instance whose config is replaced
 * @param newInstanceConfig config to store
 * @return the value returned by the accessor's {@code setProperty} call
 *         (presumably {@code true} when the write succeeded; verify against the accessor)
 * @throws HelixException if the instance config does not exist, or if the host name or port
 *         of {@code newInstanceConfig} differs from the stored config
 */
@Override
public boolean setInstanceConfig(String clusterName, String instanceName, InstanceConfig newInstanceConfig) {
  String instanceConfigPath = PropertyPathBuilder.getPath(PropertyType.CONFIGS, clusterName, HelixConfigScope.ConfigScopeProperty.PARTICIPANT.toString(), instanceName);
  if (!_zkClient.exists(instanceConfigPath)) {
    throw new HelixException("instance " + instanceName + " does not exist in cluster " + clusterName);
  }

  HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
  PropertyKey instanceConfigPropertyKey = accessor.keyBuilder().instanceConfig(instanceName);
  InstanceConfig currentInstanceConfig = accessor.getProperty(instanceConfigPropertyKey);
  // The existence check and this read are two separate calls, so the config may be gone by
  // now; report that the same way as a missing instance instead of failing with an NPE.
  if (currentInstanceConfig == null) {
    throw new HelixException("instance " + instanceName + " does not exist in cluster " + clusterName);
  }

  // Host name and port are not allowed to change through this call.
  boolean sameHost = newInstanceConfig.getHostName().equals(currentInstanceConfig.getHostName());
  boolean samePort = newInstanceConfig.getPort().equals(currentInstanceConfig.getPort());
  if (!sameHost || !samePort) {
    throw new HelixException("Hostname and port cannot be changed, current hostname: " + currentInstanceConfig.getHostName() + " and port: " + currentInstanceConfig.getPort() + " is different from new hostname: " + newInstanceConfig.getHostName() + " and new port: " + newInstanceConfig.getPort());
  }

  return accessor.setProperty(instanceConfigPropertyKey, newInstanceConfig);
}
Example use of org.apache.helix.HelixDataAccessor in the Apache Helix project.
From class ZKHelixAdmin, method addInstanceTag.
/**
 * Adds a tag to the stored config of a participant instance and writes the config back.
 *
 * @param clusterName cluster the instance belongs to
 * @param instanceName participant instance to tag
 * @param tag tag to add to the instance config
 * @throws HelixException if the cluster, or the participant instance within it, is not set up
 */
@Override
public void addInstanceTag(String clusterName, String instanceName, String tag) {
  boolean clusterReady = ZKUtil.isClusterSetup(clusterName, _zkClient);
  if (!clusterReady) {
    throw new HelixException("cluster " + clusterName + " is not setup yet");
  }

  boolean participantReady = ZKUtil.isInstanceSetup(_zkClient, clusterName, instanceName, InstanceType.PARTICIPANT);
  if (!participantReady) {
    throw new HelixException("cluster " + clusterName + " instance " + instanceName + " is not setup yet");
  }

  // Read-modify-write of the instance config through a fresh data accessor.
  ZkBaseDataAccessor<ZNRecord> baseAccessor = new ZkBaseDataAccessor<ZNRecord>(_zkClient);
  HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(clusterName, baseAccessor);
  Builder keys = dataAccessor.keyBuilder();

  InstanceConfig instanceConfig = dataAccessor.getProperty(keys.instanceConfig(instanceName));
  instanceConfig.addTag(tag);
  dataAccessor.setProperty(keys.instanceConfig(instanceName), instanceConfig);
}
Example use of org.apache.helix.HelixDataAccessor in the Apache Helix project.
From class HelixStateTransitionHandler, method preHandleMessage.
/**
 * Runs the checks and bookkeeping that precede a state transition.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>reject an invalid message,</li>
 *   <li>record the start time for the message's partition in the current-state delta,</li>
 *   <li>reject the message when its from-state is neither null nor {@code "*"} and does not
 *       match (ignoring case) the state held in the current-state delta,</li>
 *   <li>remove the REQUESTED_STATE entry of the partition from the stored current state.</li>
 * </ol>
 * A failure in the last step is logged and triggers the state model's rollback hook, but is
 * not rethrown.
 *
 * @throws HelixException if the message is invalid
 * @throws HelixStateMismatchException if the from-state check fails
 * @throws Exception declared by the signature; no other checked exception is thrown directly here
 */
void preHandleMessage() throws Exception {
  if (!_message.isValid()) {
    String invalidMessageError = "Invalid Message, ensure that message: " + _message
        + " has all the required fields: " + Arrays.toString(Message.Attributes.values());
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, invalidMessageError, _manager);
    logger.error(invalidMessageError);
    throw new HelixException(invalidMessageError);
  }

  logger.info("handling message: " + _message.getMsgId()
      + " transit " + _message.getResourceName() + "." + _message.getPartitionName()
      + "|" + _message.getPartitionNames()
      + " from:" + _message.getFromState()
      + " to:" + _message.getToState()
      + ", relayedFrom: " + _message.getRelaySrcHost());

  HelixDataAccessor dataAccessor = _manager.getHelixDataAccessor();
  String partition = _message.getPartitionName();
  String expectedFromState = _message.getFromState();

  // Read the state held by the delta before stamping the start time, then verify it against
  // the from-state carried by the message.
  String actualState = _currentStateDelta.getState(partition);
  // Set start time right before invoke client logic
  _currentStateDelta.setStartTime(_message.getPartitionName(), System.currentTimeMillis());

  boolean acceptsAnyState = expectedFromState == null || expectedFromState.equals("*");
  if (!acceptsAnyState && !expectedFromState.equalsIgnoreCase(actualState)) {
    String mismatchError = "Current state of stateModel does not match the fromState in Message"
        + ", Current State:" + actualState
        + ", message expected:" + expectedFromState
        + ", partition: " + partition
        + ", from: " + _message.getMsgSrc()
        + ", to: " + _message.getTgtName();
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, mismatchError, _manager);
    logger.error(mismatchError);
    throw new HelixStateMismatchException(mismatchError);
  }

  // Reset the REQUESTED_STATE property if it exists.
  try {
    String instanceName = _manager.getInstanceName();
    String targetSession = _message.getTgtSessionId();
    String resourceName = _message.getResourceName();
    ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(_message.getBucketSize());
    PropertyKey currentStateKey = dataAccessor.keyBuilder()
        .currentState(instanceName, targetSession, resourceName, bucketizer.getBucketName(partition));

    // Build a SUBTRACT delta whose only content is the partition's REQUESTED_STATE field.
    Map<String, String> requestedStateField = new TreeMap<String, String>();
    requestedStateField.put(CurrentState.CurrentStateProperty.REQUESTED_STATE.name(), null);
    ZNRecord subtractRecord = new ZNRecord(resourceName);
    subtractRecord.getMapFields().put(partition, requestedStateField);

    List<ZNRecordDelta> deltas = new ArrayList<ZNRecordDelta>();
    deltas.add(new ZNRecordDelta(subtractRecord, ZNRecordDelta.MergeOperation.SUBTRACT));

    CurrentState currentStateUpdate = new CurrentState(resourceName);
    currentStateUpdate.setDeltaList(deltas);

    // Update the ZK current state of the node
    boolean persisted = dataAccessor.updateProperty(currentStateKey, currentStateUpdate);
    if (!persisted) {
      logger.error("Fails to persist current state back to ZK for resource " + resourceName + " partition: " + partition);
    }
  } catch (Exception e) {
    String removalFailure = "Error when removing " + CurrentState.CurrentStateProperty.REQUESTED_STATE.name() + " from current state.";
    logger.error(removalFailure, e);
    StateTransitionError transitionError = new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
    _stateModel.rollbackOnError(_message, _notificationContext, transitionError);
    _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, e, removalFailure, _manager);
  }
}
Example use of org.apache.helix.HelixDataAccessor in the Apache Helix project.
From class HelixTask, method call.
/**
 * Runs the message handler for this task and then performs the post-processing of its result.
 *
 * <p>Phases:
 * <ol>
 *   <li>Invoke {@code _handler.handleMessage()}. An exception from the handler is converted
 *       into a {@link HelixTaskResult} carrying that exception; an
 *       {@link InterruptedException} additionally marks the result as interrupted.</li>
 *   <li>Cancel the timeout task registered for this task.</li>
 *   <li>Classify the result (success, interrupted/timeout, cancelled, failed), log it and
 *       report it to the participant monitor. A timed-out message with a positive retry
 *       count is re-scheduled as a new task and the method returns early.</li>
 *   <li>On success, forward relay messages; failures here are only logged.</li>
 *   <li>For messages without a parent message id: remove the message, report statistics,
 *       send the reply and mark the task finished.</li>
 *   <li>In all cases (including the early return), log completion and invoke
 *       {@code _handler.onError} when the error type is INTERNAL or FRAMEWORK.</li>
 * </ol>
 *
 * @return the handler's result, or a result built from the exception the handler threw;
 *         {@code null} only if the handler itself returned {@code null}
 */
@Override
public HelixTaskResult call() {
  HelixTaskResult taskResult = null;
  ErrorType type = null;
  ErrorCode code = null;
  long start = System.currentTimeMillis();
  logger.info("handling task: " + getTaskId() + " begin, at: " + start);
  HelixDataAccessor accessor = _manager.getHelixDataAccessor();
  _statusUpdateUtil.logInfo(_message, HelixTask.class, "Message handling task begin execute", _manager);
  _message.setExecuteStartTimeStamp(System.currentTimeMillis());

  // Batch messages get a shared map in the notification context: partitionName -> csUpdate
  if (_message.getBatchMessageMode()) {
    _notificationContext.add(MapKey.CURRENT_STATE_UPDATE.toString(), new ConcurrentHashMap<String, CurrentStateUpdate>());
  }

  // Handle the message
  try {
    setStarted();
    taskResult = _handler.handleMessage();
  } catch (InterruptedException e) {
    taskResult = new HelixTaskResult();
    taskResult.setException(e);
    taskResult.setInterrupted(true);
    _statusUpdateUtil.logError(_message, HelixTask.class, e, "State transition interrupted, timeout:" + _isTimeout, _manager);
    logger.info("Message " + _message.getMsgId() + " is interrupted");
  } catch (Exception e) {
    taskResult = new HelixTaskResult();
    taskResult.setException(e);
    taskResult.setMessage(e.getMessage());
    String errorMessage = "Exception while executing a message. " + e + " msgId: " + _message.getMsgId() + " type: " + _message.getMsgType();
    logger.error(errorMessage, e);
    _statusUpdateUtil.logError(_message, HelixTask.class, e, errorMessage, _manager);
  }

  // cancel timeout task
  _executor.cancelTimeoutTask(this);

  Exception exception = null;
  try {
    if (taskResult.isSuccess()) {
      _statusUpdateUtil.logInfo(_message, _handler.getClass(), "Message handling task completed successfully", _manager);
      logger.info("Message " + _message.getMsgId() + " completed.");
      _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.COMPLETED);
    } else {
      type = ErrorType.INTERNAL;
      if (taskResult.isInterrupted()) {
        logger.info("Message " + _message.getMsgId() + " is interrupted");
        code = _isTimeout ? ErrorCode.TIMEOUT : ErrorCode.CANCEL;
        if (_isTimeout) {
          int retryCount = _message.getRetryCount();
          logger.info("Message timeout, retry count: " + retryCount + " msgId:" + _message.getMsgId());
          _statusUpdateUtil.logInfo(_message, _handler.getClass(), "Message handling task timeout, retryCount:" + retryCount, _manager);
          // While retries remain, re-schedule the message as a new task and return early.
          // The finally block below still runs on this path, with type == INTERNAL.
          if (retryCount > 0) {
            _message.setRetryCount(retryCount - 1);
            HelixTask task = new HelixTask(_message, _notificationContext, _handler, _executor);
            _executor.scheduleTask(task);
            return taskResult;
          }
        }
        _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
      } else if (taskResult.isCancelled()) {
        // A completed cancellation is not treated as an error: clear the type so that
        // onError is not invoked in the finally block.
        type = null;
        _statusUpdateUtil.logInfo(_message, _handler.getClass(), "Cancellation completed successfully", _manager);
        _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
      } else {
        // logging for errors
        code = ErrorCode.ERROR;
        String errorMsg = "Message execution failed. msgId: " + getTaskId() + ", errorMsg: " + taskResult.getMessage();
        logger.error(errorMsg);
        _statusUpdateUtil.logError(_message, _handler.getClass(), errorMsg, _manager);
        _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.FAILED);
      }
    }

    // forward relay messages attached to this message to other participants
    if (taskResult.isSuccess()) {
      try {
        forwardRelayMessages(accessor, _message, taskResult.getCompleteTime());
      } catch (Exception e) {
        // Fail to send relay message should not result in a task execution failure
        // Currently we don't log error to ZK to reduce writes as when accessor throws
        // exception, ZK might not be in good condition.
        logger.warn("Failed to send relay messages.", e);
      }
    }

    // Only messages without a parent message id are removed, reported and replied to here.
    if (_message.getAttribute(Attributes.PARENT_MSG_ID) == null) {
      removeMessageFromZk(accessor, _message);
      reportMessageStat(_manager, _message, taskResult);
      sendReply(accessor, _message, taskResult);
      _executor.finishTask(this);
    }
  } catch (Exception e) {
    exception = e;
    type = ErrorType.FRAMEWORK;
    code = ErrorCode.ERROR;
    String errorMessage = "Exception after executing a message, msgId: " + _message.getMsgId() + ", " + e;
    logger.error(errorMessage, e);
    _statusUpdateUtil.logError(_message, HelixTask.class, errorMessage, _manager);
  } finally {
    long end = System.currentTimeMillis();
    // taskResult is null when the handler returned null; the resulting NullPointerException
    // above is caught as a FRAMEWORK error, so this log line must not dereference it again
    // or the exception would escape and skip the onError callback below.
    boolean succeeded = taskResult != null && taskResult.isSuccess();
    logger.info("msg: " + _message.getMsgId() + " handling task completed, results:" + succeeded + ", at: " + end + ", took:" + (end - start));
    // the handler have chance to finally cleanup
    if (type == ErrorType.INTERNAL) {
      // type is only set to INTERNAL after taskResult was dereferenced, so it is non-null here.
      _handler.onError(taskResult.getException(), code, type);
    } else if (type == ErrorType.FRAMEWORK) {
      _handler.onError(exception, code, type);
    }
  }
  return taskResult;
}
Aggregations