Search in sources :

Example 51 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project helix by apache.

the class ZKHelixAdmin method resetPartition.

/**
 * Resets partitions that are in the ERROR state on an instance by sending, for each
 * partition, a STATE_TRANSITION message from ERROR to the state model's initial state.
 *
 * <p>Before any message is written the following are verified, in order: the instance has
 * a live-instance record, the resource has an ideal state, every requested partition is
 * present in the ideal state, every requested partition is currently in ERROR on the
 * instance, the referenced state model definition exists, and no state-transition message
 * for the same session, resource and one of the partitions is already pending.
 *
 * @param clusterName    cluster the data accessor is created for
 * @param instanceName   instance hosting the partitions
 * @param resourceName   resource the partitions belong to
 * @param partitionNames partitions to reset
 * @throws HelixException if any of the checks listed above fails
 */
@Override
public void resetPartition(String clusterName, String instanceName, String resourceName, List<String> partitionNames) {
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
    Builder keyBuilder = accessor.keyBuilder();
    // Shared prefix of every failure message raised below.
    String errorPrefix = "Can't reset state for " + resourceName + "/" + partitionNames + " on " + instanceName + ", because ";
    // check the instance is alive
    LiveInstance liveInstance = accessor.getProperty(keyBuilder.liveInstance(instanceName));
    if (liveInstance == null) {
        throw new HelixException(errorPrefix + instanceName + " is not alive");
    }
    // check resource group exists
    IdealState idealState = accessor.getProperty(keyBuilder.idealStates(resourceName));
    if (idealState == null) {
        throw new HelixException(errorPrefix + resourceName + " is not added");
    }
    // check partition exists in resource group; CUSTOMIZED resources keep their partitions
    // in the record's map fields, every other mode in its list fields
    Set<String> resetPartitionNames = new HashSet<String>(partitionNames);
    Set<String> knownPartitions;
    if (idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED) {
        knownPartitions = idealState.getRecord().getMapFields().keySet();
    } else {
        knownPartitions = idealState.getRecord().getListFields().keySet();
    }
    if (!knownPartitions.containsAll(resetPartitionNames)) {
        throw new HelixException(errorPrefix + "not all " + partitionNames + " exist");
    }
    // check partition is in ERROR state
    String sessionId = liveInstance.getSessionId();
    CurrentState curState = accessor.getProperty(keyBuilder.currentState(instanceName, sessionId, resourceName));
    String errorState = HelixDefinedState.ERROR.toString();
    for (String partitionName : resetPartitionNames) {
        // A missing current-state record, or a partition without a recorded state, is not
        // in ERROR either; report it as such instead of failing with a NullPointerException.
        if (curState == null || !errorState.equals(curState.getState(partitionName))) {
            throw new HelixException(errorPrefix + "not all " + partitionNames + " are in ERROR state");
        }
    }
    // check stateModelDef exists and get initial state
    String stateModelDef = idealState.getStateModelDefRef();
    StateModelDefinition stateModel = accessor.getProperty(keyBuilder.stateModelDef(stateModelDef));
    if (stateModel == null) {
        throw new HelixException(errorPrefix + stateModelDef + " is NOT found");
    }
    // check there is no pending messages for the partitions exist
    List<Message> messages = accessor.getChildValues(keyBuilder.messages(instanceName));
    for (Message message : messages) {
        boolean pendingForReset = MessageType.STATE_TRANSITION.name().equalsIgnoreCase(message.getMsgType())
                && sessionId.equals(message.getTgtSessionId())
                && resourceName.equals(message.getResourceName())
                && resetPartitionNames.contains(message.getPartitionName());
        if (pendingForReset) {
            throw new HelixException(errorPrefix + "a pending message exists: " + message);
        }
    }
    // The sender name is informational only, so a failed host lookup falls back to UNKNOWN.
    String adminName;
    try {
        adminName = InetAddress.getLocalHost().getCanonicalHostName() + "-ADMIN";
    } catch (UnknownHostException e) {
        // can ignore it
        logger.info("Unable to get host name. Will set it to UNKNOWN, mostly ignorable", e);
        adminName = "UNKNOWN";
    }
    List<Message> resetMessages = new ArrayList<Message>();
    List<PropertyKey> messageKeys = new ArrayList<PropertyKey>();
    for (String partitionName : resetPartitionNames) {
        // send ERROR to initialState message
        String msgId = UUID.randomUUID().toString();
        Message message = new Message(MessageType.STATE_TRANSITION, msgId);
        message.setSrcName(adminName);
        message.setTgtName(instanceName);
        message.setMsgState(MessageState.NEW);
        message.setPartitionName(partitionName);
        message.setResourceName(resourceName);
        message.setTgtSessionId(sessionId);
        message.setStateModelDef(stateModelDef);
        message.setFromState(errorState);
        message.setToState(stateModel.getInitialState());
        message.setStateModelFactoryName(idealState.getStateModelFactoryName());
        if (idealState.getResourceGroupName() != null) {
            message.setResourceGroupName(idealState.getResourceGroupName());
        }
        if (idealState.getInstanceGroupTag() != null) {
            message.setResourceTag(idealState.getInstanceGroupTag());
        }
        resetMessages.add(message);
        messageKeys.add(keyBuilder.message(instanceName, message.getId()));
    }
    // All reset messages are written with a single setChildren call.
    accessor.setChildren(messageKeys, resetMessages);
}
Also used : Message(org.apache.helix.model.Message) UnknownHostException(java.net.UnknownHostException) Builder(org.apache.helix.PropertyKey.Builder) PropertyPathBuilder(org.apache.helix.PropertyPathBuilder) ArrayList(java.util.ArrayList) IdealState(org.apache.helix.model.IdealState) HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) LiveInstance(org.apache.helix.model.LiveInstance) StateModelDefinition(org.apache.helix.model.StateModelDefinition) CurrentState(org.apache.helix.model.CurrentState) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey) HashSet(java.util.HashSet)

Example 52 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project helix by apache.

the class ZKHelixAdmin method setInstanceConfig.

/**
 * Replaces the stored config of an existing instance with {@code newInstanceConfig}.
 * The host name and port of the new config must equal those of the stored config.
 *
 * @param clusterName       cluster the instance belongs to
 * @param instanceName      instance whose config is replaced
 * @param newInstanceConfig config to store
 * @return the result of {@code accessor.setProperty} for the instance-config key
 * @throws HelixException if the instance config does not exist, or if the host name or
 *                        port differs from the stored config
 */
@Override
public boolean setInstanceConfig(String clusterName, String instanceName, InstanceConfig newInstanceConfig) {
    String instanceConfigPath = PropertyPathBuilder.getPath(PropertyType.CONFIGS, clusterName, HelixConfigScope.ConfigScopeProperty.PARTICIPANT.toString(), instanceName);
    if (!_zkClient.exists(instanceConfigPath)) {
        throw new HelixException("instance " + instanceName + " does not exist in cluster " + clusterName);
    }
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
    PropertyKey instanceConfigPropertyKey = accessor.keyBuilder().instanceConfig(instanceName);
    InstanceConfig currentInstanceConfig = accessor.getProperty(instanceConfigPropertyKey);
    // The node may have been removed between the exists() check and this read; report that
    // the same way as a missing instance rather than failing with a NullPointerException.
    if (currentInstanceConfig == null) {
        throw new HelixException("instance " + instanceName + " does not exist in cluster " + clusterName);
    }
    boolean sameHost = newInstanceConfig.getHostName().equals(currentInstanceConfig.getHostName());
    boolean samePort = newInstanceConfig.getPort().equals(currentInstanceConfig.getPort());
    if (!sameHost || !samePort) {
        throw new HelixException("Hostname and port cannot be changed, current hostname: " + currentInstanceConfig.getHostName() + " and port: " + currentInstanceConfig.getPort() + " is different from new hostname: " + newInstanceConfig.getHostName() + " and new port: " + newInstanceConfig.getPort());
    }
    return accessor.setProperty(instanceConfigPropertyKey, newInstanceConfig);
}
Also used : HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) InstanceConfig(org.apache.helix.model.InstanceConfig) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey)

Example 53 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project helix by apache.

the class ZKHelixAdmin method addInstanceTag.

/**
 * Adds {@code tag} to the stored config of a participant instance and writes the
 * config back.
 *
 * @param clusterName  cluster the instance belongs to
 * @param instanceName participant instance to tag
 * @param tag          tag to add
 * @throws HelixException if the cluster or the participant instance is not set up
 */
@Override
public void addInstanceTag(String clusterName, String instanceName, String tag) {
    // Preconditions: the cluster first, then the participant inside it.
    boolean clusterReady = ZKUtil.isClusterSetup(clusterName, _zkClient);
    if (!clusterReady) {
        throw new HelixException("cluster " + clusterName + " is not setup yet");
    }
    boolean participantReady = ZKUtil.isInstanceSetup(_zkClient, clusterName, instanceName, InstanceType.PARTICIPANT);
    if (!participantReady) {
        throw new HelixException("cluster " + clusterName + " instance " + instanceName + " is not setup yet");
    }

    ZkBaseDataAccessor<ZNRecord> baseAccessor = new ZkBaseDataAccessor<ZNRecord>(_zkClient);
    HelixDataAccessor dataAccessor = new ZKHelixDataAccessor(clusterName, baseAccessor);
    Builder keys = dataAccessor.keyBuilder();

    // Read, modify, write back under the same instance-config key.
    InstanceConfig instanceConfig = dataAccessor.getProperty(keys.instanceConfig(instanceName));
    instanceConfig.addTag(tag);
    dataAccessor.setProperty(keys.instanceConfig(instanceName), instanceConfig);
}
Also used : HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) InstanceConfig(org.apache.helix.model.InstanceConfig) Builder(org.apache.helix.PropertyKey.Builder) PropertyPathBuilder(org.apache.helix.PropertyPathBuilder) ZNRecord(org.apache.helix.ZNRecord)

Example 54 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project helix by apache.

the class HelixStateTransitionHandler method preHandleMessage.

/**
 * Validates the message before the state transition is executed and clears the
 * REQUESTED_STATE field of the partition's current state.
 *
 * <p>Steps: reject an invalid message, record the transition start time in the current
 * state delta, verify that the message's from-state matches the recorded state (unless it
 * is null or the wildcard "*"), then submit a SUBTRACT delta that removes REQUESTED_STATE
 * for the partition. A failure in that last step is logged and rolled back on the state
 * model but is not rethrown.
 *
 * @throws HelixException              if the message is not valid
 * @throws HelixStateMismatchException if the from-state differs from the recorded state
 */
void preHandleMessage() throws Exception {
    if (!_message.isValid()) {
        String invalidDetail = "Invalid Message, ensure that message: " + _message
                + " has all the required fields: " + Arrays.toString(Message.Attributes.values());
        _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, invalidDetail, _manager);
        logger.error(invalidDetail);
        throw new HelixException(invalidDetail);
    }

    logger.info("handling message: " + _message.getMsgId()
            + " transit " + _message.getResourceName() + "." + _message.getPartitionName()
            + "|" + _message.getPartitionNames()
            + " from:" + _message.getFromState()
            + " to:" + _message.getToState()
            + ", relayedFrom: " + _message.getRelaySrcHost());

    HelixDataAccessor dataAccessor = _manager.getHelixDataAccessor();
    String partition = _message.getPartitionName();
    String expectedFrom = _message.getFromState();

    // Verify the fromState and current state of the stateModel
    String recordedState = _currentStateDelta.getState(partition);

    // Set start time right before invoke client logic
    _currentStateDelta.setStartTime(partition, System.currentTimeMillis());

    // A null from-state or "*" matches any recorded state.
    boolean matchesAnyState = expectedFrom == null || expectedFrom.equals("*");
    if (!matchesAnyState && !expectedFrom.equalsIgnoreCase(recordedState)) {
        String mismatchDetail = "Current state of stateModel does not match the fromState in Message"
                + ", Current State:" + recordedState
                + ", message expected:" + expectedFrom
                + ", partition: " + partition
                + ", from: " + _message.getMsgSrc()
                + ", to: " + _message.getTgtName();
        _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, mismatchDetail, _manager);
        logger.error(mismatchDetail);
        throw new HelixStateMismatchException(mismatchDetail);
    }

    // Reset the REQUESTED_STATE property if it exists.
    String requestedStateField = CurrentState.CurrentStateProperty.REQUESTED_STATE.name();
    try {
        String instanceName = _manager.getInstanceName();
        String targetSession = _message.getTgtSessionId();
        String resourceName = _message.getResourceName();
        ZNRecordBucketizer bucketizer = new ZNRecordBucketizer(_message.getBucketSize());
        PropertyKey currentStateKey = dataAccessor.keyBuilder()
                .currentState(instanceName, targetSession, resourceName, bucketizer.getBucketName(partition));

        // Record that names the field to subtract for this partition.
        Map<String, String> fieldsToRemove = new TreeMap<String, String>();
        fieldsToRemove.put(requestedStateField, null);
        ZNRecord subtractRecord = new ZNRecord(resourceName);
        subtractRecord.getMapFields().put(partition, fieldsToRemove);

        List<ZNRecordDelta> deltas = new ArrayList<ZNRecordDelta>();
        deltas.add(new ZNRecordDelta(subtractRecord, ZNRecordDelta.MergeOperation.SUBTRACT));

        CurrentState stateUpdate = new CurrentState(resourceName);
        stateUpdate.setDeltaList(deltas);

        // Update the ZK current state of the node
        boolean persisted = dataAccessor.updateProperty(currentStateKey, stateUpdate);
        if (!persisted) {
            logger.error("Fails to persist current state back to ZK for resource " + resourceName + " partition: " + partition);
        }
    } catch (Exception e) {
        String failureDetail = "Error when removing " + requestedStateField + " from current state.";
        logger.error(failureDetail, e);
        StateTransitionError transitionError = new StateTransitionError(ErrorType.FRAMEWORK, ErrorCode.ERROR, e);
        _stateModel.rollbackOnError(_message, _notificationContext, transitionError);
        _statusUpdateUtil.logError(_message, HelixStateTransitionHandler.class, e, failureDetail, _manager);
    }
}
Also used : ZNRecordBucketizer(org.apache.helix.ZNRecordBucketizer) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) ZNRecordDelta(org.apache.helix.ZNRecordDelta) HelixException(org.apache.helix.HelixException) HelixRollbackException(org.apache.helix.HelixRollbackException) InvocationTargetException(java.lang.reflect.InvocationTargetException) HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) CurrentState(org.apache.helix.model.CurrentState) StateTransitionError(org.apache.helix.participant.statemachine.StateTransitionError) PropertyKey(org.apache.helix.PropertyKey) ZNRecord(org.apache.helix.ZNRecord)

Example 55 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project helix by apache.

the class HelixTask method call.

/**
 * Runs the message handler for this task, records the outcome, and performs the
 * post-processing for the message.
 *
 * <p>Flow, as implemented below:
 * <ol>
 *   <li>Invoke {@code _handler.handleMessage()}; an exception is converted into a
 *       {@link HelixTaskResult} carrying that exception.</li>
 *   <li>Cancel the timeout task and classify the result (success, interrupted/timeout,
 *       cancelled, or failed), reporting it to the participant monitor.</li>
 *   <li>On timeout with a positive retry count, schedule a new task with the count
 *       decremented and return early.</li>
 *   <li>On success, forward relay messages; a failure there is only logged.</li>
 *   <li>If the message has no PARENT_MSG_ID attribute, remove it from ZK, report stats,
 *       send the reply and mark the task finished.</li>
 *   <li>In {@code finally}, call {@code _handler.onError} when an INTERNAL or FRAMEWORK
 *       error type was recorded.</li>
 * </ol>
 *
 * @return the task result produced by the handler, or one built from the caught exception
 */
@Override
public HelixTaskResult call() {
    HelixTaskResult taskResult = null;
    // type/code stay null unless an error path below sets them; the finally block uses
    // type to decide whether (and how) to notify the handler.
    ErrorType type = null;
    ErrorCode code = null;
    long start = System.currentTimeMillis();
    logger.info("handling task: " + getTaskId() + " begin, at: " + start);
    HelixDataAccessor accessor = _manager.getHelixDataAccessor();
    _statusUpdateUtil.logInfo(_message, HelixTask.class, "Message handling task begin execute", _manager);
    _message.setExecuteStartTimeStamp(new Date().getTime());
    // partitionName -> csUpdate
    if (_message.getBatchMessageMode() == true) {
        _notificationContext.add(MapKey.CURRENT_STATE_UPDATE.toString(), new ConcurrentHashMap<String, CurrentStateUpdate>());
    }
    // Handle the message
    try {
        setStarted();
        taskResult = _handler.handleMessage();
    } catch (InterruptedException e) {
        // NOTE(review): the thread's interrupt flag is not restored here — confirm that
        // is intentional for the executor running this task.
        taskResult = new HelixTaskResult();
        taskResult.setException(e);
        taskResult.setInterrupted(true);
        _statusUpdateUtil.logError(_message, HelixTask.class, e, "State transition interrupted, timeout:" + _isTimeout, _manager);
        logger.info("Message " + _message.getMsgId() + " is interrupted");
    } catch (Exception e) {
        taskResult = new HelixTaskResult();
        taskResult.setException(e);
        taskResult.setMessage(e.getMessage());
        String errorMessage = "Exception while executing a message. " + e + " msgId: " + _message.getMsgId() + " type: " + _message.getMsgType();
        logger.error(errorMessage, e);
        _statusUpdateUtil.logError(_message, HelixTask.class, e, errorMessage, _manager);
    }
    // cancel timeout task
    _executor.cancelTimeoutTask(this);
    // Holds an exception raised during post-processing so finally can pass it to onError.
    Exception exception = null;
    try {
        // NOTE(review): taskResult is dereferenced here and again in finally; this assumes
        // handleMessage() never returns null — confirm against the handler contract.
        if (taskResult.isSuccess()) {
            _statusUpdateUtil.logInfo(_message, _handler.getClass(), "Message handling task completed successfully", _manager);
            logger.info("Message " + _message.getMsgId() + " completed.");
            _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.COMPLETED);
        } else {
            type = ErrorType.INTERNAL;
            if (taskResult.isInterrupted()) {
                logger.info("Message " + _message.getMsgId() + " is interrupted");
                // An interruption is attributed to a timeout when _isTimeout is set,
                // otherwise to a cancel.
                code = _isTimeout ? ErrorCode.TIMEOUT : ErrorCode.CANCEL;
                if (_isTimeout) {
                    int retryCount = _message.getRetryCount();
                    logger.info("Message timeout, retry count: " + retryCount + " msgId:" + _message.getMsgId());
                    _statusUpdateUtil.logInfo(_message, _handler.getClass(), "Message handling task timeout, retryCount:" + retryCount, _manager);
                    // Retries remain: re-schedule the message as a new task with the retry
                    // count decremented and return early. The finally block still runs,
                    // with type == INTERNAL and code == TIMEOUT.
                    if (retryCount > 0) {
                        _message.setRetryCount(retryCount - 1);
                        HelixTask task = new HelixTask(_message, _notificationContext, _handler, _executor);
                        _executor.scheduleTask(task);
                        return taskResult;
                    }
                }
                _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
            } else if (taskResult.isCancelled()) {
                // Clearing type means the finally block does not call onError for a
                // completed cancellation.
                type = null;
                _statusUpdateUtil.logInfo(_message, _handler.getClass(), "Cancellation completed successfully", _manager);
                _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.DISCARDED);
            } else {
                // logging for errors
                code = ErrorCode.ERROR;
                String errorMsg = "Message execution failed. msgId: " + getTaskId() + ", errorMsg: " + taskResult.getMessage();
                logger.error(errorMsg);
                _statusUpdateUtil.logError(_message, _handler.getClass(), errorMsg, _manager);
                _executor.getParticipantMonitor().reportProcessedMessage(_message, ParticipantMessageMonitor.ProcessedMessageState.FAILED);
            }
        }
        // forward relay messages attached to this message to other participants
        if (taskResult.isSuccess()) {
            try {
                forwardRelayMessages(accessor, _message, taskResult.getCompleteTime());
            } catch (Exception e) {
                // Fail to send relay message should not result in a task execution failure
                // Currently we don't log error to ZK to reduce writes as when accessor throws
                // exception, ZK might not be in good condition.
                logger.warn("Failed to send relay messages.", e);
            }
        }
        // Cleanup and reply happen only for messages without a PARENT_MSG_ID attribute.
        if (_message.getAttribute(Attributes.PARENT_MSG_ID) == null) {
            removeMessageFromZk(accessor, _message);
            reportMessageStat(_manager, _message, taskResult);
            sendReply(accessor, _message, taskResult);
            _executor.finishTask(this);
        }
    } catch (Exception e) {
        // Any failure during post-processing is recorded as a FRAMEWORK error and
        // overrides a type/code set earlier in the try block.
        exception = e;
        type = ErrorType.FRAMEWORK;
        code = ErrorCode.ERROR;
        String errorMessage = "Exception after executing a message, msgId: " + _message.getMsgId() + e;
        logger.error(errorMessage, e);
        _statusUpdateUtil.logError(_message, HelixTask.class, errorMessage, _manager);
    } finally {
        long end = System.currentTimeMillis();
        logger.info("msg: " + _message.getMsgId() + " handling task completed, results:" + taskResult.isSuccess() + ", at: " + end + ", took:" + (end - start));
        // the handler have chance to finally cleanup
        if (type == ErrorType.INTERNAL) {
            // INTERNAL: pass the exception captured in the task result.
            _handler.onError(taskResult.getException(), code, type);
        } else if (type == ErrorType.FRAMEWORK) {
            // FRAMEWORK: pass the exception raised during post-processing.
            _handler.onError(exception, code, type);
        }
    }
    return taskResult;
}
Also used : Date(java.util.Date) HelixRollbackException(org.apache.helix.HelixRollbackException) StateTransitionDataPoint(org.apache.helix.monitoring.StateTransitionDataPoint) HelixDataAccessor(org.apache.helix.HelixDataAccessor) ErrorType(org.apache.helix.messaging.handling.MessageHandler.ErrorType) ErrorCode(org.apache.helix.messaging.handling.MessageHandler.ErrorCode)

Aggregations

HelixDataAccessor (org.apache.helix.HelixDataAccessor)173 ZNRecord (org.apache.helix.ZNRecord)91 PropertyKey (org.apache.helix.PropertyKey)69 Test (org.testng.annotations.Test)67 Builder (org.apache.helix.PropertyKey.Builder)59 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)40 Date (java.util.Date)39 HelixManager (org.apache.helix.HelixManager)35 IdealState (org.apache.helix.model.IdealState)33 LiveInstance (org.apache.helix.model.LiveInstance)31 HashMap (java.util.HashMap)30 MockParticipantManager (org.apache.helix.integration.manager.MockParticipantManager)30 Message (org.apache.helix.model.Message)30 ArrayList (java.util.ArrayList)28 ExternalView (org.apache.helix.model.ExternalView)26 PropertyPathBuilder (org.apache.helix.PropertyPathBuilder)25 Map (java.util.Map)19 HelixException (org.apache.helix.HelixException)19 ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager)19 InstanceConfig (org.apache.helix.model.InstanceConfig)17