Search in sources :

Example 26 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class TestZKCallback method testInvocation.

/**
 * Checks that a {@code TestCallbackListener} registered on a connected participant manager
 * is notified for each kind of change it subscribes to.
 *
 * <p>The test first asserts that registering the listeners fires every callback once, then
 * writes an external view, a current state, an ideal state, a message and a live instance
 * through the data accessor and asserts, after a short sleep, that the matching flag on the
 * listener was raised. Config-change notification is only covered by the initial check.
 */
@Test()
public void testInvocation() throws Exception {
    final String participantName = "localhost_8900";
    final String resourceWithView = "db-12345";
    final String resourceWithIdealState = "db-1234";

    HelixManager manager = HelixManagerFactory.getZKHelixManager(clusterName, participantName, InstanceType.PARTICIPANT, ZK_ADDR);
    manager.connect();

    // TestCallbackListener is an inner class, so it needs an enclosing TestZKCallback instance.
    TestZKCallback enclosing = new TestZKCallback();
    TestZKCallback.TestCallbackListener listener = enclosing.new TestCallbackListener();
    manager.addMessageListener(listener, participantName);
    manager.addCurrentStateChangeListener(listener, participantName, manager.getSessionId());
    manager.addConfigChangeListener(listener);
    manager.addIdealStateChangeListener(listener);
    manager.addExternalViewChangeListener(listener);
    manager.addLiveInstanceChangeListener(listener);

    // Registering a listener is expected to trigger its callback once right away.
    boolean everyCallbackFired = listener.configChangeReceived
        && listener.currentStateChangeReceived
        && listener.externalViewChangeReceived
        && listener.idealStateChangeReceived
        && listener.liveInstanceChangeReceived
        && listener.messageChangeReceived;
    AssertJUnit.assertTrue(everyCallbackFired);
    listener.Reset();

    HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
    Builder keys = dataAccessor.keyBuilder();

    // External view change.
    ExternalView externalView = new ExternalView(resourceWithView);
    dataAccessor.setProperty(keys.externalView(resourceWithView), externalView);
    Thread.sleep(100);
    AssertJUnit.assertTrue(listener.externalViewChangeReceived);
    listener.Reset();

    // Current state change for this participant's session.
    CurrentState currentState = new CurrentState(resourceWithView);
    currentState.setSessionId("sessionId");
    currentState.setStateModelDefRef("StateModelDef");
    dataAccessor.setProperty(keys.currentState(participantName, manager.getSessionId(), currentState.getId()), currentState);
    Thread.sleep(100);
    AssertJUnit.assertTrue(listener.currentStateChangeReceived);
    listener.Reset();

    // Ideal state change.
    IdealState newIdealState = new IdealState(resourceWithIdealState);
    newIdealState.setNumPartitions(400);
    newIdealState.setReplicas(Integer.toString(2));
    newIdealState.setStateModelDefRef("StateModeldef");
    dataAccessor.setProperty(keys.idealStates(resourceWithIdealState), newIdealState);
    Thread.sleep(100);
    AssertJUnit.assertTrue(listener.idealStateChangeReceived);
    listener.Reset();
    listener.Reset();

    // Message addressed to this participant.
    Message transitionMessage = new Message(MessageType.STATE_TRANSITION, UUID.randomUUID().toString());
    transitionMessage.setTgtSessionId("*");
    transitionMessage.setResourceName("testResource");
    transitionMessage.setPartitionName("testPartitionKey");
    transitionMessage.setStateModelDef("MasterSlave");
    transitionMessage.setToState("toState");
    transitionMessage.setFromState("fromState");
    transitionMessage.setTgtName("testTarget");
    transitionMessage.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY);
    dataAccessor.setProperty(keys.message(participantName, transitionMessage.getId()), transitionMessage);
    Thread.sleep(500);
    AssertJUnit.assertTrue(listener.messageChangeReceived);

    // A new live instance appearing in the cluster.
    final String newInstanceName = "localhost_9801";
    LiveInstance newLiveInstance = new LiveInstance(newInstanceName);
    newLiveInstance.setSessionId(UUID.randomUUID().toString());
    newLiveInstance.setHelixVersion(UUID.randomUUID().toString());
    dataAccessor.setProperty(keys.liveInstance(newInstanceName), newLiveInstance);
    Thread.sleep(500);
    AssertJUnit.assertTrue(listener.liveInstanceChangeReceived);
    listener.Reset();
}
Also used : ExternalView(org.apache.helix.model.ExternalView) Message(org.apache.helix.model.Message) LiveInstance(org.apache.helix.model.LiveInstance) Builder(org.apache.helix.PropertyKey.Builder) CurrentState(org.apache.helix.model.CurrentState) IdealState(org.apache.helix.model.IdealState) Test(org.testng.annotations.Test)

Example 27 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class JobRebalancer method computeResourceMapping.

/**
 * Computes the task-partition to (instance, state) assignment for one job resource.
 *
 * <p>The method works in phases, all visible below:
 * <ol>
 *   <li>derive the effective job state (timing out / stopping / in progress) and record it in
 *       the workflow context;</li>
 *   <li>walk the previous per-instance assignments and, from each partition's current state,
 *       pending message and requested state, decide its next state;</li>
 *   <li>detect job failure, completion or timeout and return an empty assignment in those
 *       cases;</li>
 *   <li>otherwise hand out new tasks to instances, throttled by the job's per-instance
 *       concurrency and by the participant's remaining capacity.</li>
 * </ol>
 *
 * <p>Side effects: mutates {@code workflowCtx}, {@code jobCtx}, {@code partitionsToDropFromIs}
 * and the participant active-task counters held in {@code cache}.
 *
 * @param jobResource name of the job resource being rebalanced
 * @param workflowConfig configuration of the owning workflow (target state is read from it)
 * @param jobCfg configuration of the job (timeout, attempts, thresholds, concurrency)
 * @param prevTaskToInstanceStateAssignment assignment produced by the previous run
 * @param liveInstances names of the instances considered for assignment
 * @param currStateOutput source of current, pending and requested states per partition/instance
 * @param workflowCtx workflow context; job and workflow states are written to it
 * @param jobCtx job context; per-partition state, info and timing are written to it
 * @param partitionsToDropFromIs output set that collects partition ids which completed or were
 *        given up on
 * @param cache cluster data cache (ideal states, job configs, instance and cluster configs)
 * @return the new resource assignment; an empty one when there are no task partitions or when
 *         the job has failed, completed or timed out
 */
private ResourceAssignment computeResourceMapping(String jobResource, WorkflowConfig workflowConfig, JobConfig jobCfg, ResourceAssignment prevTaskToInstanceStateAssignment, Collection<String> liveInstances, CurrentStateOutput currStateOutput, WorkflowContext workflowCtx, JobContext jobCtx, Set<Integer> partitionsToDropFromIs, ClusterDataCache cache) {
    TargetState jobTgtState = workflowConfig.getTargetState();
    TaskState jobState = workflowCtx.getJobState(jobResource);
    TaskState workflowState = workflowCtx.getWorkflowState();
    // An in-progress job starts timing out when its own timeout elapsed or the workflow timed out.
    if (jobState == TaskState.IN_PROGRESS && (isTimeout(jobCtx.getStartTime(), jobCfg.getTimeout()) || TaskState.TIMED_OUT.equals(workflowState))) {
        jobState = TaskState.TIMING_OUT;
        workflowCtx.setJobState(jobResource, TaskState.TIMING_OUT);
    } else if (jobState != TaskState.TIMING_OUT && jobState != TaskState.FAILING) {
        // Update running status in workflow context
        // NOTE: only the context is updated here; the local jobState keeps the value read above.
        if (jobTgtState == TargetState.STOP) {
            if (checkJobStopped(jobCtx)) {
                workflowCtx.setJobState(jobResource, TaskState.STOPPED);
            } else {
                workflowCtx.setJobState(jobResource, TaskState.STOPPING);
            }
            // Workflow has been stopped if all in progress jobs are stopped
            if (isWorkflowStopped(workflowCtx, workflowConfig)) {
                workflowCtx.setWorkflowState(TaskState.STOPPED);
            } else {
                workflowCtx.setWorkflowState(TaskState.STOPPING);
            }
        } else {
            workflowCtx.setJobState(jobResource, TaskState.IN_PROGRESS);
            // Workflow is in progress if any task is in progress
            workflowCtx.setWorkflowState(TaskState.IN_PROGRESS);
        }
    }
    // Used to keep track of tasks that have already been assigned to instances.
    Set<Integer> assignedPartitions = new HashSet<Integer>();
    // Used to keep track of tasks that have failed, but whose failure is acceptable
    Set<Integer> skippedPartitions = new HashSet<Integer>();
    // Keeps a mapping of (partition) -> (instance, state)
    Map<Integer, PartitionAssignment> paMap = new TreeMap<Integer, PartitionAssignment>();
    Set<String> excludedInstances = getExcludedInstances(jobResource, workflowConfig, cache);
    // Process all the current assignments of tasks.
    TaskAssignmentCalculator taskAssignmentCal = getAssignmentCalulator(jobCfg);
    Set<Integer> allPartitions = taskAssignmentCal.getAllTaskPartitions(jobCfg, jobCtx, workflowConfig, workflowCtx, cache.getIdealStates());
    if (allPartitions == null || allPartitions.isEmpty()) {
        // Empty target partitions, mark the job as FAILED.
        String failureMsg = "Empty task partition mapping for job " + jobResource + ", marked the job as FAILED!";
        LOG.info(failureMsg);
        jobCtx.setInfo(failureMsg);
        failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
        markAllPartitionsError(jobCtx, TaskPartitionState.ERROR, false);
        return new ResourceAssignment(jobResource);
    }
    Map<String, SortedSet<Integer>> prevInstanceToTaskAssignments = getPrevInstanceToTaskAssignments(liveInstances, prevTaskToInstanceStateAssignment, allPartitions);
    // Captured once so that delay scheduling and the non-ready check below use the same instant.
    long currentTime = System.currentTimeMillis();
    LOG.debug("All partitions: " + allPartitions + " taskAssignment: " + prevInstanceToTaskAssignments + " excludedInstances: " + excludedInstances);
    // Iterate through all instances
    for (String instance : prevInstanceToTaskAssignments.keySet()) {
        if (excludedInstances.contains(instance)) {
            continue;
        }
        Set<Integer> pSet = prevInstanceToTaskAssignments.get(instance);
        // Used to keep track of partitions that are in one of the final states: COMPLETED, TIMED_OUT,
        // TASK_ERROR, ERROR.
        Set<Integer> donePartitions = new TreeSet<Integer>();
        for (int pId : pSet) {
            final String pName = pName(jobResource, pId);
            TaskPartitionState currState = updateJobContextAndGetTaskCurrentState(currStateOutput, jobResource, pId, pName, instance, jobCtx);
            // Check for pending state transitions on this (partition, instance).
            Message pendingMessage = currStateOutput.getPendingState(jobResource, new Partition(pName), instance);
            // A pending message whose target differs from the current state is handled separately.
            if (pendingMessage != null && !pendingMessage.getToState().equals(currState.name())) {
                processTaskWithPendingMessage(prevTaskToInstanceStateAssignment, pId, pName, instance, pendingMessage, jobState, currState, paMap, assignedPartitions);
                continue;
            }
            // Process any requested state transitions.
            String requestedStateStr = currStateOutput.getRequestedState(jobResource, new Partition(pName), instance);
            if (requestedStateStr != null && !requestedStateStr.isEmpty()) {
                TaskPartitionState requestedState = TaskPartitionState.valueOf(requestedStateStr);
                if (requestedState.equals(currState)) {
                    LOG.warn(String.format("Requested state %s is the same as the current state for instance %s.", requestedState, instance));
                }
                // The requested state is honored even when it equals the current state.
                paMap.put(pId, new PartitionAssignment(instance, requestedState.name()));
                assignedPartitions.add(pId);
                LOG.debug(String.format("Instance %s requested a state transition to %s for partition %s.", instance, requestedState, pName));
                continue;
            }
            switch(currState) {
                case RUNNING:
                    {
                        // Keep running unless the job is timing out (abort) or was asked to stop.
                        TaskPartitionState nextState = TaskPartitionState.RUNNING;
                        if (jobState == TaskState.TIMING_OUT) {
                            nextState = TaskPartitionState.TASK_ABORTED;
                        } else if (jobTgtState == TargetState.STOP) {
                            nextState = TaskPartitionState.STOPPED;
                        }
                        paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
                        assignedPartitions.add(pId);
                        LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
                    }
                    break;
                case STOPPED:
                    {
                        // A stopped task resumes only when the target state is START.
                        TaskPartitionState nextState;
                        if (jobTgtState == TargetState.START) {
                            nextState = TaskPartitionState.RUNNING;
                        } else {
                            nextState = TaskPartitionState.STOPPED;
                        }
                        paMap.put(pId, new PartitionAssignment(instance, nextState.name()));
                        assignedPartitions.add(pId);
                        LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, nextState, instance));
                    }
                    break;
                case COMPLETED:
                    {
                        // The task has completed on this partition. Mark as such in the context object.
                        donePartitions.add(pId);
                        LOG.debug(String.format("Task partition %s has completed with state %s. Marking as such in rebalancer context.", pName, currState));
                        partitionsToDropFromIs.add(pId);
                        markPartitionCompleted(jobCtx, pId);
                    }
                    break;
                case TIMED_OUT:
                case TASK_ERROR:
                case TASK_ABORTED:
                case ERROR:
                    {
                        // The task may be rescheduled on a different instance.
                        donePartitions.add(pId);
                        LOG.debug(String.format("Task partition %s has error state %s with msg %s. Marking as such in rebalancer context.", pName, currState, jobCtx.getPartitionInfo(pId)));
                        markPartitionError(jobCtx, pId, currState, true);
                        // After all tasks are aborted, they will be dropped, because of job timeout.
                        if (jobState != TaskState.TIMED_OUT && jobState != TaskState.TIMING_OUT) {
                            // Give up when attempts are exhausted or the state is TASK_ABORTED / ERROR.
                            if (jobCtx.getPartitionNumAttempts(pId) >= jobCfg.getMaxAttemptsPerTask() || currState.equals(TaskPartitionState.TASK_ABORTED) || currState.equals(TaskPartitionState.ERROR)) {
                                skippedPartitions.add(pId);
                                partitionsToDropFromIs.add(pId);
                                LOG.debug("skippedPartitions:" + skippedPartitions);
                            } else {
                                // Mark the task to be started at some later time (if enabled)
                                markPartitionDelayed(jobCfg, jobCtx, pId);
                            }
                        }
                    }
                    break;
                case INIT:
                case DROPPED:
                    {
                        // currState in [INIT, DROPPED]. Do nothing, the partition is eligible to be reassigned.
                        donePartitions.add(pId);
                        LOG.debug(String.format("Task partition %s has state %s. It will be dropped from the current ideal state.", pName, currState));
                    }
                    break;
                default:
                    throw new AssertionError("Unknown enum symbol: " + currState);
            }
        }
        // Remove the set of task partitions that are completed or in one of the error states.
        // Done after the inner loop so that pSet is not modified while it is being iterated.
        pSet.removeAll(donePartitions);
    }
    addGiveupPartitions(skippedPartitions, jobCtx, allPartitions, jobCfg);
    // Too many skipped partitions: fail the job now if it is finished, otherwise move it to FAILING.
    if (jobState == TaskState.IN_PROGRESS && skippedPartitions.size() > jobCfg.getFailureThreshold()) {
        if (isJobFinished(jobCtx, jobResource, currStateOutput)) {
            failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
            return buildEmptyAssignment(jobResource, currStateOutput);
        }
        workflowCtx.setJobState(jobResource, TaskState.FAILING);
        // Drop all assigned but not given-up tasks
        for (int pId : jobCtx.getPartitionSet()) {
            String instance = jobCtx.getAssignedParticipant(pId);
            if (jobCtx.getPartitionState(pId) != null && !isTaskGivenup(jobCtx, jobCfg, pId)) {
                paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.TASK_ABORTED.name()));
            }
            Partition partition = new Partition(pName(jobResource, pId));
            Message pendingMessage = currStateOutput.getPendingState(jobResource, partition, instance);
            // If the task is still INIT but has a pending transition message, assign it back to INIT
            // so that Helix will cancel the transition.
            if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT && pendingMessage != null) {
                paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.INIT.name()));
            }
        }
        return toResourceAssignment(jobResource, paMap);
    }
    // A job already in FAILING is failed for good once isJobFinished reports it finished.
    if (jobState == TaskState.FAILING && isJobFinished(jobCtx, jobResource, currStateOutput)) {
        failJob(jobResource, workflowCtx, jobCtx, workflowConfig, cache.getJobConfigMap());
        return buildEmptyAssignment(jobResource, currStateOutput);
    }
    if (isJobComplete(jobCtx, allPartitions, jobCfg)) {
        markJobComplete(jobResource, jobCtx, workflowConfig, workflowCtx, cache.getJobConfigMap());
        _clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.COMPLETED, jobCtx.getFinishTime() - jobCtx.getStartTime());
        _rebalanceScheduler.removeScheduledRebalance(jobResource);
        TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
        return buildEmptyAssignment(jobResource, currStateOutput);
    }
    // If the job is timing out and isJobFinished reports it finished, mark it TIMED_OUT and clean
    // up; all tasks can be dropped (note that Helix doesn't track whether the drop is success or not).
    if (jobState == TaskState.TIMING_OUT && isJobFinished(jobCtx, jobResource, currStateOutput)) {
        jobCtx.setFinishTime(System.currentTimeMillis());
        workflowCtx.setJobState(jobResource, TaskState.TIMED_OUT);
        // Mark all INIT task to TASK_ABORTED
        for (int pId : jobCtx.getPartitionSet()) {
            if (jobCtx.getPartitionState(pId) == TaskPartitionState.INIT) {
                jobCtx.setPartitionState(pId, TaskPartitionState.TASK_ABORTED);
            }
        }
        _clusterStatusMonitor.updateJobCounters(jobCfg, TaskState.TIMED_OUT);
        _rebalanceScheduler.removeScheduledRebalance(jobResource);
        TaskUtil.cleanupJobIdealStateExtView(_manager.getHelixDataAccessor(), jobResource);
        return buildEmptyAssignment(jobResource, currStateOutput);
    }
    // For delayed tasks, trigger a rebalance event for the closest upcoming ready time
    scheduleForNextTask(jobResource, jobCtx, currentTime);
    // Make additional task assignments if needed.
    if (jobState != TaskState.TIMING_OUT && jobState != TaskState.TIMED_OUT && jobTgtState == TargetState.START) {
        // Contains the set of task partitions that must be excluded from consideration when making
        // any new assignments.
        // This includes all completed, failed, delayed, and already assigned partitions.
        Set<Integer> excludeSet = Sets.newTreeSet(assignedPartitions);
        addCompletedTasks(excludeSet, jobCtx, allPartitions);
        addGiveupPartitions(excludeSet, jobCtx, allPartitions, jobCfg);
        excludeSet.addAll(skippedPartitions);
        excludeSet.addAll(getNonReadyPartitions(jobCtx, currentTime));
        // Get instance->[partition, ...] mappings for the target resource.
        Map<String, SortedSet<Integer>> tgtPartitionAssignments = taskAssignmentCal.getTaskAssignment(currStateOutput, prevTaskToInstanceStateAssignment, liveInstances, jobCfg, jobCtx, workflowConfig, workflowCtx, allPartitions, cache.getIdealStates());
        // Applies to non-generic jobs, and to generic jobs that opted in to rebalancing running tasks.
        if (!isGenericTaskJob(jobCfg) || jobCfg.isRebalanceRunningTask()) {
            dropRebalancedRunningTasks(tgtPartitionAssignments, prevInstanceToTaskAssignments, paMap, jobCtx);
        }
        for (Map.Entry<String, SortedSet<Integer>> entry : prevInstanceToTaskAssignments.entrySet()) {
            String instance = entry.getKey();
            if (!tgtPartitionAssignments.containsKey(instance) || excludedInstances.contains(instance)) {
                continue;
            }
            // 1. throttled by job configuration
            // Contains the set of task partitions currently assigned to the instance.
            Set<Integer> pSet = entry.getValue();
            int jobCfgLimitation = jobCfg.getNumConcurrentTasksPerInstance() - pSet.size();
            // 2. throttled by participant capacity
            // NOTE(review): assumes the instance has an entry in the instance config map; a missing
            // entry would throw a NullPointerException here - confirm against callers.
            int participantCapacity = cache.getInstanceConfigMap().get(instance).getMaxConcurrentTask();
            // Fall back to the cluster-wide limit when the instance does not set its own.
            if (participantCapacity == InstanceConfig.MAX_CONCURRENT_TASK_NOT_SET) {
                participantCapacity = cache.getClusterConfig().getMaxConcurrentTaskPerInstance();
            }
            int participantLimitation = participantCapacity - cache.getParticipantActiveTaskCount(instance);
            // New tasks to be assigned
            int numToAssign = Math.min(jobCfgLimitation, participantLimitation);
            LOG.debug(String.format("Throttle tasks to be assigned to instance %s using limitation: Job Concurrent Task(%d), " + "Participant Max Task(%d). Remaining capacity %d.", instance, jobCfgLimitation, participantCapacity, numToAssign));
            if (numToAssign > 0) {
                Set<Integer> throttledSet = new HashSet<Integer>();
                List<Integer> nextPartitions = getNextPartitions(tgtPartitionAssignments.get(instance), excludeSet, throttledSet, numToAssign);
                for (Integer pId : nextPartitions) {
                    String pName = pName(jobResource, pId);
                    // The assignment targets RUNNING while the job context records INIT for now.
                    paMap.put(pId, new PartitionAssignment(instance, TaskPartitionState.RUNNING.name()));
                    excludeSet.add(pId);
                    jobCtx.setAssignedParticipant(pId, instance);
                    jobCtx.setPartitionState(pId, TaskPartitionState.INIT);
                    jobCtx.setPartitionStartTime(pId, System.currentTimeMillis());
                    LOG.debug(String.format("Setting task partition %s state to %s on instance %s.", pName, TaskPartitionState.RUNNING, instance));
                }
                // Account for the newly assigned tasks in the instance's active-task counter.
                cache.setParticipantActiveTaskCount(instance, cache.getParticipantActiveTaskCount(instance) + nextPartitions.size());
                if (!throttledSet.isEmpty()) {
                    LOG.debug(throttledSet.size() + "tasks are ready but throttled when assigned to participant.");
                }
            }
        }
    }
    return toResourceAssignment(jobResource, paMap);
}
Also used : Partition(org.apache.helix.model.Partition) Message(org.apache.helix.model.Message) TreeMap(java.util.TreeMap) SortedSet(java.util.SortedSet) ResourceAssignment(org.apache.helix.model.ResourceAssignment) TreeSet(java.util.TreeSet) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)

Example 28 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class ZKHelixAdmin method resetPartition.

/**
 * Resets the given partitions of a resource on one instance by sending, for each partition,
 * a STATE_TRANSITION message from ERROR to the state model's initial state.
 *
 * <p>Before any message is written the method verifies that the instance is live, the
 * resource's ideal state exists, every requested partition is listed in the ideal state,
 * every requested partition is currently in ERROR on the instance, the state model
 * definition exists, and no state-transition message for these partitions is still pending
 * for the instance's current session.
 *
 * @param clusterName name of the cluster
 * @param instanceName instance on which the partitions are reset
 * @param resourceName resource owning the partitions
 * @param partitionNames partitions to reset
 * @throws HelixException if any of the checks above fails, including when the instance has
 *         no current state for the resource or for one of the partitions
 */
@Override
public void resetPartition(String clusterName, String instanceName, String resourceName, List<String> partitionNames) {
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
    Builder keyBuilder = accessor.keyBuilder();
    // Shared prefix of every rejection message thrown below.
    String cannotReset = "Can't reset state for " + resourceName + "/" + partitionNames + " on " + instanceName + ", because ";
    // check the instance is alive
    LiveInstance liveInstance = accessor.getProperty(keyBuilder.liveInstance(instanceName));
    if (liveInstance == null) {
        throw new HelixException(cannotReset + instanceName + " is not alive");
    }
    // check resource group exists
    IdealState idealState = accessor.getProperty(keyBuilder.idealStates(resourceName));
    if (idealState == null) {
        throw new HelixException(cannotReset + resourceName + " is not added");
    }
    // check partition exists in resource group: CUSTOMIZED resources list their partitions in
    // the map fields, every other rebalance mode in the list fields
    Set<String> resetPartitionNames = new HashSet<String>(partitionNames);
    Set<String> knownPartitions;
    if (idealState.getRebalanceMode() == RebalanceMode.CUSTOMIZED) {
        knownPartitions = idealState.getRecord().getMapFields().keySet();
    } else {
        knownPartitions = idealState.getRecord().getListFields().keySet();
    }
    if (!knownPartitions.containsAll(resetPartitionNames)) {
        throw new HelixException(cannotReset + "not all " + partitionNames + " exist");
    }
    // check partition is in ERROR state
    String sessionId = liveInstance.getSessionId();
    CurrentState curState = accessor.getProperty(keyBuilder.currentState(instanceName, sessionId, resourceName));
    String errorState = HelixDefinedState.ERROR.toString();
    for (String partitionName : resetPartitionNames) {
        // A missing current state, or a partition without a recorded state, is not in ERROR;
        // report it as a HelixException rather than failing with a NullPointerException.
        if (curState == null || !errorState.equals(curState.getState(partitionName))) {
            throw new HelixException(cannotReset + "not all " + partitionNames + " are in ERROR state");
        }
    }
    // check stateModelDef exists and get initial state
    String stateModelDef = idealState.getStateModelDefRef();
    StateModelDefinition stateModel = accessor.getProperty(keyBuilder.stateModelDef(stateModelDef));
    if (stateModel == null) {
        throw new HelixException(cannotReset + stateModelDef + " is NOT found");
    }
    // check there is no pending messages for the partitions exist
    List<Message> messages = accessor.getChildValues(keyBuilder.messages(instanceName));
    for (Message message : messages) {
        boolean blocksReset = MessageType.STATE_TRANSITION.name().equalsIgnoreCase(message.getMsgType())
            && sessionId.equals(message.getTgtSessionId())
            && resourceName.equals(message.getResourceName())
            && resetPartitionNames.contains(message.getPartitionName());
        if (blocksReset) {
            throw new HelixException(cannotReset + "a pending message exists: " + message);
        }
    }
    String adminName;
    try {
        adminName = InetAddress.getLocalHost().getCanonicalHostName() + "-ADMIN";
    } catch (UnknownHostException e) {
        // can ignore it: the name is only used as the message source
        logger.info("Unable to get host name. Will set it to UNKNOWN, mostly ignorable", e);
        adminName = "UNKNOWN";
    }
    List<Message> resetMessages = new ArrayList<Message>();
    List<PropertyKey> messageKeys = new ArrayList<PropertyKey>();
    for (String partitionName : resetPartitionNames) {
        // send ERROR to initialState message
        String msgId = UUID.randomUUID().toString();
        Message message = new Message(MessageType.STATE_TRANSITION, msgId);
        message.setSrcName(adminName);
        message.setTgtName(instanceName);
        message.setMsgState(MessageState.NEW);
        message.setPartitionName(partitionName);
        message.setResourceName(resourceName);
        message.setTgtSessionId(sessionId);
        message.setStateModelDef(stateModelDef);
        message.setFromState(errorState);
        message.setToState(stateModel.getInitialState());
        message.setStateModelFactoryName(idealState.getStateModelFactoryName());
        if (idealState.getResourceGroupName() != null) {
            message.setResourceGroupName(idealState.getResourceGroupName());
        }
        if (idealState.getInstanceGroupTag() != null) {
            message.setResourceTag(idealState.getInstanceGroupTag());
        }
        resetMessages.add(message);
        messageKeys.add(keyBuilder.message(instanceName, message.getId()));
    }
    // Write all reset messages in one call.
    accessor.setChildren(messageKeys, resetMessages);
}
Also used : Message(org.apache.helix.model.Message) UnknownHostException(java.net.UnknownHostException) Builder(org.apache.helix.PropertyKey.Builder) PropertyPathBuilder(org.apache.helix.PropertyPathBuilder) ArrayList(java.util.ArrayList) IdealState(org.apache.helix.model.IdealState) HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) LiveInstance(org.apache.helix.model.LiveInstance) StateModelDefinition(org.apache.helix.model.StateModelDefinition) CurrentState(org.apache.helix.model.CurrentState) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey) HashSet(java.util.HashSet)

Example 29 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class HelixStateTransitionHandler method handleMessage.

/**
 * Executes the state transition described by this handler's message while holding the
 * state model's monitor, and reports the outcome as a {@link HelixTaskResult}.
 *
 * <p>A {@code HelixStateMismatchException} is recorded as a plain failure. Any other
 * exception is logged; if it is, or is caused by, a {@code HelixRollbackException} the result
 * is marked cancelled, otherwise it is marked failed (and interrupted when the failure is an
 * {@code InterruptedException}). In every case the completion time is stamped, the result is
 * stored in the notification context and {@code postHandleMessage()} runs before returning.
 *
 * @return the result of handling the message
 */
@Override
public HelixTaskResult handleMessage() {
    NotificationContext notificationCtx = _notificationContext;
    Message msg = _message;
    synchronized (_stateModel) {
        HelixTaskResult result = new HelixTaskResult();
        HelixManager helixManager = notificationCtx.getManager();
        _statusUpdateUtil.logInfo(msg, HelixStateTransitionHandler.class, "Message handling task begin execute", helixManager);
        msg.setExecuteStartTimeStamp(new Date().getTime());
        try {
            preHandleMessage();
            invoke(helixManager, notificationCtx, result, msg);
        } catch (HelixStateMismatchException mismatch) {
            // State mismatch: just report the failure; the state model itself is left untouched.
            result.setSuccess(false);
            result.setMessage(mismatch.toString());
            result.setException(mismatch);
        } catch (Exception caught) {
            String errorMessage = "Exception while executing a state transition task " + msg.getPartitionName();
            logger.error(errorMessage, caught);
            // Unwrap an InterruptedException delivered as the cause of another exception.
            Exception failure = caught;
            if (caught.getCause() instanceof InterruptedException) {
                failure = (InterruptedException) caught.getCause();
            }
            boolean rolledBack = failure instanceof HelixRollbackException || failure.getCause() instanceof HelixRollbackException;
            if (rolledBack) {
                // TODO : Support cancel to any state
                logger.info("Rollback happened of state transition on resource \"" + _message.getResourceName() + "\" partition \"" + _message.getPartitionName() + "\" from \"" + _message.getFromState() + "\" to \"" + _message.getToState() + "\"");
                result.setCancelled(true);
            } else {
                _statusUpdateUtil.logError(msg, HelixStateTransitionHandler.class, failure, errorMessage, helixManager);
                result.setSuccess(false);
                result.setMessage(failure.toString());
                result.setException(failure);
                result.setInterrupted(failure instanceof InterruptedException);
            }
        }
        result.setCompleteTime(System.currentTimeMillis());
        // Expose the result to post-handling through the notification context.
        notificationCtx.add(MapKey.HELIX_TASK_RESULT.toString(), result);
        postHandleMessage();
        return result;
    }
}
Also used : NotificationContext(org.apache.helix.NotificationContext) HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) HelixRollbackException(org.apache.helix.HelixRollbackException) Date(java.util.Date) HelixException(org.apache.helix.HelixException) HelixRollbackException(org.apache.helix.HelixRollbackException) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 30 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class HelixStateTransitionHandler method invoke.

/**
 * Finds the state-model method that implements the message's from-state to to-state
 * transition and invokes it reflectively.
 *
 * <p>When no matching method exists, the error is logged and the task result is marked
 * unsuccessful. When the handler has been cancelled, a {@code HelixRollbackException} is
 * thrown instead of invoking the method. On a normal return the task result is marked
 * successful and its info is set to the string form of the method's return value (empty when
 * the method returned {@code null}).
 *
 * @param manager manager passed to the status-update logger
 * @param context notification context handed to the transition method
 * @param taskResult result object updated with success flag and info
 * @param message state-transition message being handled
 * @throws HelixRollbackException if the transition was cancelled before it was invoked
 * @throws InvocationTargetException if the transition method itself throws
 * @throws IllegalAccessException if the transition method is not accessible
 * @throws InterruptedException declared by the signature for callers; not thrown directly here
 */
private void invoke(HelixManager manager, NotificationContext context, HelixTaskResult taskResult, Message message) throws IllegalAccessException, InvocationTargetException, InterruptedException, HelixRollbackException {
    _statusUpdateUtil.logInfo(message, HelixStateTransitionHandler.class, "Message handling invoking", manager);
    String fromState = message.getFromState();
    String toState = message.getToState();
    // by default, we invoke state transition function in state model
    Method methodToInvoke = _transitionMethodFinder.getMethodForTransition(_stateModel.getClass(), fromState, toState, new Class[] { Message.class, NotificationContext.class });
    if (methodToInvoke == null) {
        String errorMessage = "Unable to find method for transition from " + fromState + " to " + toState + " in " + _stateModel.getClass();
        logger.error(errorMessage);
        taskResult.setSuccess(false);
        _statusUpdateUtil.logError(message, HelixStateTransitionHandler.class, errorMessage, manager);
        return;
    }
    logger.info(String.format("Instance %s, partition %s received state transition from %s to %s on session %s, message id: %s", message.getTgtName(), message.getPartitionName(), message.getFromState(), message.getToState(), message.getTgtSessionId(), message.getMsgId()));
    // Checked once, right before the invocation; the original repeated this check twice in a row.
    if (_cancelled) {
        throw new HelixRollbackException(String.format("Instance %s, partition %s state transition from %s to %s on session %s has been cancelled, message id: %s", message.getTgtName(), message.getPartitionName(), message.getFromState(), message.getToState(), message.getTgtSessionId(), message.getMsgId()));
    }
    Object result = methodToInvoke.invoke(_stateModel, new Object[] { message, context });
    taskResult.setSuccess(true);
    // A void transition method yields null from Method.invoke, which maps to an empty info string.
    taskResult.setInfo(result == null ? "" : result.toString());
}
Also used : NotificationContext(org.apache.helix.NotificationContext) Message(org.apache.helix.model.Message) HelixRollbackException(org.apache.helix.HelixRollbackException) Method(java.lang.reflect.Method)

Aggregations

Message (org.apache.helix.model.Message)116 Test (org.testng.annotations.Test)53 ArrayList (java.util.ArrayList)36 HelixDataAccessor (org.apache.helix.HelixDataAccessor)30 Builder (org.apache.helix.PropertyKey.Builder)28 HelixManager (org.apache.helix.HelixManager)22 ZNRecord (org.apache.helix.ZNRecord)22 Criteria (org.apache.helix.Criteria)21 Date (java.util.Date)19 HashMap (java.util.HashMap)18 Partition (org.apache.helix.model.Partition)18 PropertyKey (org.apache.helix.PropertyKey)17 LiveInstance (org.apache.helix.model.LiveInstance)13 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)12 NotificationContext (org.apache.helix.NotificationContext)11 CurrentState (org.apache.helix.model.CurrentState)10 HelixException (org.apache.helix.HelixException)9 Resource (org.apache.helix.model.Resource)9 StringWriter (java.io.StringWriter)8 List (java.util.List)8