Search in sources:

Example 1 with NodeAlarmEvent

use of com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent in project otter by alibaba.

The method create of the class LogRecordServiceImpl.

/**
 * Builds a {@code LogRecord} from the given event and hands it to the
 * {@code LogRecord}-taking {@code create} method.
 *
 * <p>Only {@code NodeAlarmEvent} instances contribute data (pipeline id, node id,
 * title and message). For any other event the record is passed on without any
 * field being set.
 *
 * @param event the event to turn into a log record
 */
public void create(Event event) {
    LogRecord record = new LogRecord();
    if (event instanceof NodeAlarmEvent) {
        NodeAlarmEvent alarm = (NodeAlarmEvent) event;
        record.setTitle(alarm.getTitle());
        record.setMessage(alarm.getMessage());
        record.setNid(alarm.getNid());
        // The event only carries the pipeline id, so wrap it in an otherwise empty Pipeline.
        Pipeline pipelineRef = new Pipeline();
        pipelineRef.setId(alarm.getPipelineId());
        record.setPipeline(pipelineRef);
    }
    create(record);
}
Also used : LogRecord(com.alibaba.otter.shared.common.model.config.record.LogRecord) NodeAlarmEvent(com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 2 with NodeAlarmEvent

use of com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent in project otter by alibaba.

The method feed of the class ExceptionRuleMonitor.

/**
 * Records a node alarm in the log and runs it against the exception alarm rules
 * of the given pipeline.
 *
 * <p>Input that is not a {@code NodeAlarmEvent} is ignored. Otherwise the alarm is
 * always logged first; afterwards every enabled rule whose monitor name is
 * {@code MonitorName.EXCEPTION} and which passes {@code checkEnable} at the current
 * time is checked against the event.
 *
 * @param data       the incoming payload; only {@code NodeAlarmEvent} is processed
 * @param pipelineId the pipeline whose alarm rules are looked up and under which the log is recorded
 */
@Override
public void feed(Object data, Long pipelineId) {
    if (!(data instanceof NodeAlarmEvent)) {
        return;
    }
    NodeAlarmEvent event = (NodeAlarmEvent) data;

    // An exception must always be written to the log, whether or not a rule applies.
    String logMessage = String.format(MESAGE_FORMAT, event.getPipelineId(), event.getNid(), event.getMessage());
    logRecordAlarm(pipelineId, event.getNid(), MonitorName.EXCEPTION, logMessage);

    // Alarm check.
    List<AlarmRule> enabledRules = alarmRuleService.getAlarmRules(pipelineId, AlarmRuleStatus.ENABLE);
    // TODO need to raise a feature request with alarmRuleService
    Date checkTime = new Date();
    List<AlarmRule> applicableRules = new ArrayList<AlarmRule>();
    for (AlarmRule candidate : enabledRules) {
        if (!MonitorName.EXCEPTION.equals(candidate.getMonitorName())) {
            continue;
        }
        if (checkEnable(candidate, checkTime)) {
            applicableRules.add(candidate);
        }
    }

    // Nothing happens here when no rule applies.
    for (AlarmRule applicable : applicableRules) {
        check(applicable, event);
    }
}
Also used : AlarmRule(com.alibaba.otter.shared.common.model.config.alarm.AlarmRule) NodeAlarmEvent(com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent) ArrayList(java.util.ArrayList) Date(java.util.Date)

Example 3 with NodeAlarmEvent

use of com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent in project otter by alibaba.

The method processRecovery of the class RestartAlarmRecovery.

/**
 * Recovers a channel either by restarting it or by a full stop/start cycle, and on
 * success feeds a notification alarm to the exception rule monitor.
 *
 * @param channelId the channel to recover
 * @param ruleId    the alarm rule that triggered the recovery; only used in the alarm message
 * @param needStop  {@code true} to stop and start the channel, {@code false} to restart it
 * @return the outcome of the restart call when {@code needStop} is {@code false};
 *         always {@code true} when {@code needStop} is {@code true}
 */
private boolean processRecovery(Long channelId, Long ruleId, boolean needStop) {
    boolean result = true;
    if (needStop) {
        // Works around incomplete release in process rpc mode: a stop releases all object resources completely.
        channelService.stopChannel(channelId);
        channelService.startChannel(channelId);
    } else {
        result = arbitrateManageService.channelEvent().restart(channelId);
        if (result) {
            // Push the configuration.
            channelService.notifyChannel(channelId);
        }
    }

    NodeAlarmEvent alarm = new NodeAlarmEvent();
    alarm.setPipelineId(-1L);
    alarm.setTitle(MonitorName.EXCEPTION.name());
    if (!result) {
        // A failed restart produces no notification.
        return result;
    }

    String template = needStop ? "cid:%s stop recovery successful for rid:%s"
                               : "cid:%s restart recovery successful for rid:%s";
    alarm.setMessage(String.format(template, String.valueOf(channelId), String.valueOf(ruleId)));
    try {
        exceptionRuleMonitor.feed(alarm, alarm.getPipelineId());
    } catch (Exception e) {
        // A failure to notify is logged and does not change the recovery result.
        logger.error(String.format("ERROR # exceptionRuleMonitor error for %s", alarm.toString()), e);
    }
    return result;
}
Also used : NodeAlarmEvent(com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent)

Example 4 with NodeAlarmEvent

use of com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent in project otter by alibaba.

The method testSerialProcess of the class ExceptionRuleMonitorTest.

/**
 * Feeds one node alarm event to the monitor while the mocked alarm rule service
 * returns a single enabled EXCEPTION rule.
 *
 * <p>The test contains no assertions; it passes as long as {@code feed} completes
 * without throwing.
 */
@Test
public void testSerialProcess() {
    // The one rule the mocked service will hand back.
    final AlarmRule exceptionRule = new AlarmRule();
    exceptionRule.setId(1L);
    exceptionRule.setPipelineId(2L);
    exceptionRule.setMonitorName(MonitorName.EXCEPTION);
    exceptionRule.setMatchValue("EXCEPTION");
    exceptionRule.setStatus(AlarmRuleStatus.ENABLE);
    exceptionRule.setReceiverKey("otterteam");
    exceptionRule.setDescription("xxx");
    exceptionRule.setGmtCreate(new Date());
    exceptionRule.setGmtModified(new Date());
    final List<AlarmRule> stubbedRules = new ArrayList<AlarmRule>();
    stubbedRules.add(exceptionRule);

    new NonStrictExpectations() {

        {
            // Recorded call, immediately followed by the value it should return.
            alarmRuleService.getAlarmRules(anyLong, AlarmRuleStatus.ENABLE);
            returns(stubbedRules);
        }
    };

    // NOTE(review): title and message spell "EXCEPTON" while the rule's match value is "EXCEPTION" — confirm this is intended.
    NodeAlarmEvent alarmEvent = new NodeAlarmEvent();
    alarmEvent.setPipelineId(2L);
    alarmEvent.setNid(5L);
    alarmEvent.setTitle("EXCEPTON");
    alarmEvent.setMessage("pid:77 nid:5 exception:EXCEPTON,nid:5[setl:ERROR ## SelectTask processId = 644408,parallelism = 5,ProcessEnd processId = 644394 invalid]");
    monitor.feed(alarmEvent, 2L);
}
Also used : AlarmRule(com.alibaba.otter.shared.common.model.config.alarm.AlarmRule) ArrayList(java.util.ArrayList) NodeAlarmEvent(com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent) Date(java.util.Date) Test(org.testng.annotations.Test) BaseOtterTest(com.alibaba.otter.manager.biz.BaseOtterTest)

Example 5 with NodeAlarmEvent

use of com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent in project otter by alibaba.

The method processDead of the class DeadNodeListener.

/**
 * Handles a node reported as dead: raises an alarm listing the started channels on
 * that node and then restarts each of those channels.
 *
 * <p>If the node shows up in the current alive-node list, the report is treated as
 * a momentary glitch and nothing is done beyond a warning log.
 *
 * @param deadNode id of the node reported as dead
 */
private void processDead(Long deadNode) {
    // A brief network interruption can make every node reconnect and trigger a restart storm,
    // so re-check whether the node is alive before restarting anything.
    if (nodeMonitor.getAliveNodes(true).contains(deadNode)) {
        logger.warn("dead node[{}] happend just one moment , check it's alived", deadNode);
        return;
    }

    // Collect the ids of the started channels on the dead node, in ascending order.
    List<Long> restartCids = Lists.newArrayList();
    for (Channel started : channelService.listByNodeId(deadNode, ChannelStatus.START)) {
        restartCids.add(started.getId());
    }
    Collections.sort(restartCids);

    // Send one alarm message. The pipeline id is -1 (presumably a "no specific pipeline" marker — confirm).
    NodeAlarmEvent deadAlarm = new NodeAlarmEvent();
    deadAlarm.setTitle(MonitorName.EXCEPTION.name());
    deadAlarm.setPipelineId(-1L);
    deadAlarm.setMessage(String.format("nid:%s is dead and restart cids:%s", String.valueOf(deadNode), restartCids.toString()));
    try {
        exceptionRuleMonitor.feed(deadAlarm, deadAlarm.getPipelineId());
    } catch (Exception e) {
        // An alarm failure is logged and does not block the restarts below.
        logger.error(String.format("ERROR # exceptionRuleMonitor error for %s", deadAlarm.toString()), e);
    }

    for (Long cid : restartCids) {
        // Restart the corresponding channel.
        if (!arbitrateManageService.channelEvent().restart(cid)) {
            continue;
        }
        // Push the configuration.
        channelService.notifyChannel(cid);
    }
}
Also used : Channel(com.alibaba.otter.shared.common.model.config.channel.Channel) NodeAlarmEvent(com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent)

Aggregations

NodeAlarmEvent (com.alibaba.otter.shared.communication.model.arbitrate.NodeAlarmEvent): 6, AlarmRule (com.alibaba.otter.shared.common.model.config.alarm.AlarmRule): 2, ArrayList (java.util.ArrayList): 2, Date (java.util.Date): 2, BaseOtterTest (com.alibaba.otter.manager.biz.BaseOtterTest): 1, Channel (com.alibaba.otter.shared.common.model.config.channel.Channel): 1, Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline): 1, LogRecord (com.alibaba.otter.shared.common.model.config.record.LogRecord): 1, Test (org.testng.annotations.Test): 1