Search in sources :

Example 1 with ProcessStat

use of com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat in project otter by alibaba.

the class ProcessTimeoutRuleMonitor method explore.

/**
 * Evaluates the process-timeout alarm rules of one pipeline.
 *
 * <p>The pipeline id is taken from the first rule. For every real-time process of that pipeline the
 * elapsed time since the start of its first stage is computed (0 when the process has no stage
 * stats), and each rule is then checked against those elapsed times via {@code checkTimeout}. The
 * first non-empty message returned by {@code checkTimeout} is handed to {@code logRecordAlarm};
 * later rules are still checked, but their messages are not recorded here.
 *
 * @param rules alarm rules to evaluate; nothing happens when the list is null or empty
 */
@Override
public void explore(List<AlarmRule> rules) {
    if (CollectionUtils.isEmpty(rules)) {
        return;
    }

    Long pipelineId = rules.get(0).getPipelineId();
    List<ProcessStat> stats = processStatService.listRealtimeProcessStat(pipelineId);
    if (CollectionUtils.isEmpty(stats)) {
        return;
    }

    // Elapsed milliseconds per process id, measured from the start of the process's first stage.
    long currentMillis = System.currentTimeMillis();
    Map<Long, Long> elapsedByProcessId = new HashMap<Long, Long>();
    for (ProcessStat stat : stats) {
        boolean hasStages = !CollectionUtils.isEmpty(stat.getStageStats());
        Long elapsed = hasStages ? currentMillis - stat.getStageStats().get(0).getStartTime() : 0L;
        elapsedByProcessId.put(stat.getProcessId(), elapsed);
    }

    // Every rule is checked; only the first non-empty message is kept for the alarm record.
    String firstMessage = StringUtils.EMPTY;
    for (AlarmRule rule : rules) {
        String result = checkTimeout(rule, elapsedByProcessId);
        if (firstMessage.isEmpty()) {
            firstMessage = result;
        }
    }

    if (firstMessage.isEmpty()) {
        return;
    }
    logRecordAlarm(pipelineId, MonitorName.PROCESSTIMEOUT, firstMessage);
}
Also used : HashMap(java.util.HashMap) AlarmRule(com.alibaba.otter.shared.common.model.config.alarm.AlarmRule) ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat)

Example 2 with ProcessStat

use of com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat in project otter by alibaba.

the class ArbitrateViewServiceImpl method listProcesses.

/**
 * Builds a {@link ProcessStat} for the process nodes found under the given pipeline's process root
 * in ZooKeeper.
 *
 * <p>Process ids are read from the children of the process root and handled in ascending order.
 * For each process, one {@link StageStat} is created per stage child node (sorted with
 * {@link StageComparator}), followed by one extra entry describing the stage that is currently in
 * progress. A process is left out of the result entirely when it has no stage nodes and its node
 * data is empty or marked unused, or when its last recorded stage is already LOAD.
 *
 * <p>{@code NoNodeException}, {@code InterruptedException} and {@code UnsupportedEncodingException}
 * raised while reading a process are ignored; the process is still added with whatever stage stats
 * were collected before the failure.
 *
 * @param channelId id of the channel owning the pipeline
 * @param pipelineId id of the pipeline whose processes are listed
 * @return the collected process stats, ordered by process id
 * @throws ArbitrateException when ZooKeeper reports any other {@code KeeperException}
 */
public List<ProcessStat> listProcesses(Long channelId, Long pipelineId) {
    List<ProcessStat> processStats = new ArrayList<ProcessStat>();
    String processRoot = ManagePathUtils.getProcessRoot(channelId, pipelineId);
    IZkConnection connection = zookeeper.getConnection();
    // zkclient separates fetching stat information from the normal operations, so the native
    // ZooKeeper client is used here as an optimization
    ZooKeeper orginZk = ((ZooKeeperx) connection).getZookeeper();
    // fetch the list of all processes
    List<String> processNodes = zookeeper.getChildren(processRoot);
    List<Long> processIds = new ArrayList<Long>();
    for (String processNode : processNodes) {
        processIds.add(ManagePathUtils.getProcessId(processNode));
    }
    Collections.sort(processIds);
    for (int i = 0; i < processIds.size(); i++) {
        Long processId = processIds.get(i);
        // the current process may change while it is being read
        ProcessStat processStat = new ProcessStat();
        processStat.setPipelineId(pipelineId);
        processStat.setProcessId(processId);
        List<StageStat> stageStats = new ArrayList<StageStat>();
        processStat.setStageStats(stageStats);
        try {
            String processPath = ManagePathUtils.getProcess(channelId, pipelineId, processId);
            Stat zkProcessStat = new Stat();
            List<String> stages = orginZk.getChildren(processPath, false, zkProcessStat);
            Collections.sort(stages, new StageComparator());
            StageStat prev = null;
            for (String stage : stages) {
                // iterate over every stage under the process
                String stagePath = processPath + "/" + stage;
                Stat zkStat = new Stat();
                StageStat stageStat = new StageStat();
                stageStat.setPipelineId(pipelineId);
                stageStat.setProcessId(processId);
                byte[] bytes = orginZk.getData(stagePath, false, zkStat);
                if (bytes != null && bytes.length > 0) {
                    // Special handling of the data stored in zookeeper: the manager has no class
                    // matching the node's PipeKey object, so deserialization would fail; the '@'
                    // characters are stripped from the JSON to work around that
                    String json = StringUtils.remove(new String(bytes, "UTF-8"), '@');
                    EtlEventData data = JsonUtils.unmarshalFromString(json, EtlEventData.class);
                    stageStat.setNumber(data.getNumber());
                    stageStat.setSize(data.getSize());
                    // copy the event's extension values and add the node ids and description
                    Map exts = new HashMap();
                    if (!CollectionUtils.isEmpty(data.getExts())) {
                        exts.putAll(data.getExts());
                    }
                    exts.put("currNid", data.getCurrNid());
                    exts.put("nextNid", data.getNextNid());
                    exts.put("desc", data.getDesc());
                    stageStat.setExts(exts);
                }
                if (prev != null) {
                    // the start time is the end time of the previous stage node
                    stageStat.setStartTime(prev.getEndTime());
                } else {
                    // first stage (select): use the last-modified time of the process node
                    stageStat.setStartTime(zkProcessStat.getMtime());
                // the USED flag is set once await succeeds
                }
                // the stage's end time is the last-modified time of its own node
                stageStat.setEndTime(zkStat.getMtime());
                if (ArbitrateConstants.NODE_SELECTED.equals(stage)) {
                    stageStat.setStage(StageType.SELECT);
                } else if (ArbitrateConstants.NODE_EXTRACTED.equals(stage)) {
                    stageStat.setStage(StageType.EXTRACT);
                } else if (ArbitrateConstants.NODE_TRANSFORMED.equals(stage)) {
                    stageStat.setStage(StageType.TRANSFORM);
                // (disabled mapping kept from the original source)
                // } else if
                // (ArbitrateConstants.NODE_LOADED.equals(stage)) {
                // stageStat.setStage(StageType.LOAD);
                }
                // NOTE(review): a stage node matching none of the names above leaves the stage
                // unset here; if it is the last node, prev.getStage() below may be null - confirm
                prev = stageStat;
                stageStats.add(stageStat);
            }
            // add an entry for the stage that is currently being processed
            StageStat currentStageStat = new StageStat();
            currentStageStat.setPipelineId(pipelineId);
            currentStageStat.setProcessId(processId);
            if (prev == null) {
                byte[] bytes = orginZk.getData(processPath, false, zkProcessStat);
                if (bytes == null || bytes.length == 0) {
                    // treat the process as unused and ignore it (it is not added to the result)
                    continue;
                }
                ProcessNodeEventData nodeData = JsonUtils.unmarshalFromByte(bytes, ProcessNodeEventData.class);
                if (nodeData.getStatus().isUnUsed()) {
                    // skip this process (it is not added to the result)
                    continue;
                } else {
                    // the select operation is in progress
                    currentStageStat.setStage(StageType.SELECT);
                    currentStageStat.setStartTime(zkProcessStat.getMtime());
                }
            } else {
                // inspect the previous node to determine the current stage
                StageType stage = prev.getStage();
                if (stage.isSelect()) {
                    currentStageStat.setStage(StageType.EXTRACT);
                } else if (stage.isExtract()) {
                    currentStageStat.setStage(StageType.TRANSFORM);
                } else if (stage.isTransform()) {
                    currentStageStat.setStage(StageType.LOAD);
                } else if (stage.isLoad()) {
                    // already at the last node (the process is not added to the result)
                    continue;
                }
                // the start time is the end time of the previous node
                currentStageStat.setStartTime(prev.getEndTime());
            }
            if (currentStageStat.getStage().isLoad()) {
                // a load in progress is only reported for the first process node
                if (i == 0) {
                    stageStats.add(currentStageStat);
                }
            } else {
                // add in all other cases
                stageStats.add(currentStageStat);
            }
        } catch (NoNodeException e) {
        // ignore
        } catch (KeeperException e) {
            throw new ArbitrateException(e);
        } catch (InterruptedException e) {
        // ignore
        // NOTE(review): the thread's interrupt status is not restored here - confirm this is intended
        } catch (UnsupportedEncodingException e) {
        // ignore
        }
        // reached on normal completion and after an ignored exception, but not after a 'continue'
        processStats.add(processStat);
    }
    return processStats;
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat) Stat(org.apache.zookeeper.data.Stat) StageStat(com.alibaba.otter.shared.common.model.statistics.stage.StageStat) StageType(com.alibaba.otter.shared.common.model.config.enums.StageType) ZooKeeperx(com.alibaba.otter.shared.common.utils.zookeeper.ZooKeeperx) ProcessNodeEventData(com.alibaba.otter.shared.arbitrate.model.ProcessNodeEventData) StageComparator(com.alibaba.otter.shared.arbitrate.impl.setl.helper.StageComparator) IZkConnection(org.I0Itec.zkclient.IZkConnection) UnsupportedEncodingException(java.io.UnsupportedEncodingException) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) ZooKeeper(org.apache.zookeeper.ZooKeeper) ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat) ArbitrateException(com.alibaba.otter.shared.arbitrate.exception.ArbitrateException) StageStat(com.alibaba.otter.shared.common.model.statistics.stage.StageStat) HashMap(java.util.HashMap) Map(java.util.Map) KeeperException(org.apache.zookeeper.KeeperException)

Example 3 with ProcessStat

use of com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat in project otter by alibaba.

the class AnalysisStageStat method execute.

/**
 * Collects the real-time stage statistics of a pipeline, together with its mainstem, canal cursor
 * and channel status information, and publishes them into the given context.
 *
 * <p>Values placed in the context:
 * <ul>
 * <li>{@code pipeline}, {@code pipelineId} - the pipeline being analysed</li>
 * <li>{@code processStats} - the real-time process stats (an empty list when the service returns none)</li>
 * <li>{@code stageStart} - start time of the first stage of the first process, or 0 when unavailable</li>
 * <li>{@code stageEnd} - the current time in milliseconds</li>
 * <li>{@code offset} - {@code 800.0 / (stageEnd - stageStart)}, or 0 when no positive interval exists</li>
 * <li>{@code processTime} - per process id, the time elapsed since its first stage started (0 without stages)</li>
 * <li>{@code mainstemData}, {@code positionData}, {@code channelStatus} - arbitrate view/state data</li>
 * </ul>
 *
 * @param pipelineId id of the pipeline to analyse
 * @param context context receiving the values listed above
 * @throws Exception when one of the underlying services fails
 */
public void execute(@Param("pipelineId") Long pipelineId, Context context) throws Exception {
    Pipeline pipeline = pipelineService.findById(pipelineId);
    List<ProcessStat> processStats = processStatService.listRealtimeProcessStat(pipelineId);
    if (processStats == null) {
        // Normalise a missing result so the code below (and the consumer of the context) can
        // always treat "processStats" as a list.
        processStats = new ArrayList<ProcessStat>();
    }

    Long stageStart = 0L;
    Long stageEnd = System.currentTimeMillis();
    long interval = 0L;
    double offset = 0.0;

    // The start of the select stage of the earliest process is used as the overall start time.
    if (!processStats.isEmpty()) {
        ProcessStat firstProcess = processStats.get(0);
        if (firstProcess.getStageStats() != null && !firstProcess.getStageStats().isEmpty()) {
            stageStart = firstProcess.getStageStats().get(0).getStartTime();
        }
    }

    // Derive the scale factor applied to stage lengths from the total elapsed interval.
    if (stageStart > 0) {
        interval = stageEnd - stageStart;
    }
    if (interval > 0) {
        // NOTE(review): 800.0 looks like the rendered width the interval is mapped onto - confirm
        offset = 800.0 / interval;
    }

    // Total time each process has spent on its current task so far.
    Map<Long, Long> processTime = new HashMap<Long, Long>();
    for (ProcessStat processStat : processStats) {
        Long elapsed = 0L;
        if (processStat.getStageStats() != null && !processStat.getStageStats().isEmpty()) {
            elapsed = stageEnd - processStat.getStageStats().get(0).getStartTime();
        }
        processTime.put(processStat.getProcessId(), elapsed);
    }

    // Fetch the mainstem state, the canal cursor position and the channel status.
    MainStemEventData mainstemData = arbitrateViewService.mainstemData(pipeline.getChannelId(), pipelineId);
    PositionEventData positionData = arbitrateViewService.getCanalCursor(pipeline.getParameters().getDestinationName(), pipeline.getParameters().getMainstemClientId());
    ChannelStatus status = channelArbitrateEvent.status(pipeline.getChannelId());

    context.put("pipeline", pipeline);
    context.put("pipelineId", pipelineId);
    context.put("processStats", processStats);
    context.put("offset", offset);
    context.put("stageStart", stageStart);
    context.put("stageEnd", stageEnd);
    context.put("processTime", processTime);
    context.put("mainstemData", mainstemData);
    context.put("positionData", positionData);
    context.put("channelStatus", status);
}
Also used : PositionEventData(com.alibaba.otter.shared.arbitrate.model.PositionEventData) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat) MainStemEventData(com.alibaba.otter.shared.arbitrate.model.MainStemEventData) ChannelStatus(com.alibaba.otter.shared.common.model.config.channel.ChannelStatus) Date(java.util.Date) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 4 with ProcessStat

use of com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat in project otter by alibaba.

the class ChannelArbitrateEvent method canStart.

/**
 * Checks whether the given channel can be started.
 *
 * <p>For every pipeline of the channel, at least one of its select nodes, one of its extract
 * nodes and one of its load nodes must be among the currently live nodes, and no process may be
 * left over for the pipeline while the channel status is not "start". On the first failed check an
 * error is logged (for the node checks), a warning message is sent for the pipeline and
 * {@code false} is returned.
 *
 * @param channel the channel to check
 * @return {@code true} when every pipeline passes all checks, {@code false} otherwise
 */
private boolean canStart(Channel channel) {
    // Query the latest list of live nodes; the data held by the channel may be a cached result.
    List<Long> liveNodes = nodeEvent.liveNodes();
    for (Pipeline pipeline : channel.getPipelines()) {
        // select stage: at least one select node must be alive
        List<Long> nids = getNids(pipeline.getSelectNodes());
        boolean hasLiveNode = CollectionUtils.containsAny(liveNodes, nids);
        if (!hasLiveNode) {
            logger.error("current live nodes:{} , but select nids:{} , result:{}", new Object[] { liveNodes, nids, hasLiveNode });
            sendWarningMessage(pipeline.getId(), "can't restart by no select live node");
            return false;
        }

        // extract stage: at least one extract node must be alive
        nids = getNids(pipeline.getExtractNodes());
        hasLiveNode = CollectionUtils.containsAny(liveNodes, nids);
        if (!hasLiveNode) {
            logger.error("current live nodes:{} , but extract nids:{} , result:{}", new Object[] { liveNodes, nids, hasLiveNode });
            sendWarningMessage(pipeline.getId(), "can't restart by no extract live node");
            return false;
        }

        // transform/load stage: at least one load node must be alive
        nids = getNids(pipeline.getLoadNodes());
        hasLiveNode = CollectionUtils.containsAny(liveNodes, nids);
        if (!hasLiveNode) {
            logger.error("current live nodes:{} , but transform nids:{} , result:{}", new Object[] { liveNodes, nids, hasLiveNode });
            sendWarningMessage(pipeline.getId(), "can't restart by no transform live node");
            return false;
        }

        // There must be no process left to clean up, unless the channel is already started.
        List<ProcessStat> leftovers = arbitrateViewService.listProcesses(channel.getId(), pipeline.getId());
        if (leftovers.isEmpty() || status(channel.getId()).isStart()) {
            continue;
        }
        List<Long> leftoverIds = new ArrayList<Long>();
        for (ProcessStat leftover : leftovers) {
            leftoverIds.add(leftover.getProcessId());
        }
        sendWarningMessage(pipeline.getId(), "can't restart by exist process[" + StringUtils.join(leftoverIds, ',') + "]");
        return false;
    }
    return true;
}
Also used : ProcessStat(com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat) ArrayList(java.util.ArrayList) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Aggregations

ProcessStat (com.alibaba.otter.shared.common.model.statistics.stage.ProcessStat)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)2 ArbitrateException (com.alibaba.otter.shared.arbitrate.exception.ArbitrateException)1 StageComparator (com.alibaba.otter.shared.arbitrate.impl.setl.helper.StageComparator)1 EtlEventData (com.alibaba.otter.shared.arbitrate.model.EtlEventData)1 MainStemEventData (com.alibaba.otter.shared.arbitrate.model.MainStemEventData)1 PositionEventData (com.alibaba.otter.shared.arbitrate.model.PositionEventData)1 ProcessNodeEventData (com.alibaba.otter.shared.arbitrate.model.ProcessNodeEventData)1 AlarmRule (com.alibaba.otter.shared.common.model.config.alarm.AlarmRule)1 ChannelStatus (com.alibaba.otter.shared.common.model.config.channel.ChannelStatus)1 StageType (com.alibaba.otter.shared.common.model.config.enums.StageType)1 StageStat (com.alibaba.otter.shared.common.model.statistics.stage.StageStat)1 ZooKeeperx (com.alibaba.otter.shared.common.utils.zookeeper.ZooKeeperx)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 Date (java.util.Date)1 Map (java.util.Map)1 IZkConnection (org.I0Itec.zkclient.IZkConnection)1 KeeperException (org.apache.zookeeper.KeeperException)1