Use of com.alibaba.otter.shared.common.model.config.channel.ChannelStatus in the otter project by Alibaba.
Class PipelineMonitor, method explore.
/**
 * Splits the given alarm rules by monitor type and hands each non-empty group to the
 * matching rule monitor.
 *
 * <p>The pipeline is taken from the first rule. Nothing is dispatched when the rule list
 * is null or empty, or when the channel status of that pipeline is null or stopped.
 * Only rules for which {@code checkEnable(rule, now)} returns true are dispatched.
 *
 * @param rules alarm rules to evaluate; the pipeline id is read from the first element
 */
@Override
public void explore(List<AlarmRule> rules) {
    // Guard: rules.get(0) below would throw IndexOutOfBoundsException on an empty list.
    if (rules == null || rules.isEmpty()) {
        return;
    }

    Long pipelineId = rules.get(0).getPipelineId();
    Pipeline pipeline = pipelineService.findById(pipelineId);
    // Ignore alarms while the channel is stopped (or its status is unknown).
    ChannelStatus status = arbitrateManageService.channelEvent().status(pipeline.getChannelId());
    if (status == null || status.isStop()) {
        return;
    }

    List<AlarmRule> delayTimeRules = new LinkedList<AlarmRule>();
    List<AlarmRule> pipelineTimeoutRules = new LinkedList<AlarmRule>();
    List<AlarmRule> processTimeoutRules = new LinkedList<AlarmRule>();
    List<AlarmRule> positionTimeoutRules = new LinkedList<AlarmRule>();

    // A single timestamp so every rule is checked against the same instant.
    Date now = new Date();
    for (AlarmRule rule : rules) {
        List<AlarmRule> bucket;
        switch (rule.getMonitorName()) {
            case DELAYTIME:
                bucket = delayTimeRules;
                break;
            case PIPELINETIMEOUT:
                bucket = pipelineTimeoutRules;
                break;
            case PROCESSTIMEOUT:
                bucket = processTimeoutRules;
                break;
            case POSITIONTIMEOUT:
                bucket = positionTimeoutRules;
                break;
            case EXCEPTION:
                // EXCEPTION rules are not dispatched by this method. They used to be collected
                // into a list that was never read, so that dead list has been removed.
                // NOTE(review): assumes checkEnable is a side-effect-free predicate - confirm.
            default:
                bucket = null;
                break;
        }
        if (bucket != null && checkEnable(rule, now)) {
            bucket.add(rule);
        }
    }

    if (!delayTimeRules.isEmpty()) {
        delayStatRuleMonitor.explore(delayTimeRules);
    }
    if (!pipelineTimeoutRules.isEmpty()) {
        pipelineTimeoutRuleMonitor.explore(pipelineTimeoutRules);
    }
    if (!processTimeoutRules.isEmpty()) {
        processTimeoutRuleMonitor.explore(processTimeoutRules);
    }
    if (!positionTimeoutRules.isEmpty()) {
        positionTimeoutRuleMonitor.explore(positionTimeoutRules);
    }
}
Use of com.alibaba.otter.shared.common.model.config.channel.ChannelStatus in the otter project by Alibaba.
Class SelectZooKeeperArbitrateEvent, method await.
// private TerminArbitrateEvent terminEvent;
/**
 * <pre>
 * Algorithm:
 * 1. Check the current permit and block until it is granted (handles the channel's pause state).
 * 2. Block until an eligible processId is obtained, create an empty EventData object,
 *    attach the next-node information and return it.
 * </pre>
 *
 * @param pipelineId id of the pipeline to wait on; must not be null
 * @return a new event carrying the pipeline id, process id, start time and next node id
 * @throws InterruptedException declared for the blocking waits
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);

    PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
    // Block until the permit is granted.
    permitMonitor.waitForPermit();

    SelectStageListener stageListener = ArbitrateFactory.getInstance(pipelineId, SelectStageListener.class);
    // The processId that satisfies the conditions.
    Long processId = stageListener.waitForProcess();

    ChannelStatus status = permitMonitor.getChannelPermit();
    if (!status.isStart()) {
        logger.warn("pipelineId[{}] select ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
        // add by ljh 2013-02-01
        // A bug that was hit:
        // a. One machine issued a RESTART command and started deleting the process list.
        // b. Another machine (the select worker node) had not yet received the PAUSE push,
        //    so it still created one more process node.
        // c. When the PAUSE command arrived later, the processId was discarded, leaving an unused processId.
        // Deleting here means thinking about concurrency with the restart command deleting the
        // same processId in parallel; so far this looks fine.
        // The outcome of the delete is ignored.
        zookeeper.delete(StagePathUtils.getProcess(pipelineId, processId));
        // Recurse to wait for the next process.
        return await(pipelineId);
    }

    try {
        EtlEventData eventData = new EtlEventData();
        eventData.setPipelineId(pipelineId);
        eventData.setProcessId(processId);
        // Record the current time as the start time.
        eventData.setStartTime(System.currentTimeMillis());

        // Look up the next processing node.
        Node nextNode = LoadBalanceFactory.getNextExtractNode(pipelineId);
        if (nextNode == null) {
            // terminEvent.single(termin);
            throw new ArbitrateException("Select_single", "no next node");
        }

        eventData.setNextNid(nextNode.getId());
        // Mark as used.
        markUsed(eventData);
        // This is the only path that returns a result.
        return eventData;
    } catch (ZkNoNodeException e) {
        logger.error("pipeline[{}] processId[{}] is invalid , retry again", pipelineId, processId);
        // The node does not exist, which indicates an error situation; recurse to fetch again.
        return await(pipelineId);
    } catch (ZkException e) {
        throw new ArbitrateException("Select_await", e.getMessage(), e);
    }
}
Use of com.alibaba.otter.shared.common.model.config.channel.ChannelStatus in the otter project by Alibaba.
Class TransformZooKeeperArbitrateEvent, method await.
/**
 * <pre>
 * Algorithm:
 * 1. Check the current permit and block until it is granted (handles the channel's pause state).
 * 2. Block until an eligible processId is obtained.
 * 3. Check the instantaneous permit status (error signals may occur while blocked waiting
 *    for the processId, and the process node may have been deleted).
 * 4. Read the EventData passed on by the previous stage, attach the next-node information and return it.
 * </pre>
 *
 * @param pipelineId id of the pipeline to wait on; must not be null
 * @return the event data read from the extract-stage path, with its next node id set to the current node
 * @throws InterruptedException declared for the blocking waits
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);

    PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
    // Block until the permit is granted.
    permitMonitor.waitForPermit();

    TransformStageListener stageListener = ArbitrateFactory.getInstance(pipelineId, TransformStageListener.class);
    // The processId that satisfies the conditions.
    Long processId = stageListener.waitForProcess();

    // Query the status right now; it may change at any time.
    ChannelStatus status = permitMonitor.getChannelPermit();
    if (!status.isStart()) {
        logger.info("pipelineId[{}] transform ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
        // Recurse to wait again.
        return await(pipelineId);
    }

    // Build the path from pipelineId + processId.
    String extractStagePath = StagePathUtils.getExtractStage(pipelineId, processId);
    try {
        byte[] raw = zookeeper.readData(extractStagePath);
        EtlEventData eventData = JsonUtils.unmarshalFromByte(raw, EtlEventData.class);
        // The next node is this node itself.
        eventData.setNextNid(ArbitrateConfigUtils.getCurrentNid());
        // This is the only path that returns a result.
        return eventData;
    } catch (ZkNoNodeException e) {
        logger.error("pipeline[{}] processId[{}] is invalid , retry again", pipelineId, processId);
        // The node does not exist, which indicates an error situation; recurse to fetch again.
        return await(pipelineId);
    } catch (ZkException e) {
        throw new ArbitrateException("transform_await", e.getMessage(), e);
    }
}
Use of com.alibaba.otter.shared.common.model.config.channel.ChannelStatus in the otter project by Alibaba.
Class MainstemMonitor, method initMainstem.
/**
 * Tries to create the main-stem node for this pipeline with the current node id.
 *
 * <p>Does nothing when this monitor is stopped or the channel permit reports stop. If the
 * node already exists, the existing data is loaded into {@code activeData}; if it has
 * vanished in the meantime the whole attempt is retried.
 */
public void initMainstem() {
    if (isStop()) {
        return;
    }

    PermitMonitor permitMonitor = ArbitrateFactory.getInstance(getPipelineId(), PermitMonitor.class);
    ChannelStatus permit = permitMonitor.getChannelPermit(true);
    if (permit.isStop()) {
        // Already closed, so exit.
        return;
    }

    Long currentNid = ArbitrateConfigUtils.getCurrentNid();
    String mainstemPath = StagePathUtils.getMainStem(getPipelineId());

    MainStemEventData candidate = new MainStemEventData();
    candidate.setStatus(MainStemEventData.Status.TAKEING);
    candidate.setPipelineId(getPipelineId());
    // Set the current nid.
    candidate.setNid(currentNid);
    // Serialize.
    byte[] payload = JsonUtils.marshalToByte(candidate);

    try {
        mutex.set(false);
        zookeeper.create(mainstemPath, payload, CreateMode.EPHEMERAL);
        activeData = candidate;
        // Fire the "active enter" event.
        processActiveEnter();
        mutex.set(true);
    } catch (ZkNodeExistsException e) {
        byte[] existing = zookeeper.readData(mainstemPath, true);
        if (existing == null) {
            // The node no longer exists; try again immediately.
            initMainstem();
            return;
        }

        activeData = JsonUtils.unmarshalFromByte(existing, MainStemEventData.class);
        if (currentNid.equals(activeData.getNid())) {
            // On reload the node is created repeatedly; if the owner is this node, release the mutex.
            mutex.set(true);
        }
    }
}
Use of com.alibaba.otter.shared.common.model.config.channel.ChannelStatus in the otter project by Alibaba.
Class ExtractRpcArbitrateEvent, method await.
/**
 * Waits for the permit and for the next EXTRACT-stage process, then returns that process's
 * last event data with the next transform node id attached.
 *
 * <p>When the channel permit is neither start nor pause, the process is skipped and the
 * wait starts over.
 *
 * @param pipelineId id of the pipeline to wait on; must not be null
 * @return the last event data of the process, with its next node id set
 * @throws InterruptedException declared for the blocking waits
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);

    PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
    // Block until the permit is granted.
    permitMonitor.waitForPermit();

    RpcStageController controller = ArbitrateFactory.getInstance(pipelineId, RpcStageController.class);
    // The processId that satisfies the conditions.
    Long processId = controller.waitForProcess(StageType.EXTRACT);

    ChannelStatus status = permitMonitor.getChannelPermit();
    if (!status.isStart() && !status.isPause()) {
        logger.warn("pipelineId[{}] extract ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
        String processPath = StagePathUtils.getProcess(pipelineId, processId);
        // The result of the existence check is not used here.
        zookeeper.exists(processPath);
        // Recurse to wait again.
        return await(pipelineId);
    }

    // The pause state is processed as well, to avoid mistakenly deleting a paused processId,
    // which would leave the channel hanging.
    EtlEventData eventData = controller.getLastData(processId);
    // Look up the next processing node.
    Node nextNode = LoadBalanceFactory.getNextTransformNode(pipelineId);
    if (nextNode == null) {
        // No downstream node available.
        throw new ArbitrateException("Extract_single", "no next node");
    }

    eventData.setNextNid(nextNode.getId());
    // This is the only path that returns a result.
    return eventData;
}
Aggregations