Use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in the otter project by alibaba.
From class ExtractRpcArbitrateEvent, method await.
/**
 * Blocks until an EXTRACT-stage process of the given pipeline can be handed out and returns its event data.
 *
 * <p>Each attempt waits for the pipeline permit, waits for a process id from the RPC stage controller and then
 * re-reads the channel status. When the channel is started or paused, the last event data of that process is
 * returned with its next node id set. In any other status the process is ignored (a warning is logged) and the
 * whole wait is retried.
 *
 * @param pipelineId id of the pipeline to wait on; must not be {@code null}
 * @return the event data of the selected process, with {@code nextNid} set to the chosen transform node
 * @throws InterruptedException if the calling thread is interrupted during one of the blocking waits
 * @throws ArbitrateException if no next transform node is available for the pipeline
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);
    // Retry with a loop rather than self-recursion: Java does not eliminate tail calls, so every ignored
    // process used to add one stack frame and a long run of them could end in a StackOverflowError.
    while (true) {
        PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
        // Block until the permit is granted.
        permitMonitor.waitForPermit();
        RpcStageController stageController = ArbitrateFactory.getInstance(pipelineId, RpcStageController.class);
        // The processId that satisfies the EXTRACT stage condition.
        Long processId = stageController.waitForProcess(StageType.EXTRACT);
        ChannelStatus status = permitMonitor.getChannelPermit();
        if (status.isStart() || status.isPause()) {
            // The pause status is processed as well, to avoid wrongly dropping a processId that belongs to a
            // paused channel, which would leave the channel hanging.
            EtlEventData eventData = stageController.getLastData(processId);
            // Look up the node that will handle the next (transform) stage.
            Node node = LoadBalanceFactory.getNextTransformNode(pipelineId);
            if (node == null) {
                // No downstream node is available.
                throw new ArbitrateException("Extract_single", "no next node");
            }
            eventData.setNextNid(node.getId());
            // This is the only path that returns a result.
            return eventData;
        }

        logger.warn("pipelineId[{}] extract ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
        String path = StagePathUtils.getProcess(pipelineId, processId);
        // The result is not used. NOTE(review): presumably called for a side effect on the zookeeper
        // client (e.g. touching/watching the process node) - confirm before removing.
        zookeeper.exists(path);
        // Fall through to the next loop iteration (this used to be the recursive call).
    }
}
Use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in the otter project by alibaba.
From class TransformStageListener, method stageChannged.
/**
 * Handles a change of the stage nodes that exist under one process.
 *
 * <p>If the current stage node is already present, the process id is removed from the reply set and nothing
 * else happens. If the process id is already in the reply set, it is not processed again. Otherwise, when the
 * previous stage node is present, its data is read and the process id is added to the reply queue if the
 * recorded next node id equals the current node id.
 *
 * @param processId  id of the process whose stage nodes changed
 * @param stageNodes names of the stage nodes currently present for that process
 */
public void stageChannged(Long processId, List<String> stageNodes) {
    try {
        // 1. Build the process path from pipelineId + processId.
        String processPath = StagePathUtils.getProcess(getPipelineId(), processId);

        // 2.1 The original note mentions error/end/current nodes; the code checks the current node only.
        if (stageNodes.contains(currentNode)) {
            boolean removed = replyProcessIds.remove(processId);
            if (removed && logger.isDebugEnabled()) {
                logger.debug("## remove reply id [{}]", processId);
            }
            // Nothing left to listen for on this process.
            return;
        }

        // Already queued for reply: avoid handling it twice.
        if (replyProcessIds.contains(processId)) {
            return;
        }

        // 2.2 Only continue when the previous stage node exists.
        if (!stageNodes.contains(prevNode)) {
            return;
        }

        // 2.2.1 Read the "next node" information written by the previous stage.
        byte[] payload = zookeeper.readData(processPath + "/" + prevNode);
        EtlEventData prevStageData = JsonUtils.unmarshalFromByte(payload, EtlEventData.class);
        boolean assignedToThisNode = prevStageData.getNextNid().equals(ArbitrateConfigUtils.getCurrentNid());
        if (assignedToThisNode) {
            // Add to the reply queue, which wakes up the blocked waiter.
            addReply(processId);
        }
    } catch (ZkNoNodeException e) {
        // The node does not exist, which means an error situation occurred; deliberately ignored.
    } catch (ZkException e) {
        logger.error("TransformStageListener", e);
    }
}
Use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in the otter project by alibaba.
From class TransformTask, method run.
/**
 * Main loop of the transform task.
 *
 * <p>While {@code running} is true, waits for the next transform event of the pipeline, wraps the actual
 * transform work in a {@link Runnable} and submits it to the executor as a pending future. The loop exits when
 * an interrupt is detected; any other failure is logged and answered with a rollback termin signal, after
 * which the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData eventData = arbitrateEventService.transformEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                @Override
                public void run() {
                    // Capture profiling information (start time only when profiling is enabled).
                    final boolean profilingOn = isProfiling();
                    final Long startedAt = profilingOn ? Long.valueOf(System.currentTimeMillis()) : null;

                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    final Thread self = Thread.currentThread();
                    final String originalName = self.getName();
                    self.setName(createTaskName(pipelineId, "transformWorker"));
                    try {
                        // Later on this could check whether the synchronized data is rowData.
                        List<PipeKey> inputKeys = (List<PipeKey>) eventData.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(inputKeys);
                        // The batch may be null: after a long block caused by memory shortage or network
                        // problems, fetching from the pipe can fail because the previous node may already
                        // have deleted the data.
                        if (batch == null) {
                            processMissData(pipelineId, "transform miss data with keys:" + inputKeys.toString());
                            return;
                        }

                        // Convert to the target side according to the tid; field processing could follow.
                        // For now rowBatchs and fileBatchs are assumed not to need heterogeneous conversion.
                        Map<Class, BatchObject> transformedRows = otterTransformerFactory.transform(batch.getRowBatch());
                        // One pipeline may contain both Mq and Db synchronization types.
                        batch.setRowBatch((RowBatch) transformedRows.get(EventData.class));

                        if (batch.getFileBatch() != null) {
                            Map<Class, BatchObject> transformedFiles = otterTransformerFactory.transform(batch.getFileBatch());
                            batch.setFileBatch((FileBatch) transformedFiles.get(FileData.class));
                        }

                        // Hand the result over to the next stage.
                        List<PipeKey> outputKeys = rowDataPipeDelegate.put(batch, eventData.getNextNid());
                        eventData.setDesc(outputKeys);

                        if (profilingOn) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.TRANSFORM, new AggregationItem(startedAt, finishedAt));
                        }

                        // Processing finished: signal completion of this stage.
                        arbitrateEventService.transformEvent().single(eventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] transformWork executor is interrrupt! data:%s", pipelineId, eventData), e);
                        } else {
                            logger.error(String.format("[%s] transformWork executor is error! data:%s", pipelineId, eventData), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        // Restore the thread name and drop the logging context set above.
                        Thread.currentThread().setName(originalName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Wrap as a pending task so that it can be dropped when the thread is shut down.
            SetlFuture pendingTransform = new SetlFuture(StageType.TRANSFORM, eventData.getProcessId(), pendingFuture, worker);
            executorService.execute(pendingTransform);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] transformTask is interrupted!", pipelineId), e);
                return;
            }

            logger.error(String.format("[%s] transformTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in the otter project by alibaba.
From class ExtractArbitrateEventTest, method test_extract.
/**
 * Drives two processes through the select and extract stages with the parallelism mocked to 2, then cleans up
 * the stage and process nodes of every process id that was recorded.
 */
@Test
public void test_extract() {
    Mockit.setUpMock(ArbitrateConfigUtils.class, new Object() {

        @Mock
        public int getParallelism(Long pipelineId) {
            // Parallelism used by this test.
            return 2;
        }
    });
    selectEvent = new SelectZooKeeperArbitrateEvent();
    extractEvent = new ExtractZooKeeperArbitrateEvent();
    final List<Long> recordedProcessIds = new ArrayList<Long>();
    try {
        // Fetch data - select stage.
        EtlEventData selectFirst = selectEvent.await(pipelineId);
        EtlEventData selectSecond = selectEvent.await(pipelineId);
        recordedProcessIds.add(selectFirst.getProcessId());
        recordedProcessIds.add(selectSecond.getProcessId());
        selectEvent.single(selectFirst);
        selectEvent.single(selectSecond);

        // Extract stage.
        EtlEventData extractFirst = extractEvent.await(pipelineId);
        EtlEventData extractSecond = extractEvent.await(pipelineId);
        extractEvent.single(extractFirst);
        extractEvent.single(extractSecond);

        ArbitrateFactory.destory(pipelineId);
    } catch (InterruptedException e) {
        want.fail();
    } finally {
        // Remove the stage and process nodes of every recorded process.
        for (int i = 0; i < recordedProcessIds.size(); i++) {
            Long recordedId = recordedProcessIds.get(i);
            destoryStage(recordedId, ArbitrateConstants.NODE_SELECTED);
            destoryStage(recordedId, ArbitrateConstants.NODE_EXTRACTED);
            destoryProcess(recordedId);
        }
    }
}
Use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in the otter project by alibaba.
From class LoadArbitrateEventTest, method test_load.
/**
 * Drives two processes through the select, extract, transform and load stages with the parallelism mocked to 2.
 * After each load a termin ack is sent, and the test asserts that the second loaded process id is greater than
 * the first one.
 */
@Test
public void test_load() {
    Mockit.setUpMock(ArbitrateConfigUtils.class, new Object() {

        @Mock
        public int getParallelism(Long pipelineId) {
            // Parallelism used by this test.
            return 2;
        }
    });
    selectEvent = new SelectZooKeeperArbitrateEvent();
    extractEvent = new ExtractZooKeeperArbitrateEvent();
    transformEvent = new TransformZooKeeperArbitrateEvent();
    loadEvent = new LoadZooKeeperArbitrateEvent();
    terminEvent = (TerminZooKeeperArbitrateEvent) this.getBeanFactory().getBean("terminZooKeeperEvent");
    loadEvent.setTerminEvent(terminEvent);
    final List<Long> recordedProcessIds = new ArrayList<Long>();
    try {
        // Fetch data - select stage.
        EtlEventData selectFirst = selectEvent.await(pipelineId);
        EtlEventData selectSecond = selectEvent.await(pipelineId);
        recordedProcessIds.add(selectFirst.getProcessId());
        recordedProcessIds.add(selectSecond.getProcessId());
        selectEvent.single(selectFirst);
        selectEvent.single(selectSecond);

        // Extract stage.
        EtlEventData extractFirst = extractEvent.await(pipelineId);
        EtlEventData extractSecond = extractEvent.await(pipelineId);
        extractEvent.single(extractFirst);
        extractEvent.single(extractSecond);

        // Transform stage.
        EtlEventData transformFirst = transformEvent.await(pipelineId);
        EtlEventData transformSecond = transformEvent.await(pipelineId);
        transformEvent.single(transformFirst);
        transformEvent.single(transformSecond);

        SelectStageListener selectListener = ArbitrateFactory.getInstance(pipelineId, SelectStageListener.class);
        selectListener.destory();

        // Load stage - first process.
        EtlEventData loadFirst = loadEvent.await(pipelineId);
        loadEvent.single(loadFirst);
        Long firstProcessId = loadFirst.getProcessId();
        TerminEventData firstTermin = new TerminEventData();
        firstTermin.setPipelineId(pipelineId);
        firstTermin.setProcessId(firstProcessId);
        // Send the ack signal, which deletes the termin node.
        terminEvent.ack(firstTermin);

        // Load stage - second process; it must carry a greater process id than the first.
        EtlEventData loadSecond = loadEvent.await(pipelineId);
        want.bool(loadFirst.getProcessId() < loadSecond.getProcessId()).is(true);
        loadEvent.single(loadSecond);
        Long secondProcessId = loadSecond.getProcessId();
        TerminEventData secondTermin = new TerminEventData();
        secondTermin.setPipelineId(pipelineId);
        secondTermin.setProcessId(secondProcessId);
        // Send the ack signal, which deletes the termin node.
        terminEvent.ack(secondTermin);

        sleep(2000L);
        ArbitrateFactory.destory(pipelineId);
    } catch (InterruptedException e) {
        want.fail();
    }
}
Aggregations