Usage of com.alibaba.otter.node.etl.extract.SetlFuture in the otter project by Alibaba.
From class TransformTask, method run:
/**
 * Main loop of the transform stage.
 *
 * <p>While {@code running} is true, blocks on the arbitrate service for the next transform
 * event of this pipeline, wraps the actual work in a {@link Runnable}, and submits it to
 * {@code executorService} as a {@code SetlFuture} registered with {@code pendingFuture}.
 *
 * <p>Failure handling in the loop itself: an interrupt (as judged by {@code isInterrupt})
 * is logged at info level and ends the loop; any other throwable is logged at error level
 * and a rollback termin is sent, after which the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEvent = arbitrateEventService.transformEvent().await(pipelineId);
            Runnable transformWork = new Runnable() {

                @Override
                public void run() {
                    // Profiling: remember the start timestamp only when profiling is switched on.
                    final boolean profilingEnabled = isProfiling();
                    final Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;

                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the executing thread for the duration of the work; restored in finally.
                    final Thread worker = Thread.currentThread();
                    final String previousName = worker.getName();
                    worker.setName(createTaskName(pipelineId, "transformWorker"));
                    try {
                        // A later version could check whether the synchronized data is row data.
                        List<PipeKey> inputKeys = (List<PipeKey>) etlEvent.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(inputKeys);
                        if (batch == null) {
                            // The pipe may return null: after a long block (low memory, network
                            // trouble) the previous node may already have deleted the data.
                            processMissData(pipelineId, "transform miss data with keys:" + inputKeys.toString());
                            return;
                        }

                        // Convert to the target-side table ids; field-level processing could be
                        // added here later. Row batches and file batches are assumed, for now,
                        // not to need heterogeneous conversion into each other.
                        Map<Class, BatchObject> transformedRows = otterTransformerFactory.transform(batch.getRowBatch());
                        // One pipeline may carry both MQ and DB sync types; only the EventData
                        // result is kept here.
                        batch.setRowBatch((RowBatch) transformedRows.get(EventData.class));
                        if (batch.getFileBatch() != null) {
                            Map<Class, BatchObject> transformedFiles = otterTransformerFactory.transform(batch.getFileBatch());
                            batch.setFileBatch((FileBatch) transformedFiles.get(FileData.class));
                        }

                        // Hand the transformed batch over to the next stage.
                        List<PipeKey> outputKeys = rowDataPipeDelegate.put(batch, etlEvent.getNextNid());
                        etlEvent.setDesc(outputKeys);

                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.TRANSFORM, new AggregationItem(startedAt, finishedAt));
                        }

                        // Signal the arbitrator that this stage is complete.
                        arbitrateEventService.transformEvent().single(etlEvent);
                    } catch (Throwable failure) {
                        if (isInterrupt(failure)) {
                            logger.info(String.format("[%s] transformWork executor is interrrupt! data:%s", pipelineId, etlEvent), failure);
                        } else {
                            logger.error(String.format("[%s] transformWork executor is error! data:%s", pipelineId, etlEvent), failure);
                            sendRollbackTermin(pipelineId, failure);
                        }
                    } finally {
                        worker.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Build a pending task so it can be cancelled when the thread is shut down.
            SetlFuture transformFuture = new SetlFuture(StageType.TRANSFORM, etlEvent.getProcessId(), pendingFuture, transformWork);
            executorService.execute(transformFuture);
        } catch (Throwable failure) {
            if (isInterrupt(failure)) {
                logger.info(String.format("[%s] transformTask is interrupted!", pipelineId), failure);
                return;
            }
            logger.error(String.format("[%s] transformTask is error!", pipelineId), failure);
            sendRollbackTermin(pipelineId, failure);
        }
    }
}
Usage of com.alibaba.otter.node.etl.extract.SetlFuture in the otter project by Alibaba.
From class SelectTask, method processSelect:
/**
 * Select-stage loop: repeatedly fetches a message from {@code otterSelector} and starts one
 * s/e/t/l process for it.
 *
 * <p>Per iteration, in order: wait on {@code canStartSelector}; optionally call
 * {@code checkContinueWork()}; wait for the pipeline permit; fetch a message; snapshot
 * {@code rversion}. If {@code canStartSelector.state()} is false the fetched batch is rolled
 * back and the iteration restarts. An empty message is only recorded in {@code batchBuffer}.
 * Otherwise the loop awaits a select event, re-fetches the message if {@code rversion} changed
 * in the meantime, records a {@code BatchTermin}, and submits a worker (wrapped in a
 * {@code SetlFuture}) that merges the events into a {@code RowBatch}, writes it to the pipe for
 * the next node and signals completion.
 *
 * <p>A throwable in the loop that is not an interrupt is logged and answered with a rollback
 * termin; an interrupt is logged and ends the method.
 */
private void processSelect() {
while (running) {
try {
// Wait for the ProcessTermin exhaust; this call blocks.
// When ProcessTermin detects a rollback it signals a pause immediately, which is more timely than the distributed permit.
canStartSelector.get();
// Check whether this node is the current working node: the S module must not work on two nodes at once, otherwise the selector easily produces corrupted data.
if (needCheck) {
checkContinueWork();
}
// When the pipeline is blocked/suspended, wait until the manager has handled it and resumed synchronization.
// This also lets the loop stop promptly after a rollback.
arbitrateEventService.toolEvent().waitForPermit(pipelineId);
// The problem startVersion is meant to solve: when a rollback occurs, determine as far as possible whether the fetched data
// was taken before or after the rollback, and try to discard the pre-rollback data.
// (After a rollback, the batches fetched earlier have not actually executed successfully, and get returns the batch after them;
// if that batch is not discarded, later data would execute first, then processing would return to the failure point and run again.)
// Former position of the version snapshot (kept for reference, see the note below):
// int startVersion = rversion.get();
Message gotMessage = otterSelector.selector();
// Modified by ljh at 2012-09-10: the startVersion read must happen after the data has been fetched.
// Reading it BEFORE the fetch (this hit a concurrency bug):
// a. startVersion is read first, then the data is fetched; a rollback starts and completes during the fetch,
//    so by the time selector returns the data has already been read to the end.
// b. The version check then sees a change and triggers another fetch; the get cursor is already at the end of
//    the queue, nothing can be fetched, and the loop waits forever.
// Reading it AFTER the fetch (a small flaw remains):
// a. When rollback and selector run concurrently and old pre-rollback data is fetched, startVersion has not been
//    initialised yet, so the rollback goes undetected and later changes are synchronized early.
//    (Fairly likely; depends on the interval between selector returning and startVersion being initialised.)
int startVersion = rversion.get();
if (canStartSelector.state() == false) {
// Has an error occurred?
// Roll back the data fetched at the instant of the error: otterSelector.selector() loops internally, so a rollback may have happened that it has not noticed yet.
rollback(gotMessage.getId());
continue;
}
if (CollectionUtils.isEmpty(gotMessage.getDatas())) {
// Empty data still has to advance the cursor; the data may have been filtered out as loop-back data.
// Add it to the buffer of batches awaiting acknowledgement; no need to await a termin signal because no s/e/t/l process was started.
batchBuffer.put(new BatchTermin(gotMessage.getId(), false));
continue;
}
final EtlEventData etlEventData = arbitrateEventService.selectEvent().await(pipelineId);
if (rversion.get() != startVersion) {
// The version changed, so a rollback happened in between and this data must be discarded.
logger.warn("rollback happend , should skip this data and get new message.");
// Make sure the rollback has completed.
canStartSelector.get();
// Sleep for a while so the channel has enough time to switch to the paused state; even if it has not become PAUSE, ***MemoryArbitrateEvent has a rollback operation as a fallback.
Thread.sleep(10 * 1000);
arbitrateEventService.toolEvent().waitForPermit(pipelineId);
// At this point one s/e/t/l round must be executed regardless of whether there is data.
gotMessage = otterSelector.selector();
}
final Message message = gotMessage;
final BatchTermin batchTermin = new BatchTermin(message.getId(), etlEventData.getProcessId());
// Add to the buffer of batches awaiting acknowledgement.
batchBuffer.put(batchTermin);
Runnable task = new Runnable() {
public void run() {
// Set up profiling information.
boolean profiling = isProfiling();
Long profilingStartTime = null;
if (profiling) {
profilingStartTime = System.currentTimeMillis();
}
MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
// Rename the executing thread while the work runs; the original name is restored in finally.
String currentName = Thread.currentThread().getName();
Thread.currentThread().setName(createTaskName(pipelineId, "SelectWorker"));
try {
// Note: this assigns the enclosing task's pipeline field from the worker thread.
pipeline = configClientService.findPipeline(pipelineId);
List<EventData> eventData = message.getDatas();
// Default to the event's start time; prefer the execute time of the first record when there is one.
long startTime = etlEventData.getStartTime();
if (!CollectionUtils.isEmpty(eventData)) {
startTime = eventData.get(0).getExecuteTime();
}
Channel channel = configClientService.findChannelByPipelineId(pipelineId);
RowBatch rowBatch = new RowBatch();
// Build the unique identity (channel, pipeline, process) of this batch.
Identity identity = new Identity();
identity.setChannelId(channel.getId());
identity.setPipelineId(pipelineId);
identity.setProcessId(etlEventData.getProcessId());
rowBatch.setIdentity(identity);
// Merge the records into the row batch.
for (EventData data : eventData) {
rowBatch.merge(data);
}
long nextNodeId = etlEventData.getNextNid();
// Write the batch to the pipe for the next node and record the resulting keys on the event.
List<PipeKey> pipeKeys = rowDataPipeDelegate.put(new DbBatch(rowBatch), nextNodeId);
etlEventData.setDesc(pipeKeys);
etlEventData.setNumber((long) eventData.size());
// Use the first record of the original data.
etlEventData.setFirstTime(startTime);
etlEventData.setBatchId(message.getId());
if (profiling) {
Long profilingEndTime = System.currentTimeMillis();
stageAggregationCollector.push(pipelineId, StageType.SELECT, new AggregationItem(profilingStartTime, profilingEndTime));
}
// Signal that the select stage for this process is complete.
arbitrateEventService.selectEvent().single(etlEventData);
} catch (Throwable e) {
// Non-interrupt failures trigger a rollback termin; interrupts are only logged.
if (!isInterrupt(e)) {
logger.error(String.format("[%s] selectwork executor is error! data:%s", pipelineId, etlEventData), e);
sendRollbackTermin(pipelineId, e);
} else {
logger.info(String.format("[%s] selectwork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
}
} finally {
Thread.currentThread().setName(currentName);
MDC.remove(OtterConstants.splitPipelineLogFileKey);
}
}
};
// Build a pending task so it can be cancelled when the thread is shut down.
SetlFuture extractFuture = new SetlFuture(StageType.SELECT, etlEventData.getProcessId(), pendingFuture, task);
executorService.execute(extractFuture);
} catch (Throwable e) {
// Non-interrupt failures send a rollback termin and the loop continues; an interrupt ends the method.
if (!isInterrupt(e)) {
logger.error(String.format("[%s] selectTask is error!", pipelineId), e);
sendRollbackTermin(pipelineId, e);
} else {
logger.info(String.format("[%s] selectTask is interrrupt!", pipelineId), e);
return;
}
}
}
}
Usage of com.alibaba.otter.node.etl.extract.SetlFuture in the otter project by Alibaba.
From class LoadTask, method run:
/**
 * Main loop of the load stage.
 *
 * <p>While {@code running} is true, blocks on the arbitrate service for the next load event of
 * this pipeline, wraps the load work in a {@link Runnable}, and submits it to
 * {@code executorService} as a {@code SetlFuture} registered with {@code pendingFuture}.
 *
 * <p>Failure handling in the loop itself: an interrupt (as judged by {@code isInterrupt}) is
 * logged at info level and ends the loop; any other throwable is logged at error level and a
 * rollback termin is sent, after which the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEvent = arbitrateEventService.loadEvent().await(pipelineId);
            Runnable loadWork = new Runnable() {

                @Override
                public void run() {
                    // Profiling: remember the start timestamp only when profiling is switched on.
                    final boolean profilingEnabled = isProfiling();
                    final Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;

                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the executing thread for the duration of the work; restored in finally.
                    final Thread worker = Thread.currentThread();
                    final String previousName = worker.getName();
                    worker.setName(createTaskName(pipelineId, "LoadWorker"));

                    // Stays null until load() has returned; the catch block uses that to tell
                    // whether the failure happened after the load itself.
                    List<LoadContext> loadedContexts = null;
                    try {
                        // A later version could check whether the synchronized data is row data.
                        List<PipeKey> inputKeys = (List<PipeKey>) etlEvent.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(inputKeys);
                        if (batch == null) {
                            // The pipe may return null: after a long block (low memory, network
                            // trouble) the previous node may already have deleted the data.
                            processMissData(pipelineId, "load miss data with keys:" + inputKeys.toString());
                            return;
                        }

                        // Perform the actual load.
                        otterLoaderFactory.setStartTime(batch.getRowBatch().getIdentity(), etlEvent.getStartTime());
                        loadedContexts = otterLoaderFactory.load(batch);

                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.LOAD, new AggregationItem(startedAt, finishedAt));
                        }

                        // Signal the arbitrator that this stage is complete.
                        arbitrateEventService.loadEvent().single(etlEvent);
                    } catch (Throwable failure) {
                        if (isInterrupt(failure)) {
                            logger.info(String.format("[%s] loadWork executor is interrrupt! data:%s", pipelineId, etlEvent), failure);
                        } else {
                            logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, etlEvent), failure);
                        }

                        if (loadedContexts != null) {
                            // load() had already returned, so the failure came afterwards (e.g.
                            // while notifying the arbitrator); let the interceptor record each
                            // DB load context.
                            for (LoadContext loaded : loadedContexts) {
                                try {
                                    if (loaded instanceof DbLoadContext) {
                                        dbLoadInterceptor.error((DbLoadContext) loaded);
                                    }
                                } catch (Throwable interceptorFailure) {
                                    logger.error(String.format("[%s] interceptor process error failed!", pipelineId), interceptorFailure);
                                }
                            }
                        }

                        // Evaluated again rather than cached, exactly as the original does.
                        if (!isInterrupt(failure)) {
                            sendRollbackTermin(pipelineId, failure);
                        }
                    } finally {
                        worker.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Build a pending task so it can be cancelled when the thread is shut down.
            SetlFuture loadFuture = new SetlFuture(StageType.LOAD, etlEvent.getProcessId(), pendingFuture, loadWork);
            executorService.execute(loadFuture);
        } catch (Throwable failure) {
            if (isInterrupt(failure)) {
                logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), failure);
                return;
            }
            logger.error(String.format("[%s] loadTask is error!", pipelineId), failure);
            // Send the rollback signal. The original kept a disabled call here,
            // arbitrateEventService.loadEvent().release(pipelineId), noting that the lock
            // should be released before the rollback signal is sent.
            sendRollbackTermin(pipelineId, failure);
        }
    }
}
Aggregations