Use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.
Class TransformTask, method run.
/**
 * Main loop of the transform stage.
 *
 * <p>While {@code running} is true, obtains the next transform event for this pipeline from the
 * arbitrate service, wraps the processing of that event in a {@code Runnable}, and hands it to
 * {@code executorService} as a {@code SetlFuture}. The worker reads the batch named by the
 * event's pipe keys, transforms its row batch (and file batch, if present), writes the result
 * to the pipe for the next node, and then signals the event as done via {@code single}.
 *
 * <p>Failures are logged. A non-interrupt failure additionally triggers
 * {@code sendRollbackTermin}. An interrupt caught in the outer loop ends this method.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData event = arbitrateEventService.transformEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                @Override
                public void run() {
                    // Set up profiling info: remember the start time only when profiling is on.
                    final boolean profilingEnabled = isProfiling();
                    final Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;

                    // Tag log output with the pipeline id and rename the thread for the duration of the work.
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    final Thread self = Thread.currentThread();
                    final String previousName = self.getName();
                    self.setName(createTaskName(pipelineId, "transformWorker"));
                    try {
                        // Later on this could check whether the synced data really is row data.
                        List<PipeKey> sourceKeys = (List<PipeKey>) event.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        if (batch == null) {
                            // May be null: with low memory or a network fault causing a long block,
                            // reading from the pipe can fail and the data may already have been
                            // deleted by the previous node.
                            processMissData(pipelineId, "transform miss data with keys:" + sourceKeys.toString());
                            return;
                        }

                        // Convert to the target-side tid; field-level processing could be added later.
                        // For now, row batches and file batches are assumed not to convert into each other.
                        Map<Class, BatchObject> rowResult = otterTransformerFactory.transform(batch.getRowBatch());
                        // One pipeline may carry both Mq and Db sync types; only the EventData result is kept here.
                        batch.setRowBatch((RowBatch) rowResult.get(EventData.class));
                        if (batch.getFileBatch() != null) {
                            Map<Class, BatchObject> fileResult = otterTransformerFactory.transform(batch.getFileBatch());
                            batch.setFileBatch((FileBatch) fileResult.get(FileData.class));
                        }

                        // Hand the batch over to the next stage and record the new keys on the event.
                        event.setDesc(rowDataPipeDelegate.put(batch, event.getNextNid()));

                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId,
                                                           StageType.TRANSFORM,
                                                           new AggregationItem(startedAt, finishedAt));
                        }

                        // Processing finished: signal completion via single.
                        arbitrateEventService.transformEvent().single(event);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] transformWork executor is interrrupt! data:%s", pipelineId, event), e);
                        } else {
                            logger.error(String.format("[%s] transformWork executor is error! data:%s", pipelineId, event), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        // Always restore the thread name and clear the logging context.
                        self.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Build a pending task so it can be exited when the thread is shut down.
            SetlFuture transformFuture = new SetlFuture(StageType.TRANSFORM, event.getProcessId(), pendingFuture, worker);
            executorService.execute(transformFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] transformTask is interrupted!", pipelineId), e);
                return;
            }

            logger.error(String.format("[%s] transformTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.
Class RowDataPipeDelegate, method get.
/**
 * Loads the batch identified by the given pipe keys.
 *
 * <p>Null entries are skipped. A {@code MemoryPipeKey} short-circuits: its batch is returned
 * immediately. An {@code HttpPipeKey} either yields the row data directly (when its data type
 * reports {@code isDbBatch()}) or starts an asynchronous attachment download on
 * {@code executorService}. An {@code RpcPipeKey} yields the row data from the rpc pipe. After all
 * keys are visited, a started download is awaited and its file is set as the batch root,
 * provided a non-null batch was obtained.
 *
 * @param keys the pipe keys to read; must not be null
 * @return the batch read from the pipes; an empty {@code DbBatch} if no key produced one, or
 *         {@code null} if the last pipe consulted returned {@code null}
 * @throws PipeException if a key has an unsupported type or the attachment download fails
 */
public DbBatch get(List<PipeKey> keys) {
    Assert.notNull(keys);

    DbBatch result = new DbBatch();
    Future<File> attachment = null;
    for (final PipeKey key : keys) {
        if (key == null) {
            // Ignore empty keys.
            continue;
        }

        if (key instanceof MemoryPipeKey) {
            // In-memory data: return it directly.
            return rowDataMemoryPipe.get((MemoryPipeKey) key);
        }

        if (key instanceof HttpPipeKey) {
            final HttpPipeKey httpKey = (HttpPipeKey) key;
            if (key.getDataType().isDbBatch()) {
                // Row data is fetched inline; attachments are downloaded in the background below.
                result = rowDataHttpPipe.get(httpKey);
            } else {
                attachment = executorService.submit(new Callable<File>() {

                    public File call() throws Exception {
                        try {
                            // Tag log output on the download thread with the pipeline id.
                            MDC.put(OtterConstants.splitPipelineLogFileKey,
                                    String.valueOf(httpKey.getIdentity().getPipelineId()));
                            return attachmentHttpPipe.get(httpKey);
                        } finally {
                            MDC.remove(OtterConstants.splitPipelineLogFileKey);
                        }
                    }
                });
            }
            continue;
        }

        if (key instanceof RpcPipeKey) {
            result = rowDataRpcPipe.get((RpcPipeKey) key);
            continue;
        }

        throw new PipeException("unknow_PipeKey", key.toString());
    }

    if (attachment == null || result == null) {
        // Nothing to merge: either no download was started or there is no batch to attach it to.
        return result;
    }

    try {
        // Wait for the attachment download and record its location on the batch.
        result.setRoot(attachment.get());
    } catch (Exception e) {
        throw new PipeException(e);
    }
    return result;
}
Use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.
Class RowDataPipeDelegate, method put.
/**
 * Passes the given data on to the node with the specified node id.
 *
 * <p>When {@code isLocal(nid)} is true the batch is stored through the memory pipe and its single
 * key is returned. Otherwise the row data is written through the rpc or http pipe, chosen by
 * the pipeline's {@code PipeChooseMode} (in automatic mode, rpc is used when
 * {@code calculateSize(data) <= sizeThresold}). If the batch carries a non-empty file batch, the
 * attachments are written concurrently on {@code executorService} and their key is appended
 * after the row-data key.
 *
 * @param data the batch to hand over
 * @param nid id of the node that should receive the batch
 * @return the keys under which the data was stored
 * @throws PipeException if the pipe mode is not recognised or any pipe operation fails
 */
public List<PipeKey> put(final DbBatch data, Long nid) throws PipeException {
    List<PipeKey> keys = new ArrayList<PipeKey>();
    if (isLocal(nid)) {
        keys.add(rowDataMemoryPipe.put(data));
        return keys;
    }

    Pipeline pipeline = configClientService.findPipeline(data.getRowBatch().getIdentity().getPipelineId());

    // Start the attachment transfer in the background when there are files to send.
    Future<PipeKey> attachmentKey = null;
    if (data.getFileBatch() != null && !CollectionUtils.isEmpty(data.getFileBatch().getFiles())) {
        attachmentKey = executorService.submit(new Callable<PipeKey>() {

            public PipeKey call() throws Exception {
                try {
                    // Tag log output on the transfer thread with the pipeline id.
                    MDC.put(OtterConstants.splitPipelineLogFileKey,
                            String.valueOf(data.getFileBatch().getIdentity().getPipelineId()));
                    return attachmentHttpPipe.put(data.getFileBatch());
                } finally {
                    MDC.remove(OtterConstants.splitPipelineLogFileKey);
                }
            }
        });
    }

    try {
        PipeChooseMode mode = pipeline.getParameters().getPipeChooseType();

        // Decide which pipe carries the row data.
        final boolean viaRpc;
        if (mode.isAutomatic()) {
            viaRpc = calculateSize(data) <= sizeThresold;
        } else if (mode.isRpc()) {
            viaRpc = true;
        } else if (mode.isHttp()) {
            viaRpc = false;
        } else {
            throw new PipeException("pipeChooseMode is error!" + mode);
        }

        if (viaRpc) {
            keys.add(rowDataRpcPipe.put(data));
        } else {
            keys.add(rowDataHttpPipe.put(data));
        }

        // Wait for the attachment transfer, if one was started.
        if (attachmentKey != null) {
            keys.add(attachmentKey.get());
        }
    } catch (Exception e) {
        throw new PipeException(e);
    }
    return keys;
}
Use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.
Class ExtractTask, method run.
/**
 * Main loop of the extract stage.
 *
 * <p>While {@code running} is true, obtains the next extract event for this pipeline from the
 * arbitrate service, wraps the processing of that event in a {@code Runnable}, and hands it to
 * {@code executorService} as a {@code SetlFuture}. The worker reads the batch named by the
 * event's pipe keys, runs it through {@code otterExtractorFactory}, optionally runs file
 * conflict detection, writes the result to the pipe for the next node, and then signals the
 * event as done via {@code single}.
 *
 * <p>Failures are logged. A non-interrupt failure additionally triggers
 * {@code sendRollbackTermin}. An interrupt caught in the outer loop ends this method.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData event = arbitrateEventService.extractEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                public void run() {
                    // Set up profiling info: remember the start time only when profiling is on.
                    final boolean profilingEnabled = isProfiling();
                    final Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;

                    // Tag log output with the pipeline id and rename the thread for the duration of the work.
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    final Thread self = Thread.currentThread();
                    final String previousName = self.getName();
                    self.setName(createTaskName(pipelineId, "ExtractWorker"));
                    try {
                        pipeline = configClientService.findPipeline(pipelineId);
                        List<PipeKey> sourceKeys = (List<PipeKey>) event.getDesc();
                        long targetNodeId = event.getNextNid();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        if (batch == null) {
                            // May be null: with low memory or a network fault causing a long block,
                            // reading from the pipe can fail and the data may already have been
                            // deleted by the previous node.
                            processMissData(pipelineId, "extract miss data with keys:" + sourceKeys.toString());
                            return;
                        }

                        // Re-assemble the data.
                        otterExtractorFactory.extract(batch);

                        // Check whether files are being synced and whether file comparison is required.
                        boolean hasFiles = batch.getFileBatch() != null
                                           && !CollectionUtils.isEmpty(batch.getFileBatch().getFiles());
                        if (hasFiles && pipeline.getParameters().getFileDetect()) {
                            // Compare whether the images on the China and US sides have changed.
                            FileBatch detected = fileBatchConflictDetectService.detect(batch.getFileBatch(), targetNodeId);
                            batch.setFileBatch(detected);
                        }

                        // Hand the batch over to the next stage and record the new keys on the event.
                        event.setDesc(rowDataPipeDelegate.put(batch, targetNodeId));

                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId,
                                                           StageType.EXTRACT,
                                                           new AggregationItem(startedAt, finishedAt));
                        }
                        arbitrateEventService.extractEvent().single(event);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%d] extractwork executor is interrrupt! data:%s", pipelineId, event), e);
                        } else {
                            logger.error(String.format("[%d] extractwork executor is error! data:%s", pipelineId, event), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        // Always restore the thread name and clear the logging context.
                        self.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Build a pending task so it can be exited when the thread is shut down.
            SetlFuture pendingExtract = new SetlFuture(StageType.EXTRACT, event.getProcessId(), pendingFuture, worker);
            executorService.execute(pendingExtract);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] extractTask is interrupted!", pipelineId), e);
                return;
            }

            logger.error(String.format("[%s] extractTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Use of com.alibaba.otter.node.etl.common.pipe.PipeKey in project otter by alibaba.
Class LoadTask, method run.
/**
 * Main loop of the load stage.
 *
 * <p>While {@code running} is true, obtains the next load event for this pipeline from the
 * arbitrate service, wraps the processing of that event in a {@code Runnable}, and hands it to
 * {@code executorService} as a {@code SetlFuture}. The worker reads the batch named by the
 * event's pipe keys, loads it through {@code otterLoaderFactory}, and then signals the event as
 * done via {@code single}.
 *
 * <p>If a failure occurs after the load call has already returned its contexts, each
 * {@code DbLoadContext} among them is passed to {@code dbLoadInterceptor.error}. A
 * non-interrupt failure additionally triggers {@code sendRollbackTermin}. An interrupt caught in
 * the outer loop ends this method.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData event = arbitrateEventService.loadEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                public void run() {
                    // Set up profiling info: remember the start time only when profiling is on.
                    final boolean profilingEnabled = isProfiling();
                    final Long startedAt = profilingEnabled ? Long.valueOf(System.currentTimeMillis()) : null;

                    // Tag log output with the pipeline id and rename the thread for the duration of the work.
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    final Thread self = Thread.currentThread();
                    final String previousName = self.getName();
                    self.setName(createTaskName(pipelineId, "LoadWorker"));

                    // Stays null until the load call has returned.
                    List<LoadContext> loadedContexts = null;
                    try {
                        // Later on this could check whether the synced data really is row data.
                        List<PipeKey> sourceKeys = (List<PipeKey>) event.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        if (batch == null) {
                            // May be null: with low memory or a network fault causing a long block,
                            // reading from the pipe can fail and the data may already have been
                            // deleted by the previous node.
                            processMissData(pipelineId, "load miss data with keys:" + sourceKeys.toString());
                            return;
                        }

                        // Perform the actual data load.
                        otterLoaderFactory.setStartTime(batch.getRowBatch().getIdentity(), event.getStartTime());
                        loadedContexts = otterLoaderFactory.load(batch);

                        if (profilingEnabled) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId,
                                                           StageType.LOAD,
                                                           new AggregationItem(startedAt, finishedAt));
                        }

                        // Processing finished: signal completion via single.
                        arbitrateEventService.loadEvent().single(event);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] loadWork executor is interrrupt! data:%s", pipelineId, event), e);
                        } else {
                            logger.error(String.format("[%s] loadWork executor is error! data:%s", pipelineId, event), e);
                        }

                        if (loadedContexts != null) {
                            // The load itself succeeded but notifying the arbitrator failed,
                            // so the loaded contexts must be recorded to the store.
                            for (LoadContext loaded : loadedContexts) {
                                if (!(loaded instanceof DbLoadContext)) {
                                    continue;
                                }
                                try {
                                    dbLoadInterceptor.error((DbLoadContext) loaded);
                                } catch (Throwable ie) {
                                    logger.error(String.format("[%s] interceptor process error failed!", pipelineId), ie);
                                }
                            }
                        }

                        // Interrupt status is re-evaluated here, after the interceptors have run.
                        if (!isInterrupt(e)) {
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        // Always restore the thread name and clear the logging context.
                        self.setName(previousName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Build a pending task so it can be exited when the thread is shut down.
            SetlFuture pendingLoad = new SetlFuture(StageType.LOAD, event.getProcessId(), pendingFuture, worker);
            executorService.execute(pendingLoad);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] loadTask is interrupted!", pipelineId), e);
                // Interrupted: leave the loop.
                return;
            }

            logger.error(String.format("[%s] loadTask is error!", pipelineId), e);
            // The original carried a disabled call, arbitrateEventService.loadEvent().release(pipelineId),
            // meant to release the lock before sending the rollback signal; it remains disabled.
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Aggregations