use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class DataBatchLoader method load.
/**
 * Loads one {@link DbBatch}: the optional file batch and the row batches obtained by
 * {@code split(rowBatch)} are submitted to the executor and awaited as they complete.
 *
 * <p>If any task fails (or waiting is interrupted), the remaining tasks are cancelled, every
 * already-completed {@link DbLoadContext} is handed to {@code dbInterceptor.error(...)} and a
 * {@link LoadException} wrapping the cause is thrown.
 *
 * @param data the batch to load; its row batch and file batch may each be absent
 * @return the {@link DbLoadContext} results of all tasks when every task succeeded (results that
 *         are not {@code DbLoadContext} instances are not included)
 * @throws LoadException if a task failed or the wait for a task was interrupted
 */
public List<LoadContext> load(DbBatch data) {
    final RowBatch rowBatch = data.getRowBatch();
    final FileBatch fileBatch = data.getFileBatch();
    // The guard must test fileBatch itself: testing rowBatch here skipped file-only batches and
    // threw a NullPointerException when only the row batch was present.
    boolean existFileBatch = (fileBatch != null && !CollectionUtils.isEmpty(fileBatch.getFiles()) && data.getRoot() != null);
    boolean existRowBatch = (rowBatch != null && !CollectionUtils.isEmpty(rowBatch.getDatas()));

    int count = 0;
    List<RowBatch> rowBatchs = null;
    if (existRowBatch) {
        // Split the rows into separate batches; each resulting batch gets its own load task.
        rowBatchs = split(rowBatch);
        count += rowBatchs.size();
    }
    if (existFileBatch) {
        count += 1;
    }

    WeightController controller = new WeightController(count);
    List<Future> futures = new ArrayList<Future>();
    ExecutorCompletionService completionService = new ExecutorCompletionService(executorService);
    if (existFileBatch) {
        submitFileBatch(futures, completionService, fileBatch, data.getRoot(), controller);
    }
    if (existRowBatch) {
        submitRowBatch(futures, completionService, rowBatchs, controller);
    }

    // First wait for the asynchronous results in completion order; index counts the successes.
    List<LoadContext> processedContexts = new ArrayList<LoadContext>();
    int index = 0;
    LoadException exception = null;
    while (index < futures.size()) {
        try {
            // take() itself may be interrupted while waiting
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            exception = new LoadException(e);
            break;
        } catch (ExecutionException e) {
            exception = new LoadException(e);
            break;
        }
        index++;
    }

    // As soon as any task reports an error the whole scheduling round is abandoned.
    if (index < futures.size()) {
        // Fewer successes than tasks means a failure: cancel what is still running and report
        // what already completed to the interceptor's error hook.
        for (int errorIndex = 0; errorIndex < futures.size(); errorIndex++) {
            Future future = futures.get(errorIndex);
            if (future.isDone()) {
                try {
                    LoadContext loadContext = (LoadContext) future.get();
                    if (loadContext instanceof DbLoadContext) {
                        // Hand the completed context to the interceptor's error handling.
                        dbInterceptor.error((DbLoadContext) loadContext);
                    }
                } catch (InterruptedException e) {
                    // ignore: the primary failure is already captured in 'exception'
                } catch (ExecutionException e) {
                    // ignore: the primary failure is already captured in 'exception'
                } catch (Exception e) {
                    logger.error("interceptor process error failed", e);
                }
            } else {
                // Cancel tasks that have not finished yet.
                future.cancel(true);
            }
        }
    } else {
        for (int i = 0; i < futures.size(); i++) {
            // Collect the results of the successfully completed tasks.
            Future future = futures.get(i);
            try {
                LoadContext loadContext = (LoadContext) future.get();
                if (loadContext instanceof DbLoadContext) {
                    processedContexts.add((DbLoadContext) loadContext);
                }
            } catch (InterruptedException e) {
                // ignore: every future already completed in the loop above
            } catch (ExecutionException e) {
                // ignore: every future already completed in the loop above
            }
        }
    }

    if (exception != null) {
        throw exception;
    } else {
        return processedContexts;
    }
}
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class DbLoadAction method doTwoPhase.
/**
 * Runs the load in two phases: first all row groups in parallel, and, if any group fails,
 * a second serial pass over all the data.
 *
 * <p>Phase one submits one {@code DbLoadWorker} per non-empty row group. If any worker returns or
 * throws an exception, phase two re-runs <em>every</em> non-empty group (not only the recorded
 * failed data) with batching disabled. When the pipeline's {@code skipLoadException} parameter is
 * true, the rows are retried one at a time and failures are logged and skipped; otherwise all rows
 * are retried by a single worker and a failure is raised as a {@link LoadException}.
 *
 * @param context   the load context shared by all workers
 * @param totalRows the row groups to load; empty groups are skipped
 * @param canBatch  whether phase-one workers may use batch execution
 * @throws LoadException if phase two fails and skipping is not enabled
 */
private void doTwoPhase(DbLoadContext context, List<List<EventData>> totalRows, boolean canBatch) {
    // Keep the submitted groups in a list parallel to 'results'. Empty groups are filtered out,
    // so an index into 'results' does not necessarily match the same index in 'totalRows'.
    List<List<EventData>> submittedRows = new ArrayList<List<EventData>>();
    List<Future<Exception>> results = new ArrayList<Future<Exception>>();
    for (List<EventData> rows : totalRows) {
        if (CollectionUtils.isEmpty(rows)) {
            // skip empty groups
            continue;
        }
        submittedRows.add(rows);
        results.add(executor.submit(new DbLoadWorker(context, rows, canBatch)));
    }

    boolean partFailed = false;
    for (int i = 0; i < results.size(); i++) {
        Future<Exception> result = results.get(i);
        Exception ex = null;
        try {
            ex = result.get();
            for (EventData data : submittedRows.get(i)) {
                // notify the interceptor that this record has been processed
                interceptor.after(context, data);
            }
        } catch (Exception e) {
            ex = e;
        }
        if (ex != null) {
            logger.warn("##load phase one failed!", ex);
            partFailed = true;
        }
    }

    if (!partFailed) {
        return;
    }

    // Retry everything phase one attempted instead of only the recorded failed data, so that a
    // miscalculated failed-data set cannot cause records to be lost.
    List<EventData> retryEventDatas = new ArrayList<EventData>();
    for (List<EventData> rows : totalRows) {
        retryEventDatas.addAll(rows);
    }
    // reset the failed-data record before retrying
    context.getFailedDatas().clear();

    // May be null: the original author notes that older manager versions serialize parameters
    // without the skipLoadException setting.
    Boolean skipLoadException = context.getPipeline().getParameters().getSkipLoadException();
    if (skipLoadException != null && skipLoadException) {
        // Skipping is allowed: load record by record so that exactly the failing records are
        // filtered out and logged.
        for (EventData retryEventData : retryEventDatas) {
            // batch is forced to false
            DbLoadWorker worker = new DbLoadWorker(context, Arrays.asList(retryEventData), false);
            try {
                Exception ex = worker.call();
                if (ex != null) {
                    // do skip
                    logger.warn("skip exception for data : {} , caused by {}", retryEventData, ExceptionUtils.getFullStackTrace(ex));
                }
            } catch (Exception ex) {
                // do skip
                logger.warn("skip exception for data : {} , caused by {}", retryEventData, ExceptionUtils.getFullStackTrace(ex));
            }
        }
    } else {
        // Process everything in one worker to reduce thread scheduling; batch is forced to false.
        DbLoadWorker worker = new DbLoadWorker(context, retryEventDatas, false);
        Exception failure;
        try {
            failure = worker.call();
        } catch (Exception ex) {
            failure = ex;
        }
        if (failure != null) {
            logger.error("##load phase two failed!", failure);
            throw new LoadException(failure);
        }
    }

    for (EventData data : retryEventDatas) {
        // notify the interceptor that this record has been processed
        interceptor.after(context, data);
    }
}
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class FileLoadAction method dryRun.
/**
 * Walks the file records without copying or deleting anything, sorting each record into the
 * context's processed or failed list and updating the load counters for files that exist.
 *
 * @param context   receives the processed / failed records
 * @param fileDatas the file records to inspect
 * @param rootDir   directory under which the source files are looked up
 * @throws LoadException if a record carries a non-blank namespace and would otherwise be processed
 */
private void dryRun(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
    for (FileData fileData : fileDatas) {
        // A blank namespace marks a local file; anything else is rejected below.
        final boolean local = StringUtils.isBlank(fileData.getNameSpace());
        final String entryName;
        if (local) {
            entryName = FilenameUtils.getPath(fileData.getPath()) + FilenameUtils.getName(fileData.getPath());
        } else {
            entryName = fileData.getNameSpace() + File.separator + fileData.getPath();
        }

        final File sourceFile = new File(rootDir, entryName);
        if (sourceFile.exists() && !sourceFile.isDirectory()) {
            if (!local) {
                throw new LoadException(fileData + " is not support!");
            }
            // Record the file's meta information.
            fileData.setSize(sourceFile.length());
            fileData.setLastModifiedTime(sourceFile.lastModified());
            context.getProcessedDatas().add(fileData);

            LoadCounter counter = loadStatsTracker.getStat(context.getIdentity()).getStat(fileData.getPairId());
            counter.getFileCount().incrementAndGet();
            counter.getFileSize().addAndGet(fileData.getSize());
            continue;
        }

        if (fileData.getEventType().isDelete()) {
            // Delete events need no source file; only local records are accepted.
            if (!local) {
                throw new LoadException(fileData + " is not support!");
            }
            context.getProcessedDatas().add(fileData);
            continue;
        }

        // Neither an existing file nor a delete event: record as failed.
        context.getFailedDatas().add(fileData);
    }
}
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class FileLoadAction method doMove.
/**
 * Applies a single file record: copies the source file found under {@code rootDir} to the
 * record's path, or deletes the target for a delete event, and records the outcome in the
 * context's processed or failed list.
 *
 * @param context  receives the processed / failed record
 * @param rootDir  directory under which the source file is looked up
 * @param fileData the file record to apply
 * @throws IOException   declared by this method (the copy/delete helpers are the likely source)
 * @throws LoadException if the record carries a non-blank namespace, or if the target file does
 *                       not exist after the copy
 */
private void doMove(FileLoadContext context, File rootDir, FileData fileData) throws IOException {
    // A blank namespace marks a local file; anything else is rejected below.
    final boolean local = StringUtils.isBlank(fileData.getNameSpace());
    final String entryName;
    if (local) {
        entryName = FilenameUtils.getPath(fileData.getPath()) + FilenameUtils.getName(fileData.getPath());
    } else {
        entryName = fileData.getNameSpace() + File.separator + fileData.getPath();
    }

    final File sourceFile = new File(rootDir, entryName);
    if (sourceFile.exists() && !sourceFile.isDirectory()) {
        if (!local) {
            throw new LoadException(fileData + " is not support!");
        }

        final File destination = new File(fileData.getPath());
        // copy to product path
        NioUtils.copy(sourceFile, destination, retry);
        if (!destination.exists()) {
            throw new LoadException(String.format("copy/rename [%s] to [%s] failed by unknow reason", sourceFile.getPath(), destination.getPath()));
        }
        // Record the file's meta information.
        fileData.setSize(sourceFile.length());
        fileData.setLastModifiedTime(sourceFile.lastModified());
        context.getProcessedDatas().add(fileData);

        LoadCounter counter = loadStatsTracker.getStat(context.getIdentity()).getStat(fileData.getPairId());
        counter.getFileCount().incrementAndGet();
        counter.getFileSize().addAndGet(fileData.getSize());
        return;
    }

    if (fileData.getEventType().isDelete()) {
        // Delete the corresponding target file; only local records are accepted.
        if (!local) {
            throw new LoadException(fileData + " is not support!");
        }
        final File destination = new File(fileData.getPath());
        if (NioUtils.delete(destination, retry)) {
            context.getProcessedDatas().add(fileData);
        } else {
            context.getFailedDatas().add(fileData);
        }
        return;
    }

    // Neither an existing file nor a delete event: record as failed.
    context.getFailedDatas().add(fileData);
}
use of com.alibaba.otter.node.etl.load.exception.LoadException in project otter by alibaba.
the class FileLoadActionTest method testWithOutRootDir.
/**
 * Loading a file batch against the root directory "/null" must fail with a
 * {@link LoadException}; any other exception propagates and fails the test.
 */
@Test
public void testWithOutRootDir() throws Exception {
    final File rootDir = new File("/null");
    final Identity identity = buildIdentity(1L, 2L, 3L);
    final FileBatch batch = buildFileBatch(identity);
    batch.getFiles().addAll(buildFileDatas("ns_", EventType.INSERT, 0, 20, false));

    try {
        fileLoadAction.load(batch, rootDir, null);
    } catch (LoadException expected) {
        // expect for LoadException
        return;
    }
    // Reaching this point means load() completed without raising LoadException.
    want.fail("unreachable code.");
}
Aggregations