use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.
the class ArchiveBean method doPack.
/**
 * Packs the given files into a zip archive written to {@code targetArchiveFile}.
 * <p>
 * Files whose event type is a delete are skipped, as are files that map to an entry name that
 * was already seen. Local files (blank namespace) are read serially straight from the callback
 * unless {@code useLocalFileMutliThread} is set; all other files are first copied by the
 * executor into a temporary directory next to the archive and zipped from there. The temporary
 * directory is removed before returning.
 *
 * @param targetArchiveFile the archive to create; an existing file is deleted first
 * @param fileDatas the files to pack
 * @param callback supplies the input stream for each file
 * @return {@code true} if at least one entry was written to the archive
 * @throws ArchiveException if an existing archive cannot be deleted or any packing step fails
 */
@SuppressWarnings("resource")
private boolean doPack(final File targetArchiveFile, List<FileData> fileDatas,
                       final ArchiveRetriverCallback<FileData> callback) {
    // An already existing target archive must be removed before a new one is written.
    if (targetArchiveFile.exists() && !NioUtils.delete(targetArchiveFile, 3)) {
        throw new ArchiveException(String.format("[%s] exist and delete failed", targetArchiveFile.getAbsolutePath()));
    }

    boolean exist = false;
    ZipOutputStream zipOut = null;
    Set<String> entryNames = new HashSet<String>();
    // Finished retrieval tasks, in completion order.
    BlockingQueue<Future<ArchiveEntry>> queue = new LinkedBlockingQueue<Future<ArchiveEntry>>();
    ExecutorCompletionService<ArchiveEntry> completionService =
        new ExecutorCompletionService<ArchiveEntry>(executor, queue);
    // Temporary directory: sibling of the archive, named after the archive's base name.
    final File targetDir = new File(targetArchiveFile.getParentFile(),
                                    FilenameUtils.getBaseName(targetArchiveFile.getPath()));
    try {
        FileUtils.forceMkdir(targetDir);
        zipOut = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(targetArchiveFile)));
        zipOut.setLevel(Deflater.BEST_SPEED);

        // Schedule the retrieval of every file that has to be packed.
        for (final FileData fileData : fileDatas) {
            if (fileData.getEventType().isDelete()) {
                // Deletes are not packed; they are applied directly on the target side.
                continue;
            }

            String namespace = fileData.getNameSpace();
            String path = fileData.getPath();
            boolean isLocal = StringUtils.isBlank(namespace);
            String entryName;
            if (isLocal) {
                entryName = FilenameUtils.getPath(path) + FilenameUtils.getName(path);
            } else {
                entryName = namespace + File.separator + path;
            }

            // Skip files that resolve to an entry name that is already scheduled.
            if (!entryNames.add(entryName)) {
                continue;
            }

            final String name = entryName;
            if (isLocal && !useLocalFileMutliThread) {
                // Serial path: no temporary file, the stream is zipped directly.
                queue.add(new DummyFuture(new ArchiveEntry(name, callback.retrive(fileData))));
            } else {
                completionService.submit(new Callable<ArchiveEntry>() {

                    public ArchiveEntry call() throws Exception {
                        InputStream input = null;
                        OutputStream output = null;
                        try {
                            input = callback.retrive(fileData);
                            if (input instanceof LazyFileInputStream) {
                                // Unwrap to the underlying stream.
                                input = ((LazyFileInputStream) input).getInputSteam();
                            }
                            if (input == null) {
                                // Nothing to pack for this entry.
                                return new ArchiveEntry(name);
                            }

                            File tmp = new File(targetDir, name);
                            // Make sure the parent path exists before writing.
                            NioUtils.create(tmp.getParentFile(), false, 3);
                            output = new FileOutputStream(tmp);
                            NioUtils.copy(input, output);
                            return new ArchiveEntry(name, tmp);
                        } finally {
                            IOUtils.closeQuietly(input);
                            IOUtils.closeQuietly(output);
                        }
                    }
                });
            }
        }

        // Exactly one future was queued per distinct entry name; consume them all.
        for (int i = 0; i < entryNames.size(); i++) {
            InputStream stream = null;
            try {
                ArchiveEntry input = queue.take().get();
                if (input == null) {
                    continue;
                }
                stream = input.getStream();
                if (stream == null) {
                    continue;
                }
                if (stream instanceof LazyFileInputStream) {
                    // Unwrap to the underlying stream.
                    stream = ((LazyFileInputStream) stream).getInputSteam();
                }

                exist = true;
                zipOut.putNextEntry(new ZipEntry(input.getName()));
                NioUtils.copy(stream, zipOut);
                zipOut.closeEntry();
            } finally {
                IOUtils.closeQuietly(stream);
            }
        }

        if (exist) {
            zipOut.finish();
        }
    } catch (InterruptedException e) {
        // Keep the interrupt visible to callers instead of swallowing it into the wrapper.
        Thread.currentThread().interrupt();
        throw new ArchiveException(e);
    } catch (Exception e) {
        throw new ArchiveException(e);
    } finally {
        IOUtils.closeQuietly(zipOut);
        try {
            // Remove the temporary directory; a failure here is deliberately ignored.
            FileUtils.deleteDirectory(targetDir);
        } catch (IOException ignored) {
            // best-effort cleanup
        }
    }
    return exist;
}
use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.
the class FileBatchConflictDetectServiceImpl method onFileConflictDetect.
/**
 * Performs the conflict detection for the file batch carried by the event.
 * <p>
 * Delete events are always kept. For every other file a task is submitted that inspects the
 * local path (at most {@code retry} attempts) and keeps the file when no existing target was
 * found or when {@code accept(target, source)} approves it. Files that are not kept are
 * collected separately and only used for the optional dump log.
 *
 * @param event the event wrapping the batch to check
 * @return the batch itself when it contains no files, otherwise a new batch with the same
 *         identity holding the files that should be synchronized
 */
private FileBatch onFileConflictDetect(FileConflictDetectEvent event) {
    final FileBatch fileBatch = event.getFileBatch();
    if (CollectionUtils.isEmpty(fileBatch.getFiles())) {
        return fileBatch;
    }

    ExecutorTemplate executorTemplate = executorTemplateGetter.get();
    try {
        MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
        executorTemplate.start();

        // Re-apply the pool size configured on the pipeline.
        Pipeline pipeline = configClientService.findPipeline(fileBatch.getIdentity().getPipelineId());
        executorTemplate.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());

        // Both lists are written from worker threads, hence the synchronized wrappers.
        final List<FileData> accepted = Collections.synchronizedList(new ArrayList<FileData>());
        final List<FileData> rejected = Collections.synchronizedList(new ArrayList<FileData>());

        for (final FileData source : fileBatch.getFiles()) {
            if (source.getEventType().isDelete()) {
                accepted.add(source);
                continue;
            }

            executorTemplate.submit(new Runnable() {

                public void run() {
                    MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));

                    String namespace = source.getNameSpace();
                    String path = source.getPath();
                    FileData target = null;

                    // Retry loop: the first attempt that does not throw ends it.
                    for (int attempt = 0; attempt < retry; attempt++) {
                        try {
                            if (!StringUtils.isBlank(namespace)) {
                                // Remote files: this exception is caught by the handler below.
                                throw new RuntimeException(source + " is not support!");
                            }

                            // Local file: capture modification time and size if it exists.
                            java.io.File targetFile = new java.io.File(path);
                            if (targetFile.exists()) {
                                long lastModified = targetFile.lastModified();
                                long size = targetFile.length();
                                target = new FileData();
                                target.setLastModifiedTime(lastModified);
                                target.setSize(size);
                            }
                            break;
                        } catch (Exception ex) {
                            target = null;
                        }
                    }

                    // No known target means the file is synchronized unconditionally.
                    boolean shouldSync = (target == null) || accept(target, source);
                    if (shouldSync) {
                        accepted.add(source);
                    } else {
                        rejected.add(source);
                    }
                }
            });
        }

        // Block until every submitted task has finished.
        executorTemplate.waitForResult();

        if (pipeline.getParameters().getDumpEvent() && logger.isInfoEnabled()) {
            logger.info(FileloadDumper.dumpFilterFileDatas(fileBatch.getIdentity(), fileBatch.getFiles().size(), accepted.size(), rejected));
        }

        // Assemble the resulting batch.
        FileBatch filtered = new FileBatch();
        filtered.setIdentity(fileBatch.getIdentity());
        filtered.setFiles(accepted);
        return filtered;
    } finally {
        if (executorTemplate != null) {
            executorTemplateGetter.release(executorTemplate);
        }
        MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
    }
}
use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.
the class FileExtractor method doFileExtract.
/**
 * Collects the {@code FileData} entries that correspond to the changes of a row batch.
 * <p>
 * DDL events are ignored. For every other event each matching data-media pair with a usable
 * resolver configuration is asked for the files of that row; results are de-duplicated with
 * {@code List.contains}. When the pipeline enables file detection the collected list is passed
 * to {@code doFileDetectCollector} before it is returned.
 *
 * @param rowBatch the batch of row changes
 * @return the de-duplicated file data of the batch
 */
private List<FileData> doFileExtract(RowBatch rowBatch) {
    Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
    List<FileData> fileDatas = new ArrayList<FileData>();

    for (EventData eventData : rowBatch.getDatas()) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }

        List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, eventData.getTableId());
        if (dataMediaPairs == null) {
            throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " dataMediaPair is null,please check");
        }

        for (DataMediaPair dataMediaPair : dataMediaPairs) {
            // Pairs without resolver data, or without a declared extension type, are skipped.
            if (dataMediaPair.getResolverData() == null
                || dataMediaPair.getResolverData().getExtensionDataType() == null) {
                continue;
            }
            // A class-based resolver needs a class path ...
            if (dataMediaPair.getResolverData().getExtensionDataType().isClazz()
                && StringUtils.isBlank(dataMediaPair.getResolverData().getClazzPath())) {
                continue;
            }
            // ... and a source-based resolver needs source text.
            if (dataMediaPair.getResolverData().getExtensionDataType().isSource()
                && StringUtils.isBlank(dataMediaPair.getResolverData().getSourceText())) {
                continue;
            }

            FileResolver fileResolver = extensionFactory.getExtension(FileResolver.class, dataMediaPair.getResolverData());
            if (fileResolver == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " the fileResolver className = " + dataMediaPair.getResolverData().getClazzPath() + " is null ,please check the class");
            }

            if (fileResolver instanceof RemoteDirectoryFetcherAware) {
                ((RemoteDirectoryFetcherAware) fileResolver).setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
            }

            // Append only entries that are not in the result yet.
            for (FileData candidate : getSingleRowFileInfos(dataMediaPair.getId(), fileResolver, eventData)) {
                if (!fileDatas.contains(candidate)) {
                    fileDatas.add(candidate);
                }
            }
        }
    }

    // Optionally run the duplicate-sync detection over the collected files.
    if (pipeline.getParameters().getFileDetect()) {
        doFileDetectCollector(pipeline, fileDatas);
    }
    return fileDatas;
}
use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.
the class FileExtractor method getSingleRowFileInfos.
/**
 * Resolves the files that belong to a single row change.
 * <p>
 * The key columns and the updated columns of the event are put into one map keyed by the
 * upper-cased column name (updated columns win on a name clash) and handed to the resolver.
 * Each returned {@code FileInfo} is converted into a {@code FileData} tagged with the pair id,
 * table id and event type of the row.
 *
 * @param pairId id of the data-media pair the resolver belongs to
 * @param fileResolver resolver that maps a row to its files
 * @param eventData the row change
 * @return the file data of the row; empty when the event is a DELETE the resolver does not
 *         require, or when the resolver returns no files
 */
private List<FileData> getSingleRowFileInfos(long pairId, FileResolver fileResolver, EventData eventData) {
    List<FileData> fileDatas = new ArrayList<FileData>();
    if (eventData.getEventType() == EventType.DELETE && !fileResolver.isDeleteRequired()) {
        return fileDatas;
    }

    List<EventColumn> keyColumns = eventData.getKeys();
    List<EventColumn> eventColumns = eventData.getUpdatedColumns();
    Map<String, String> rowMap = new HashMap<String, String>();
    // Column names are lookup keys, so upper-case them independently of the default locale
    // (a Turkish default locale would otherwise turn "id" into "\u0130D").
    for (EventColumn eventColumn : keyColumns) {
        rowMap.put(eventColumn.getColumnName().toUpperCase(java.util.Locale.ENGLISH), eventColumn.getColumnValue());
    }
    for (EventColumn eventColumn : eventColumns) {
        rowMap.put(eventColumn.getColumnName().toUpperCase(java.util.Locale.ENGLISH), eventColumn.getColumnValue());
    }

    FileInfo[] fileInfos = fileResolver.getFileInfo(rowMap);
    if (fileInfos == null) {
        return fileDatas;
    }

    for (FileInfo fileInfo : fileInfos) {
        FileData fileData = new FileData();
        // Remember which mapping rule produced this file.
        fileData.setPairId(pairId);
        fileData.setTableId(eventData.getTableId());
        fileData.setEventType(eventData.getEventType());
        fileData.setLastModifiedTime(fileInfo.getLastModifiedTime());
        fileData.setNameSpace(fileInfo.getNamespace());
        fileData.setPath(fileInfo.getPath());
        fileData.setSize(fileInfo.getSize());
        fileDatas.add(fileData);
    }
    return fileDatas;
}
use of com.alibaba.otter.shared.etl.model.FileData in project otter by alibaba.
the class FileLoadAction method moveFiles.
/**
 * Loads the given files concurrently with a fail-fast strategy.
 * <p>
 * One {@code FileLoadWorker} is submitted per file. The first failure that is observed, whether
 * returned by a worker as its result or thrown while waiting for one, stops the waiting, cancels
 * every task that has not finished yet and is rethrown.
 *
 * @param context the load context handed to every worker
 * @param fileDatas the files to load
 * @param rootDir the root directory handed to every worker
 * @throws LoadException the first failure observed; a failure of another type is wrapped
 */
private void moveFiles(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
    Exception exception = null;
    adjustPoolSize(context);
    ExecutorCompletionService<Exception> executorComplition = new ExecutorCompletionService<Exception>(executor);
    List<Future<Exception>> results = new ArrayList<Future<Exception>>();

    for (FileData fileData : fileDatas) {
        Future<Exception> future = executorComplition.submit(new FileLoadWorker(context, rootDir, fileData));
        results.add(future);
        // A task that is already done (e.g. it ran on the submitting thread because the pool
        // uses CallerRunsPolicy) is checked right away so no further work is queued after a failure.
        if (future.isDone()) {
            try {
                exception = future.get();
            } catch (Exception e) {
                exception = e;
            }
            if (exception != null) {
                break;
            }
        }
    }

    if (exception == null) {
        // Every submitted task shows up on the completion queue exactly once.
        int pending = results.size();
        while (pending > 0) {
            try {
                exception = executorComplition.take().get();
            } catch (InterruptedException e) {
                // Preserve the interrupt for the caller before failing the load.
                Thread.currentThread().interrupt();
                exception = e;
            } catch (Exception e) {
                exception = e;
            }
            if (exception != null) {
                // Stop at the first failure; continuing would let a later successful
                // result overwrite it and the error would be lost.
                break;
            }
            pending--;
        }
    }

    if (exception != null) {
        // A task failed: cancel everything that is still running or queued.
        for (Future<Exception> future : results) {
            if (!future.isDone()) {
                future.cancel(true);
            }
        }
        throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
    }
}
Aggregations