Usage of com.alibaba.otter.shared.etl.model.FileData in the Alibaba otter project — class FileLoadAction, method load:
/**
 * Loads the files of the given batch from the staging directory into their
 * target locations, processing them in push-weight order, and returns the
 * context describing the records that were handled successfully.
 *
 * @param fileBatch  the batch of file records to load
 * @param rootDir    the temporary staging directory holding the copied files;
 *                   always deleted when this method returns
 * @param controller coordinates weight-ordered progress across loaders
 * @return the load context containing the successfully processed records
 * @throws LoadException if the staging directory is missing or loading fails
 */
public FileLoadContext load(FileBatch fileBatch, File rootDir, WeightController controller) {
    if (!rootDir.exists()) {
        throw new LoadException(rootDir.getPath() + " does not exist");
    }

    FileLoadContext context = buildContext(fileBatch.getIdentity());
    context.setPrepareDatas(fileBatch.getFiles());
    boolean isDryRun = context.getPipeline().getParameters().isDryRun();
    try {
        // Group the successfully copied files by their push weight.
        WeightBuckets<FileData> buckets = buildWeightBuckets(fileBatch.getIdentity(), fileBatch.getFiles());
        List<Long> weights = buckets.weights();
        controller.start(weights);
        // Process the data one weight at a time, in order.
        for (int i = 0; i < weights.size(); i++) {
            Long weight = weights.get(i);
            controller.await(weight.intValue());
            // Fix: the guard previously checked isInfoEnabled() around a debug() call.
            if (logger.isDebugEnabled()) {
                logger.debug("##start load for weight:{}\n", weight);
            }
            // Handle all items sharing this weight.
            List<FileData> items = buckets.getItems(weight);
            if (isDryRun) { // reuse the flag computed above instead of re-reading pipeline parameters
                dryRun(context, items, rootDir);
            } else {
                moveFiles(context, items, rootDir);
            }
            controller.single(weight.intValue());
            if (logger.isDebugEnabled()) {
                logger.debug("##end load for weight:{}\n", weight);
            }
        }
        dumpLoadContext(fileBatch, context, "successed", isDryRun);
    } catch (InterruptedException e) {
        // Restore the interrupt status; the partially processed context is
        // still returned (original behavior: interruption is not rethrown).
        Thread.currentThread().interrupt();
        dumpLoadContext(fileBatch, context, "error", isDryRun);
    } catch (Exception e) {
        dumpLoadContext(fileBatch, context, "error", isDryRun);
        throw new LoadException(e);
    } finally {
        // Whether or not the move succeeded, always delete the temporary directory.
        NioUtils.delete(rootDir, 3);
    }
    return context;
}

/**
 * Dumps the load context to the per-pipeline log when dumping is enabled or
 * this is a dry run. The state string "successed" is kept as-is for log
 * compatibility with existing tooling.
 *
 * @param fileBatch the batch being loaded (source of the pipeline id)
 * @param context   the load context to dump
 * @param state     log label, e.g. "successed" or "error"
 * @param isDryRun  whether this load is a dry run
 */
private void dumpLoadContext(FileBatch fileBatch, FileLoadContext context, String state, boolean isDryRun) {
    if (dump || isDryRun) {
        MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
        try {
            logger.info(FileloadDumper.dumpContext(state, context));
        } finally {
            // Always clear the MDC key, even if dumping throws.
            MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
        }
    }
}
Usage of com.alibaba.otter.shared.etl.model.FileData in the Alibaba otter project — class FileLoadAction, method buildWeightBuckets:
/**
 * Groups the given file records into buckets keyed by their push weight.
 *
 * @param identity identifies the pipeline used to resolve each item's media pair
 * @param datas    the file records to group
 * @return buckets of items indexed by push weight
 */
private WeightBuckets<FileData> buildWeightBuckets(Identity identity, List<FileData> datas) {
    WeightBuckets<FileData> grouped = new WeightBuckets<FileData>();
    for (FileData item : datas) {
        // Resolve the media pair that carries the push weight for this item.
        DataMediaPair mediaPair = ConfigHelper.findDataMediaPair(getPipeline(identity), item.getPairId());
        grouped.addItem(mediaPair.getPushWeight(), item);
    }
    return grouped;
}
Usage of com.alibaba.otter.shared.etl.model.FileData in the Alibaba otter project — class FileExtractor, method extract:
/**
 * Extracts the file records referenced by the row batch and attaches the
 * resulting FileBatch (carrying a copy of the row batch's identity) to the
 * given DbBatch.
 *
 * @param dbBatch the batch whose row data is scanned for file references
 * @throws ExtractException if file extraction fails
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    List<FileData> extracted = doFileExtract(dbBatch.getRowBatch());

    // Copy the row batch's identity onto a fresh Identity instance.
    Identity source = dbBatch.getRowBatch().getIdentity();
    Identity identity = new Identity();
    identity.setChannelId(source.getChannelId());
    identity.setPipelineId(source.getPipelineId());
    identity.setProcessId(source.getProcessId());

    FileBatch fileBatch = new FileBatch();
    fileBatch.setFiles(extracted);
    fileBatch.setIdentity(identity);
    dbBatch.setFileBatch(fileBatch);
}
Usage of com.alibaba.otter.shared.etl.model.FileData in the Alibaba otter project — class FileExtractor, method doFileDetectCollector:
/**
 * Collects size and last-modified time for each FileData concurrently,
 * retrying up to {@code retry} times per file. Remote (namespaced) files are
 * not supported and fail every attempt. On total failure the file's size and
 * timestamp are left at Long.MIN_VALUE and a warning is logged.
 */
private void doFileDetectCollector(Pipeline pipeline, List<FileData> fileDatas) {
ExecutorTemplate executorTemplate = executorTemplateGetter.get();
try {
executorTemplate.start();
// Re-adjust the pool size from the pipeline's file-load configuration.
executorTemplate.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());
for (final FileData fileData : fileDatas) {
// Submit each file for multi-threaded processing.
executorTemplate.submit(new Runnable() {
public void run() {
// A non-empty namespace marks a remote ("aranda") file, which is unsupported.
boolean isAranda = StringUtils.isNotEmpty(fileData.getNameSpace());
int count = 0;
Throwable exception = null;
while (count++ < retry) {
try {
if (isAranda) {
// Remote file: not supported, so every attempt throws.
throw new RuntimeException(fileData + " is not support!");
} else {
// Local file: stat it for timestamp and size.
// NOTE(review): File.lastModified()/length() return 0 for a missing
// file rather than throwing — a nonexistent local path is recorded
// as 0/0, not as a failure. Confirm this is intended.
File file = new File(fileData.getPath());
fileData.setLastModifiedTime(file.lastModified());
fileData.setSize(file.length());
}
// Success: exit without exhausting the retries.
return;
} catch (Exception e) {
// Mark the metadata as unknown and remember the last failure.
fileData.setLastModifiedTime(Long.MIN_VALUE);
fileData.setSize(Long.MIN_VALUE);
exception = e;
}
}
// NOTE(review): this branch is only reached after all retries failed, at
// which point count == retry + 1, so the guard is always true here. Also,
// if retry == 0 the loop never runs and `exception` is null when logged.
if (count >= retry) {
logger.warn(String.format("FileDetectCollector is error! collect failed[%s]", fileData.getNameSpace() + "/" + fileData.getPath()), exception);
}
}
});
}
long start = System.currentTimeMillis();
logger.info("start pipelinep[{}] waitFor FileData Size : {} ", pipeline.getId(), fileDatas.size());
// Block until every submitted task has completed.
executorTemplate.waitForResult();
logger.info("end pipelinep[{}] waitFor FileData cost : {} ms ", pipeline.getId(), (System.currentTimeMillis() - start));
} finally {
// Release the template even if get() succeeded but start()/submit threw.
if (executorTemplate != null) {
executorTemplateGetter.release(executorTemplate);
}
}
}
Usage of com.alibaba.otter.shared.etl.model.FileData in the Alibaba otter project — class AttachmentHttpPipe, method archiveFile:
/**
 * Packs the attachment files of the batch into an archive under the htdocs
 * directory and builds the HttpPipeKey pointing at the resulting download URL.
 * Returns null if packing fails. Missing local files are skipped (the archive
 * callback returns null for them) and dumped to the per-pipeline log; remote
 * (namespaced) files are unsupported and cause the pack to fail.
 */
private HttpPipeKey archiveFile(final FileBatch fileBatch) {
// Build the archive file name/url for this batch.
String filename = buildFileName(fileBatch.getIdentity(), ClassUtils.getShortClassName(fileBatch.getClass()));
File file = new File(htdocsDir, filename);
// Compress the batch's file data.
List<FileData> fileDatas = fileBatch.getFiles();
Pipeline pipeline = configClientService.findPipeline(fileBatch.getIdentity().getPipelineId());
int poolSize = pipeline.getParameters().getFileLoadPoolSize();
boolean useLocalFileMutliThread = pipeline.getParameters().getUseLocalFileMutliThread();
ArchiveBean archiveBean = getArchiveBean();
// Adjust the archive worker pool size.
archiveBean.adjustPoolSize(poolSize);
// Configure whether multi-threaded local-file sync is enabled.
archiveBean.setUseLocalFileMutliThread(useLocalFileMutliThread);
// Callback supplying an InputStream per FileData ("retrive" spelling is the interface's).
boolean done = archiveBean.pack(file, fileDatas, new ArchiveRetriverCallback<FileData>() {
public InputStream retrive(FileData fileData) {
boolean miss = false;
try {
if (StringUtils.isNotEmpty(fileData.getNameSpace())) {
// A non-empty namespace means a remote file, which is not supported.
throw new RuntimeException(fileData + " is not support!");
} else {
File source = new File(fileData.getPath());
if (source.exists() && source.isFile()) {
// Defer opening the stream until the archiver actually reads it.
return new LazyFileInputStream(source);
} else {
// File vanished (or is a directory): record the miss, skip it.
miss = true;
return null;
}
}
} finally {
if (miss && logger.isInfoEnabled()) {
// NOTE(review): MDC.put here has no matching MDC.remove — the key may
// leak onto the archive worker thread. Confirm whether that is intended.
MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId()));
logger.info(FileloadDumper.dumpMissFileDatas(fileBatch.getIdentity(), fileData));
}
}
}
});
if (done == false) {
// Packing failed: signal the caller by returning null.
return null;
}
HttpPipeKey key = new HttpPipeKey();
key.setUrl(remoteUrlBuilder.getUrl(fileBatch.getIdentity().getPipelineId(), filename));
key.setDataType(PipeDataType.FILE_BATCH);
key.setIdentity(fileBatch.getIdentity());
// Encrypt when forced globally or enabled on the pipeline; the key/crc let
// the receiver decrypt and verify the archive.
if (encrypt || pipeline.getParameters().getUseFileEncrypt()) {
EncryptedData encryptedData = encryptFile(file);
key.setKey(encryptedData.getKey());
key.setCrc(encryptedData.getCrc());
}
return key;
}
End of aggregated FileData usage examples.