use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.
the class RowDataHttpPipe method saveDbBatch.
// ======================== help method ===================
/**
 * Serializes the given {@link DbBatch} into a file under {@code htdocsDir} and returns the pipe key
 * that describes where the data can be fetched from.
 *
 * <p>The file contains, in order: the serialized size of the row batch, the row batch protobuf, the
 * serialized size of the file batch, and the file batch protobuf. When the pipeline enables file
 * encryption, the written file is encrypted and the resulting key/crc are stored on the returned key.
 *
 * @param dbBatch the batch to persist; its row batch identity is used for the file name and the key
 * @return the key carrying url, data type, identity and (optionally) encryption key and crc
 * @throws PipeException with message {@code write_byte_error} when writing the file fails
 */
private HttpPipeKey saveDbBatch(DbBatch dbBatch) {
    RowBatch rows = dbBatch.getRowBatch();

    // Convert the row batch into its protobuf representation.
    BatchProto.RowBatch.Builder rowsProto = BatchProto.RowBatch.newBuilder();
    rowsProto.setIdentity(build(rows.getIdentity()));
    for (EventData event : rows.getDatas()) {
        BatchProto.RowData.Builder row = BatchProto.RowData.newBuilder();
        row.setPairId(event.getPairId());
        row.setTableId(event.getTableId());
        if (event.getSchemaName() != null) {
            row.setSchemaName(event.getSchemaName());
        }
        row.setTableName(event.getTableName());
        row.setEventType(event.getEventType().getValue());
        row.setExecuteTime(event.getExecuteTime());

        // Sync mode / consistency are optional on the event.
        if (event.getSyncMode() != null) {
            row.setSyncMode(event.getSyncMode().getValue());
        }
        if (event.getSyncConsistency() != null) {
            row.setSyncConsistency(event.getSyncConsistency().getValue());
        }

        // Primary key columns.
        for (EventColumn key : event.getKeys()) {
            row.addKeys(buildColumn(key));
        }

        // Previous primary key columns, if any.
        if (!CollectionUtils.isEmpty(event.getOldKeys())) {
            for (EventColumn oldKey : event.getOldKeys()) {
                row.addOldKeys(buildColumn(oldKey));
            }
        }

        // Remaining (non-key) columns.
        for (EventColumn other : event.getColumns()) {
            row.addColumns(buildColumn(other));
        }

        row.setRemedy(event.isRemedy());
        row.setSize(event.getSize());
        if (StringUtils.isNotEmpty(event.getSql())) {
            row.setSql(event.getSql());
        }
        if (StringUtils.isNotEmpty(event.getDdlSchemaName())) {
            row.setDdlSchemaName(event.getDdlSchemaName());
        }
        if (StringUtils.isNotEmpty(event.getHint())) {
            row.setHint(event.getHint());
        }
        row.setWithoutSchema(event.isWithoutSchema());

        rowsProto.addRows(row.build());
    }

    // Convert the file batch into its protobuf representation.
    FileBatch files = dbBatch.getFileBatch();
    BatchProto.FileBatch.Builder filesProto = BatchProto.FileBatch.newBuilder();
    filesProto.setIdentity(build(files.getIdentity()));
    for (FileData item : files.getFiles()) {
        BatchProto.FileData.Builder entry = BatchProto.FileData.newBuilder();
        entry.setPairId(item.getPairId());
        entry.setTableId(item.getTableId());
        if (item.getNameSpace() != null) {
            entry.setNamespace(item.getNameSpace());
        }
        if (item.getPath() != null) {
            entry.setPath(item.getPath());
        }
        entry.setEventType(item.getEventType().getValue());
        entry.setSize(item.getSize());
        entry.setLastModifiedTime(item.getLastModifiedTime());

        filesProto.addFiles(entry.build());
    }

    // Derive the target file name from the row batch identity and the batch class name.
    String filename = buildFileName(rows.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass()));
    File target = new File(htdocsDir, filename);

    // Write "<size><row batch><size><file batch>" to the target file.
    OutputStream out = null;
    try {
        out = new BufferedOutputStream(new FileOutputStream(target));

        BatchProto.RowBatch rowsMessage = rowsProto.build();
        out.write(ByteUtils.int2bytes(rowsMessage.getSerializedSize()));
        rowsMessage.writeTo(out);

        BatchProto.FileBatch filesMessage = filesProto.build();
        out.write(ByteUtils.int2bytes(filesMessage.getSerializedSize()));
        filesMessage.writeTo(out);

        out.flush();
    } catch (IOException e) {
        throw new PipeException("write_byte_error", e);
    } finally {
        IOUtils.closeQuietly(out);
    }

    HttpPipeKey pipeKey = new HttpPipeKey();
    pipeKey.setUrl(remoteUrlBuilder.getUrl(rows.getIdentity().getPipelineId(), filename));
    pipeKey.setDataType(PipeDataType.DB_BATCH);
    pipeKey.setIdentity(rows.getIdentity());

    Pipeline pipeline = configClientService.findPipeline(rows.getIdentity().getPipelineId());
    if (pipeline.getParameters().getUseFileEncrypt()) {
        // Encrypt the written file and expose key + crc to the receiver.
        EncryptedData encrypted = encryptFile(target);
        pipeKey.setKey(encrypted.getKey());
        pipeKey.setCrc(encrypted.getCrc());
    }
    return pipeKey;
}
use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.
the class FileBatchConflictDetectServiceImpl method onFileConflictDetect.
/**
 * Performs the actual file conflict detection for one {@link FileBatch}.
 *
 * <p>Delete events are always kept. Every other file is checked on the executor: a local target file
 * (blank namespace) is inspected for its last-modified time and length, and {@code accept(target, source)}
 * decides whether the source still has to be synchronized. When no target information could be obtained
 * (file missing, or every retry failed) the source is kept.
 *
 * @param event the detect event carrying the file batch to check
 * @return the original batch when it has no files, otherwise a new batch with the same identity that
 *         contains only the files that should be synchronized
 */
private FileBatch onFileConflictDetect(FileConflictDetectEvent event) {
    final FileBatch fileBatch = event.getFileBatch();
    if (CollectionUtils.isEmpty(fileBatch.getFiles())) {
        return fileBatch;
    }

    ExecutorTemplate template = executorTemplateGetter.get();
    try {
        MDC.put(OtterConstants.splitPipelineLoadLogFileKey,
                String.valueOf(fileBatch.getIdentity().getPipelineId()));
        template.start();

        // Re-apply the pool size configured for this pipeline.
        Pipeline pipeline = configClientService.findPipeline(fileBatch.getIdentity().getPipelineId());
        template.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());

        // Both lists are filled from worker threads, hence the synchronized wrappers.
        final List<FileData> accepted = Collections.synchronizedList(new ArrayList<FileData>());
        final List<FileData> filtered = Collections.synchronizedList(new ArrayList<FileData>());

        for (final FileData source : fileBatch.getFiles()) {
            if (source.getEventType().isDelete()) {
                accepted.add(source);
                continue;
            }

            template.submit(new Runnable() {

                public void run() {
                    MDC.put(OtterConstants.splitPipelineLoadLogFileKey,
                            String.valueOf(fileBatch.getIdentity().getPipelineId()));

                    // Handle the non-delete (update-like) event.
                    String namespace = source.getNameSpace();
                    String path = source.getPath();
                    FileData target = null;
                    for (int attempt = 0; attempt < retry; attempt++) {
                        try {
                            if (!StringUtils.isBlank(namespace)) {
                                // Remote files are rejected here; the exception is handled by the retry loop.
                                throw new RuntimeException(source + " is not support!");
                            }

                            // Local file: capture modification time and size when it exists.
                            java.io.File localFile = new java.io.File(path);
                            if (localFile.exists()) {
                                long lastModified = localFile.lastModified();
                                long length = localFile.length();
                                target = new FileData();
                                target.setLastModifiedTime(lastModified);
                                target.setSize(length);
                            }
                            // No exception: stop retrying.
                            break;
                        } catch (Exception ex) {
                            target = null;
                        }
                    }

                    // Without target information the source is always synchronized.
                    boolean shouldSync = (target == null) || accept(target, source);
                    if (shouldSync) {
                        accepted.add(source);
                    } else {
                        filtered.add(source);
                    }
                }
            });
        }

        // Block until every submitted check has finished.
        template.waitForResult();

        if (pipeline.getParameters().getDumpEvent() && logger.isInfoEnabled()) {
            logger.info(FileloadDumper.dumpFilterFileDatas(fileBatch.getIdentity(),
                                                           fileBatch.getFiles().size(),
                                                           accepted.size(),
                                                           filtered));
        }

        // Assemble the resulting batch.
        FileBatch detected = new FileBatch();
        detected.setIdentity(fileBatch.getIdentity());
        detected.setFiles(accepted);
        return detected;
    } finally {
        if (template != null) {
            executorTemplateGetter.release(template);
        }
        MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
    }
}
use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.
the class ExtractTask method run.
/**
 * Main loop of the extract stage: waits for the next extract event of this pipeline, wraps its
 * processing into a worker task and hands it to the executor, until {@code running} becomes false
 * or the loop is interrupted.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEventData = arbitrateEventService.extractEvent().await(pipelineId);

            Runnable worker = new Runnable() {

                public void run() {
                    // Record the start time only when profiling is switched on.
                    boolean profiling = isProfiling();
                    Long startedAt = null;
                    if (profiling) {
                        startedAt = System.currentTimeMillis();
                    }

                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    String originalThreadName = Thread.currentThread().getName();
                    Thread.currentThread().setName(createTaskName(pipelineId, "ExtractWorker"));
                    try {
                        pipeline = configClientService.findPipeline(pipelineId);
                        List<PipeKey> keys = (List<PipeKey>) etlEventData.getDesc();
                        long nextNodeId = etlEventData.getNextNid();

                        // The batch can be null: out-of-memory, network trouble or a long block may
                        // make the pipe read fail, and the previous node may already have removed the data.
                        DbBatch dbBatch = rowDataPipeDelegate.get(keys);
                        if (dbBatch == null) {
                            processMissData(pipelineId, "extract miss data with keys:" + keys.toString());
                            return;
                        }

                        // Re-assemble the data through the configured extractors.
                        otterExtractorFactory.extract(dbBatch);

                        // Run the file comparison only when there are files and the pipeline asks for it.
                        FileBatch extractedFiles = dbBatch.getFileBatch();
                        boolean needFileDetect = extractedFiles != null
                                                 && !CollectionUtils.isEmpty(extractedFiles.getFiles())
                                                 && pipeline.getParameters().getFileDetect();
                        if (needFileDetect) {
                            FileBatch detected = fileBatchConflictDetectService.detect(extractedFiles, nextNodeId);
                            dbBatch.setFileBatch(detected);
                        }

                        List<PipeKey> nextKeys = rowDataPipeDelegate.put(dbBatch, nextNodeId);
                        etlEventData.setDesc(nextKeys);

                        if (profiling) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId,
                                                           StageType.EXTRACT,
                                                           new AggregationItem(startedAt, finishedAt));
                        }
                        arbitrateEventService.extractEvent().single(etlEventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%d] extractwork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
                        } else {
                            logger.error(String.format("[%d] extractwork executor is error! data:%s", pipelineId, etlEventData), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        Thread.currentThread().setName(originalThreadName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // Wrap the worker as a pending future so it can be cancelled when the task shuts down.
            SetlFuture pending = new SetlFuture(StageType.EXTRACT, etlEventData.getProcessId(), pendingFuture, worker);
            executorService.execute(pending);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] extractTask is interrupted!", pipelineId), e);
                return;
            }
            logger.error(String.format("[%s] extractTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.
the class FileExtractor method extract.
/**
 * Extracts the file data referenced by the row batch and attaches it to {@code dbBatch} as a new
 * {@link FileBatch}. The file batch gets its own {@link Identity} copied field by field (channel,
 * pipeline, process) from the row batch identity.
 *
 * @param dbBatch the batch whose row data is scanned and whose file batch is replaced
 * @throws ExtractException declared by the extractor contract
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    FileBatch extracted = new FileBatch();
    extracted.setFiles(doFileExtract(dbBatch.getRowBatch()));

    // Copy the identity instead of sharing the row batch's instance.
    Identity rowIdentity = dbBatch.getRowBatch().getIdentity();
    Identity copy = new Identity();
    copy.setChannelId(rowIdentity.getChannelId());
    copy.setPipelineId(rowIdentity.getPipelineId());
    copy.setProcessId(rowIdentity.getProcessId());
    extracted.setIdentity(copy);

    dbBatch.setFileBatch(extracted);
}
use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.
the class RowDataHttpPipe method getDbBatch.
/**
 * Downloads the batch file referenced by {@code key} and rebuilds the {@link DbBatch} stored in it.
 *
 * <p>The file is expected to contain a 4-byte length followed by the row batch protobuf, then a
 * second 4-byte length followed by the file batch protobuf. When the key carries both an encryption
 * key and a crc, the downloaded file is decoded first.
 *
 * @param key the pipe key holding the download url, identity and optional key/crc
 * @return the reconstructed batch with both its row batch and file batch populated
 * @throws PipeException with message {@code download_error} when retrieval fails, or
 *                       {@code deserial_error} when the file cannot be read or is truncated
 */
private DbBatch getDbBatch(HttpPipeKey key) {
    String dataUrl = key.getUrl();
    Pipeline pipeline = configClientService.findPipeline(key.getIdentity().getPipelineId());
    DataRetriever dataRetriever = dataRetrieverFactory.createRetriever(pipeline.getParameters().getRetriever(),
                                                                       dataUrl,
                                                                       downloadDir);
    File archiveFile = null;
    try {
        dataRetriever.connect();
        dataRetriever.doRetrieve();
        archiveFile = dataRetriever.getDataAsFile();
    } catch (Exception e) {
        dataRetriever.abort();
        throw new PipeException("download_error", e);
    } finally {
        dataRetriever.disconnect();
    }

    // Decode the file when it was transferred encrypted.
    if (StringUtils.isNotEmpty(key.getKey()) && StringUtils.isNotEmpty(key.getCrc())) {
        decodeFile(archiveFile, key.getKey(), key.getCrc());
    }

    InputStream input = null;
    try {
        input = new BufferedInputStream(new FileInputStream(archiveFile));
        DbBatch dbBatch = new DbBatch();

        // First section: length-prefixed row batch.
        int length = readBatchLength(input);
        BatchProto.RowBatch rowbatchProto = BatchProto.RowBatch.parseFrom(new LimitedInputStream(input, length));

        // Rebuild the original model objects.
        RowBatch rowBatch = new RowBatch();
        rowBatch.setIdentity(build(rowbatchProto.getIdentity()));
        for (BatchProto.RowData rowDataProto : rowbatchProto.getRowsList()) {
            EventData eventData = new EventData();
            eventData.setPairId(rowDataProto.getPairId());
            eventData.setTableId(rowDataProto.getTableId());
            eventData.setTableName(rowDataProto.getTableName());
            eventData.setSchemaName(rowDataProto.getSchemaName());
            eventData.setEventType(EventType.valuesOf(rowDataProto.getEventType()));
            eventData.setExecuteTime(rowDataProto.getExecuteTime());
            // Sync mode / consistency are only restored when they were written.
            if (StringUtils.isNotEmpty(rowDataProto.getSyncMode())) {
                eventData.setSyncMode(SyncMode.valuesOf(rowDataProto.getSyncMode()));
            }
            if (StringUtils.isNotEmpty(rowDataProto.getSyncConsistency())) {
                eventData.setSyncConsistency(SyncConsistency.valuesOf(rowDataProto.getSyncConsistency()));
            }
            // Primary keys.
            List<EventColumn> keys = new ArrayList<EventColumn>();
            for (BatchProto.Column columnProto : rowDataProto.getKeysList()) {
                keys.add(buildColumn(columnProto));
            }
            eventData.setKeys(keys);
            // Previous primary keys, if any.
            if (CollectionUtils.isEmpty(rowDataProto.getOldKeysList()) == false) {
                List<EventColumn> oldKeys = new ArrayList<EventColumn>();
                for (BatchProto.Column columnProto : rowDataProto.getOldKeysList()) {
                    oldKeys.add(buildColumn(columnProto));
                }
                eventData.setOldKeys(oldKeys);
            }
            // Remaining column values.
            List<EventColumn> columns = new ArrayList<EventColumn>();
            for (BatchProto.Column columnProto : rowDataProto.getColumnsList()) {
                columns.add(buildColumn(columnProto));
            }
            eventData.setColumns(columns);
            eventData.setRemedy(rowDataProto.getRemedy());
            eventData.setSize(rowDataProto.getSize());
            eventData.setSql(rowDataProto.getSql());
            eventData.setDdlSchemaName(rowDataProto.getDdlSchemaName());
            eventData.setHint(rowDataProto.getHint());
            eventData.setWithoutSchema(rowDataProto.getWithoutSchema());
            // Add the record to the batch.
            rowBatch.merge(eventData);
        }
        dbBatch.setRowBatch(rowBatch);

        // Second section: length-prefixed file batch.
        length = readBatchLength(input);
        BatchProto.FileBatch filebatchProto = BatchProto.FileBatch.parseFrom(new LimitedInputStream(input, length));

        // Rebuild the original model objects.
        FileBatch fileBatch = new FileBatch();
        fileBatch.setIdentity(build(filebatchProto.getIdentity()));
        for (BatchProto.FileData fileDataProto : filebatchProto.getFilesList()) {
            FileData fileData = new FileData();
            fileData.setPairId(fileDataProto.getPairId());
            fileData.setTableId(fileDataProto.getTableId());
            fileData.setEventType(EventType.valuesOf(fileDataProto.getEventType()));
            fileData.setLastModifiedTime(fileDataProto.getLastModifiedTime());
            fileData.setNameSpace(fileDataProto.getNamespace());
            fileData.setPath(fileDataProto.getPath());
            fileData.setSize(fileDataProto.getSize());
            // Add the record to the file batch.
            fileBatch.getFiles().add(fileData);
        }
        dbBatch.setFileBatch(fileBatch);
        return dbBatch;
    } catch (IOException e) {
        throw new PipeException("deserial_error", e);
    } finally {
        // Close the file stream itself; the previous code closed an unused, always-null reader
        // and leaked the file handle.
        IOUtils.closeQuietly(input);
    }
}

/**
 * Reads the 4-byte length prefix of the next section, looping until all four bytes have arrived
 * because a single {@code read} call is allowed to return fewer bytes than requested.
 *
 * @param input the stream positioned at the length prefix
 * @return the decoded length
 * @throws IOException when the stream ends before four bytes were read
 */
private static int readBatchLength(InputStream input) throws IOException {
    byte[] lengthBytes = new byte[4];
    int offset = 0;
    while (offset < lengthBytes.length) {
        int count = input.read(lengthBytes, offset, lengthBytes.length - offset);
        if (count < 0) {
            throw new IOException("unexpected end of stream while reading batch length");
        }
        offset += count;
    }
    return ByteUtils.bytes2int(lengthBytes);
}
Aggregations