Use of com.alibaba.otter.shared.etl.model.BatchObject in project otter by alibaba.
The class OtterTransformerTest, method test_rowData_mysql_oracle.
@Test
public void test_rowData_mysql_oracle() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);

    // one pair: MySQL as source, Oracle as target
    List<DataMediaPair> pairs = new ArrayList<DataMediaPair>();
    DataMediaPair pair1 = new DataMediaPair();
    pair1.setId(1L);
    pair1.setPipelineId(pipeline.getId());
    pair1.setPullWeight(1L);
    pair1.setPushWeight(1L);

    DbDataMedia mysqlMedia = getMysqlMedia();
    mysqlMedia.setId(1L);
    pair1.setSource(mysqlMedia);

    DbDataMedia oracleMedia = getOracleMedia();
    pair1.setTarget(oracleMedia);

    pairs.add(pair1);
    pipeline.setPairs(pairs);

    PipelineParameter param = new PipelineParameter();
    param.setSyncMode(SyncMode.ROW);
    pipeline.setParameters(param);

    // JMockit: any pipeline lookup returns the pipeline built above
    new NonStrictExpectations() {
        {
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };

    Identity identity = new Identity();
    identity.setChannelId(100L);
    identity.setPipelineId(100L);
    identity.setProcessId(100L);

    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);

    // one UPDATE event with two key columns and several value columns
    EventData eventData = new EventData();
    eventData.setTableId(1L);
    eventData.setSchemaName("srf");
    eventData.setTableName("columns");
    eventData.setEventType(EventType.UPDATE);
    eventData.setExecuteTime(100L);

    eventData.getKeys().add(buildColumn("id", Types.INTEGER, "1", true, false));
    eventData.getKeys().add(buildColumn("name", Types.VARCHAR, "ljh", true, false));
    eventData.getColumns().add(buildColumn("alias_name", Types.CHAR, "hello", false, false));
    eventData.getColumns().add(buildColumn("amount", Types.DECIMAL, "100.01", false, false));
    eventData.getColumns().add(buildColumn("text_b", Types.BLOB, "[116,101,120,116,95,98]", false, false));
    eventData.getColumns().add(buildColumn("text_c", Types.CLOB, "text_c", false, false));
    eventData.getColumns().add(buildColumn("curr_date", Types.DATE, "2011-01-01", false, false));
    eventData.getColumns().add(buildColumn("gmt_create", Types.TIMESTAMP, "2011-01-01 11:11:11", false, false));
    eventData.getColumns().add(buildColumn("gmt_modify", Types.TIMESTAMP, "2011-01-01 11:11:11", false, false));
    rowBatch.merge(eventData);

    Map<Class, BatchObject> batchs = otterTransformFactory.transform(rowBatch);
    RowBatch result = (RowBatch) batchs.get(EventData.class);
    want.number(result.getDatas().size()).isEqualTo(1);
}
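The buildColumn(...) helper used by the row-data tests is not shown on this page. Below is a minimal sketch of what it plausibly looks like, assuming it fills an EventColumn from com.alibaba.otter.shared.etl.model; the argument order (name, JDBC type, value, isKey, isNull) matches the call sites above, while the setter names are assumptions:

private EventColumn buildColumn(String name, int type, String value, boolean isKey, boolean isNull) {
    // assumed helper: wraps one column of an EventData row change
    EventColumn column = new EventColumn();
    column.setColumnName(name);   // e.g. "id"
    column.setColumnType(type);   // java.sql.Types constant, e.g. Types.INTEGER
    column.setColumnValue(value); // string-encoded value
    column.setKey(isKey);         // true for primary-key columns
    column.setNull(isNull);       // true if the value is SQL NULL
    return column;
}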
Use of com.alibaba.otter.shared.etl.model.BatchObject in project otter by alibaba.
The class OtterTransformerTest, method test_fileData.
@Test
public void test_fileData() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);

    // one pair: Oracle as source, MySQL as target
    List<DataMediaPair> pairs = new ArrayList<DataMediaPair>();
    DataMediaPair pair1 = new DataMediaPair();
    pair1.setId(1L);
    pair1.setPipelineId(pipeline.getId());
    pair1.setPullWeight(1L);
    pair1.setPushWeight(1L);

    DbDataMedia oracleMedia = getOracleMedia();
    oracleMedia.setId(1L);
    pair1.setSource(oracleMedia);

    DbDataMedia mysqlMedia = getMysqlMedia();
    pair1.setTarget(mysqlMedia);

    pairs.add(pair1);
    pipeline.setPairs(pairs);

    // JMockit: any pipeline lookup returns the pipeline built above
    new NonStrictExpectations() {
        {
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };

    Identity identity = new Identity();
    identity.setChannelId(100L);
    identity.setPipelineId(100L);
    identity.setProcessId(100L);

    FileBatch fileBatch = new FileBatch();
    fileBatch.setIdentity(identity);

    // one local file attached to pair 1
    File localFile = new File("/tmp", "httpPipeTest.jpg");
    FileData localFileData = new FileData();
    localFileData.setTableId(1L);
    localFileData.setPairId(1L);
    localFileData.setPath(localFile.getPath());
    fileBatch.getFiles().add(localFileData);

    Map<Class, BatchObject> batchs = otterTransformFactory.transform(fileBatch);
    FileBatch result = (FileBatch) batchs.get(FileData.class);
    want.number(result.getFiles().size()).isEqualTo(1);
}
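The getMysqlMedia()/getOracleMedia() helpers called by all of these tests are likewise omitted from this page. A hedged sketch of the MySQL variant follows, assuming otter's DbDataMedia/DbMediaSource config model; the url, username and password are illustrative placeholders, not values from the original:

private DbDataMedia getMysqlMedia() {
    DbDataMedia dataMedia = new DbDataMedia();
    // name/namespace should match the test's tableName/schemaName ("columns"/"srf")
    dataMedia.setName("columns");
    dataMedia.setNamespace("srf");

    DbMediaSource mediaSource = new DbMediaSource();
    mediaSource.setId(1L);
    mediaSource.setType(DataMediaType.MYSQL);
    mediaSource.setDriver("com.mysql.jdbc.Driver");
    mediaSource.setUrl("jdbc:mysql://127.0.0.1:3306"); // placeholder
    mediaSource.setUsername("xxx");                    // placeholder
    mediaSource.setPassword("xxx");                    // placeholder
    mediaSource.setEncode("UTF-8");
    dataMedia.setSource(mediaSource);
    return dataMedia;
}

getOracleMedia() would mirror this with DataMediaType.ORACLE and an Oracle JDBC driver and url.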
Use of com.alibaba.otter.shared.etl.model.BatchObject in project otter by alibaba.
The class TransformTask, method run.
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEventData = arbitrateEventService.transformEvent().await(pipelineId);
            Runnable task = new Runnable() {

                @Override
                public void run() {
                    // set up profiling info
                    boolean profiling = isProfiling();
                    Long profilingStartTime = null;
                    if (profiling) {
                        profilingStartTime = System.currentTimeMillis();
                    }
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    String currentName = Thread.currentThread().getName();
                    Thread.currentThread().setName(createTaskName(pipelineId, "transformWorker"));
                    try {
                        // later this could check whether the sync data is rowData
                        List<PipeKey> keys = (List<PipeKey>) etlEventData.getDesc();
                        DbBatch dbBatch = rowDataPipeDelegate.get(keys);
                        // may be null: memory pressure, network errors or long blocking can make the
                        // pipe read fail, and the previous node may already have deleted the data
                        if (dbBatch == null) {
                            processMissData(pipelineId, "transform miss data with keys:" + keys.toString());
                            return;
                        }

                        // map each source tid to the target-side tid; field-level processing could be added later.
                        // for now, assume rowBatchs and fileBatchs involve no heterogeneous data conversion
                        Map<Class, BatchObject> dataBatchs = otterTransformerFactory.transform(dbBatch.getRowBatch());
                        // a single pipeline may carry both Mq and Db sync types
                        dbBatch.setRowBatch((RowBatch) dataBatchs.get(EventData.class));
                        if (dbBatch.getFileBatch() != null) {
                            Map<Class, BatchObject> fileBatchs = otterTransformerFactory.transform(dbBatch.getFileBatch());
                            dbBatch.setFileBatch((FileBatch) fileBatchs.get(FileData.class));
                        }

                        // hand off to the next stage
                        List<PipeKey> nextKeys = rowDataPipeDelegate.put(dbBatch, etlEventData.getNextNid());
                        etlEventData.setDesc(nextKeys);

                        if (profiling) {
                            Long profilingEndTime = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.TRANSFORM,
                                new AggregationItem(profilingStartTime, profilingEndTime));
                        }
                        // signal the arbitrator that this stage has completed
                        arbitrateEventService.transformEvent().single(etlEventData);
                    } catch (Throwable e) {
                        if (!isInterrupt(e)) {
                            logger.error(String.format("[%s] transformWork executor is error! data:%s", pipelineId, etlEventData), e);
                            sendRollbackTermin(pipelineId, e);
                        } else {
                            logger.info(String.format("[%s] transformWork executor is interrupt! data:%s", pipelineId, etlEventData), e);
                        }
                    } finally {
                        Thread.currentThread().setName(currentName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };

            // build a pending task so it can be cancelled when the thread pool shuts down
            SetlFuture extractFuture = new SetlFuture(StageType.TRANSFORM, etlEventData.getProcessId(), pendingFuture, task);
            executorService.execute(extractFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] transformTask is interrupted!", pipelineId), e);
                return;
            } else {
                logger.error(String.format("[%s] transformTask is error!", pipelineId), e);
                sendRollbackTermin(pipelineId, e);
            }
        }
    }
}
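In S.E.T.L terms this is the T stage: the loop awaits a transform event from the arbitrator, pulls the batch produced by the previous node through the pipe, runs the row (and, if present, file) transformation via OtterTransformerFactory, pushes the result toward the node identified by etlEventData.getNextNid(), and finally signals stage completion through transformEvent().single(...).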
Use of com.alibaba.otter.shared.etl.model.BatchObject in project otter by alibaba.
The class OtterTransformerTest, method test_rowData_oracle_mysql.
@Test
public void test_rowData_oracle_mysql() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);

    // one pair: Oracle as source, MySQL as target
    List<DataMediaPair> pairs = new ArrayList<DataMediaPair>();
    DataMediaPair pair1 = new DataMediaPair();
    pair1.setId(1L);
    pair1.setPipelineId(pipeline.getId());
    pair1.setPullWeight(1L);
    pair1.setPushWeight(1L);

    DbDataMedia oracleMedia = getOracleMedia();
    oracleMedia.setId(1L);
    pair1.setSource(oracleMedia);

    DbDataMedia mysqlMedia = getMysqlMedia();
    pair1.setTarget(mysqlMedia);

    pairs.add(pair1);
    pipeline.setPairs(pairs);

    PipelineParameter param = new PipelineParameter();
    param.setSyncMode(SyncMode.ROW);
    pipeline.setParameters(param);

    // JMockit: any pipeline lookup returns the pipeline built above
    new NonStrictExpectations() {
        {
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };

    Identity identity = new Identity();
    identity.setChannelId(100L);
    identity.setPipelineId(100L);
    identity.setProcessId(100L);

    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);

    // one UPDATE event with two key columns and several value columns
    EventData eventData = new EventData();
    eventData.setTableId(1L);
    eventData.setSchemaName("srf");
    eventData.setTableName("columns");
    eventData.setEventType(EventType.UPDATE);
    eventData.setExecuteTime(100L);

    eventData.getKeys().add(buildColumn("id", Types.NUMERIC, "1", true, false));
    eventData.getKeys().add(buildColumn("name", Types.VARCHAR, "ljh", true, false));
    eventData.getColumns().add(buildColumn("alias_name", Types.CHAR, "hello", false, false));
    eventData.getColumns().add(buildColumn("amount", Types.NUMERIC, "100.01", false, false));
    eventData.getColumns().add(buildColumn("text_b", Types.BLOB, "[116,101,120,116,95,98]", false, false));
    eventData.getColumns().add(buildColumn("text_c", Types.CLOB, "text_c", false, false));
    eventData.getColumns().add(buildColumn("curr_date", Types.DATE, "2011-01-01", false, false));
    eventData.getColumns().add(buildColumn("gmt_create", Types.DATE, "2011-01-01 11:11:11", false, false));
    eventData.getColumns().add(buildColumn("gmt_modify", Types.DATE, "2011-01-01 11:11:11", false, false));
    rowBatch.merge(eventData);

    Map<Class, BatchObject> batchs = otterTransformFactory.transform(rowBatch);
    RowBatch result = (RowBatch) batchs.get(EventData.class);
    want.number(result.getDatas().size()).isEqualTo(1);
}
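Apart from reversing the pair direction (Oracle as source, MySQL as target), this test differs from test_rowData_mysql_oracle only in the source-side JDBC types: the id key and the amount column use Types.NUMERIC instead of Types.INTEGER/Types.DECIMAL, and the gmt_* columns use Types.DATE instead of Types.TIMESTAMP, reflecting how Oracle reports those column types.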
Use of com.alibaba.otter.shared.etl.model.BatchObject in project otter by alibaba.
The class OtterTransformerFactory, method transform.
/**
 * Transforms one kind of source data. The result contains different data objects,
 * generated according to the target objects defined in each DataMediaPair. <br/>
 *
 * <pre>
 * Return format: Map
 * key   : the Class of the generated target data object
 * value : the collected data for each target type
 * </pre>
 */
public Map<Class, BatchObject> transform(RowBatch rowBatch) {
    final Identity identity = translateIdentity(rowBatch.getIdentity());
    Map<Class, BatchObject> result = new HashMap<Class, BatchObject>();
    // initialize the default entry
    result.put(EventData.class, initBatchObject(identity, EventData.class));

    for (EventData eventData : rowBatch.getDatas()) {
        // process each eventData
        Long tableId = eventData.getTableId();
        Pipeline pipeline = configClientService.findPipeline(identity.getPipelineId());
        // each piece of sync data may be replicated to multiple targets
        List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, tableId);
        for (DataMediaPair pair : dataMediaPairs) {
            if (!pair.getSource().getId().equals(tableId)) {
                // skip pairs whose source is not this tableId
                continue;
            }

            OtterTransformer translate = lookup(pair.getSource(), pair.getTarget());
            // do the transformation
            Object item = translate.transform(eventData, new OtterTransformerContext(identity, pair, pipeline));
            if (item == null) {
                continue;
            }
            // merge the result
            merge(identity, result, item);
        }
    }
    return result;
}
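initBatchObject(...) and merge(...) are private helpers not reproduced on this page. Consistent with the casts in the tests above (RowBatch for EventData.class, FileBatch for FileData.class), a plausible sketch of initBatchObject follows; the error-handling branch is an assumption:

private BatchObject initBatchObject(Identity identity, Class clazz) {
    // create an empty batch of the matching concrete type, carrying the translated identity
    if (clazz == EventData.class) {
        RowBatch rowBatch = new RowBatch();
        rowBatch.setIdentity(identity);
        return rowBatch;
    } else if (clazz == FileData.class) {
        FileBatch fileBatch = new FileBatch();
        fileBatch.setIdentity(identity);
        return fileBatch;
    } else {
        throw new IllegalArgumentException("unsupported class: " + clazz); // assumed error handling
    }
}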