Example use of com.alibaba.otter.shared.etl.model.Identity in the otter project by Alibaba.
From the class RowDataHttpPipe, method build.
/**
 * Rebuilds an {@link Identity} model object from its protobuf counterpart.
 *
 * @param identityProto protobuf message whose channel, pipeline and process ids are read
 * @return a freshly created Identity carrying the three ids copied from the proto
 */
private Identity build(BatchProto.Identity identityProto) {
    final Identity rebuilt = new Identity();
    // Copy each id field across one-to-one.
    rebuilt.setChannelId(identityProto.getChannelId());
    rebuilt.setPipelineId(identityProto.getPipelineId());
    rebuilt.setProcessId(identityProto.getProcessId());
    return rebuilt;
}
Example use of com.alibaba.otter.shared.etl.model.Identity in the otter project by Alibaba.
From the class SelectTask, method processSelect.
/**
 * Select-stage main loop.
 *
 * While {@code running} is true, each iteration: waits on {@code canStartSelector} and on the
 * pipeline permit, pulls a {@code Message} from {@code otterSelector}, registers a
 * {@code BatchTermin} in {@code batchBuffer}, and submits a task to {@code executorService}.
 * The submitted task merges the message's {@code EventData} into a {@code RowBatch}, hands it to
 * {@code rowDataPipeDelegate}, fills in the {@code EtlEventData} and signals it through
 * {@code arbitrateEventService.selectEvent().single(...)}.
 *
 * Error handling: a Throwable for which {@code isInterrupt(e)} is false is logged and followed by
 * {@code sendRollbackTermin}; in the outer loop an interrupt is logged and ends the method.
 */
private void processSelect() {
while (running) {
try {
// Wait for ProcessTermin to be exhausted; this call blocks.
// When ProcessTermin detects a rollback it signals a pause immediately, which is more timely than the distributed permit.
canStartSelector.get();
// Check whether this node is the working node: the S (select) module must not run on two nodes at once, otherwise the selector easily produces disordered data.
if (needCheck) {
checkContinueWork();
}
// When the pipeline is blocked/suspended, wait for the manager to finish handling it and resume synchronization.
// This also lets the loop stop promptly after a rollback.
arbitrateEventService.toolEvent().waitForPermit(pipelineId);
// The problem startVersion is meant to solve: when a rollback happens, determine as best we can whether the fetched data is from before or after the rollback, and try to discard pre-rollback data.
// (After a rollback, the batches fetched earlier were not actually executed successfully, and get returns the batch after them; if that data is not discarded, later data would be executed first, then processing would return to the failure point and run again.)
// int startVersion = rversion.get();
Message gotMessage = otterSelector.selector();
// modify by ljh at 2012-09-10: reading startVersion should happen after the data has been obtained.
// If placed before: (hit a concurrency bug)
// // a.
// startVersion is read first and then the data is fetched; during the fetch a rollback starts and completes, so by the time selector returns the data has already been read to the end.
// // b. The version check then sees a change and triggers another fetch; at that point the get
// cursor is already at the end of the queue and nothing can be fetched, so it waits forever.
// If placed after: (a small flaw)
// // a.
// When rollback and selector run concurrently and old pre-rollback data is fetched, startVersion is not yet initialized, so the rollback cannot be detected and later changes get synchronized early.
// (The probability is fairly high; it depends on the interval between selector and the startVersion initialization.)
int startVersion = rversion.get();
if (canStartSelector.state() == false) {
// Did an exception occur?
// Roll back the data fetched at the instant of the exception: otterSelector.selector() loops, so a rollback may have happened that it has not yet noticed.
rollback(gotMessage.getId());
continue;
}
if (CollectionUtils.isEmpty(gotMessage.getDatas())) {
// Handle empty data: the cursor still has to be advanced; the data may have been loop-back data that was filtered out.
// Add it to the pending-response buffer list; no need to await a termin signal because the s/e/t/l flow was never started.
batchBuffer.put(new BatchTermin(gotMessage.getId(), false));
continue;
}
final EtlEventData etlEventData = arbitrateEventService.selectEvent().await(pipelineId);
if (rversion.get() != startVersion) {
// The version changed, i.e. a rollback happened in between, so this data must be discarded.
logger.warn("rollback happend , should skip this data and get new message.");
// Confirm that the rollback has completed.
canStartSelector.get();
// At this point one s/e/t/l pass must be executed whether or not there is data.
gotMessage = otterSelector.selector();
}
final Message message = gotMessage;
final BatchTermin batchTermin = new BatchTermin(message.getId(), etlEventData.getProcessId());
// Add to the pending-response buffer list.
batchBuffer.put(batchTermin);
Runnable task = new Runnable() {
public void run() {
// Set up profiling information.
boolean profiling = isProfiling();
Long profilingStartTime = null;
if (profiling) {
profilingStartTime = System.currentTimeMillis();
}
MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
// Remember the thread name so the finally block can restore it.
String currentName = Thread.currentThread().getName();
Thread.currentThread().setName(createTaskName(pipelineId, "SelectWorker"));
try {
pipeline = configClientService.findPipeline(pipelineId);
List<EventData> eventData = message.getDatas();
// Default to the event's start time; overridden by the first record's execute time when data is present.
long startTime = etlEventData.getStartTime();
if (!CollectionUtils.isEmpty(eventData)) {
startTime = eventData.get(0).getExecuteTime();
}
Channel channel = configClientService.findChannelByPipelineId(pipelineId);
RowBatch rowBatch = new RowBatch();
// Build the unique identity.
Identity identity = new Identity();
identity.setChannelId(channel.getId());
identity.setPipelineId(pipelineId);
identity.setProcessId(etlEventData.getProcessId());
rowBatch.setIdentity(identity);
// Merge the data.
for (EventData data : eventData) {
rowBatch.merge(data);
}
long nextNodeId = etlEventData.getNextNid();
List<PipeKey> pipeKeys = rowDataPipeDelegate.put(new DbBatch(rowBatch), nextNodeId);
etlEventData.setDesc(pipeKeys);
etlEventData.setNumber((long) eventData.size());
// Use the first record of the original data.
etlEventData.setFirstTime(startTime);
etlEventData.setBatchId(message.getId());
if (profiling) {
Long profilingEndTime = System.currentTimeMillis();
stageAggregationCollector.push(pipelineId, StageType.SELECT, new AggregationItem(profilingStartTime, profilingEndTime));
}
arbitrateEventService.selectEvent().single(etlEventData);
} catch (Throwable e) {
// Non-interrupt failures are logged and followed by sendRollbackTermin; interrupts are only logged.
if (!isInterrupt(e)) {
logger.error(String.format("[%s] selectwork executor is error! data:%s", pipelineId, etlEventData), e);
sendRollbackTermin(pipelineId, e);
} else {
logger.info(String.format("[%s] selectwork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
}
} finally {
// Restore the thread name and clear the MDC key set above.
Thread.currentThread().setName(currentName);
MDC.remove(OtterConstants.splitPipelineLogFileKey);
}
}
};
// Build a pending task so that the task can exit when the thread is shut down.
SetlFuture extractFuture = new SetlFuture(StageType.SELECT, etlEventData.getProcessId(), pendingFuture, task);
executorService.execute(extractFuture);
} catch (Throwable e) {
// Non-interrupt failures: log, call sendRollbackTermin, and keep looping. Interrupts: log and leave the method.
if (!isInterrupt(e)) {
logger.error(String.format("[%s] selectTask is error!", pipelineId), e);
sendRollbackTermin(pipelineId, e);
} else {
logger.info(String.format("[%s] selectTask is interrrupt!", pipelineId), e);
return;
}
}
}
}
Example use of com.alibaba.otter.shared.etl.model.Identity in the otter project by Alibaba.
From the class FileLoadActionTest, method testWithOutRootDir.
/**
 * Loading a file batch into the root directory {@code /null} is expected to raise a
 * {@code LoadException}; any other exception is rethrown, and a normal return from
 * {@code load} fails the test.
 */
@Test
public void testWithOutRootDir() throws Exception {
    final File missingRoot = new File("/null");
    final Identity identity = buildIdentity(1L, 2L, 3L);
    final FileBatch batch = buildFileBatch(identity);
    batch.getFiles().addAll(buildFileDatas("ns_", EventType.INSERT, 0, 20, false));

    try {
        fileLoadAction.load(batch, missingRoot, null);
    } catch (Exception failure) {
        // Only a LoadException counts as success; everything else propagates unchanged.
        if (!(failure instanceof LoadException)) {
            throw failure;
        }
        return;
    }

    want.fail("unreachable code.");
}
Example use of com.alibaba.otter.shared.etl.model.Identity in the otter project by Alibaba.
From the class LocalFileLoaderActionTest, method test_load_file.
/**
 * Loads ten generated local files through {@code fileLoadAction} and checks that the
 * {@code <load dir>_loaded/} directory ends up with ten entries, then deletes that directory.
 */
@Test
public void test_load_file() {
    // Pipeline stub returned by the mocked config client for any pipeline id.
    final Pipeline stubPipeline = new Pipeline();
    stubPipeline.setId(100L);
    stubPipeline.setPairs(generatorDataMediaPair(10));
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(anyLong);
            returns(stubPipeline);
        }
    };

    Identity batchIdentity = new Identity();
    batchIdentity.setChannelId(100L);
    batchIdentity.setPipelineId(100L);
    batchIdentity.setProcessId(100L);

    FileBatch batch = new FileBatch();
    batch.setIdentity(batchIdentity);
    batch.getFiles().addAll(generatorLocalFileData("fileLoad", 10));

    // Both the load directory and the verification directory share this path prefix.
    String loadPath = tmp + File.separator + OTTERLOAD;
    WeightController weightController = new WeightController(1);
    fileLoadAction.load(batch, new File(loadPath), weightController);

    File loadedDir = new File(loadPath + "_loaded/");
    want.number(loadedDir.listFiles().length).isEqualTo(10);
    NioUtils.delete(loadedDir);
}
Example use of com.alibaba.otter.shared.etl.model.Identity in the otter project by Alibaba.
From the class OtterTransformerTest, method test_rowData_oracle_mysql.
/**
 * Transforms a single UPDATE event through a pipeline configured with one Oracle-to-MySQL
 * media pair in ROW sync mode, and checks that the resulting row batch holds exactly one record.
 */
@Test
public void test_rowData_oracle_mysql() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);

    // One media pair: Oracle source (id 1) mapped to a MySQL target.
    DataMediaPair mediaPair = new DataMediaPair();
    mediaPair.setId(1L);
    mediaPair.setPipelineId(pipeline.getId());
    mediaPair.setPullWeight(1L);
    mediaPair.setPushWeight(1L);
    DbDataMedia source = getOracleMedia();
    source.setId(1L);
    mediaPair.setSource(source);
    DbDataMedia target = getMysqlMedia();
    mediaPair.setTarget(target);

    List<DataMediaPair> mediaPairs = new ArrayList<DataMediaPair>();
    mediaPairs.add(mediaPair);
    pipeline.setPairs(mediaPairs);

    PipelineParameter parameters = new PipelineParameter();
    parameters.setSyncMode(SyncMode.ROW);
    pipeline.setParameters(parameters);

    // The mocked config client hands back the pipeline above for any id.
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(anyLong);
            returns(pipeline);
        }
    };

    // The UPDATE event: two key columns followed by seven value columns.
    EventData updateEvent = new EventData();
    updateEvent.setTableId(1L);
    updateEvent.setSchemaName("srf");
    updateEvent.setTableName("columns");
    updateEvent.setEventType(EventType.UPDATE);
    updateEvent.setExecuteTime(100L);
    updateEvent.getKeys().add(buildColumn("id", Types.NUMERIC, "1", true, false));
    updateEvent.getKeys().add(buildColumn("name", Types.VARCHAR, "ljh", true, false));
    updateEvent.getColumns().add(buildColumn("alias_name", Types.CHAR, "hello", false, false));
    updateEvent.getColumns().add(buildColumn("amount", Types.NUMERIC, "100.01", false, false));
    updateEvent.getColumns().add(buildColumn("text_b", Types.BLOB, "[116,101,120,116,95,98]", false, false));
    updateEvent.getColumns().add(buildColumn("text_c", Types.CLOB, "text_c", false, false));
    updateEvent.getColumns().add(buildColumn("curr_date", Types.DATE, "2011-01-01", false, false));
    updateEvent.getColumns().add(buildColumn("gmt_create", Types.DATE, "2011-01-01 11:11:11", false, false));
    updateEvent.getColumns().add(buildColumn("gmt_modify", Types.DATE, "2011-01-01 11:11:11", false, false));

    // Wrap the event in a row batch tagged with channel/pipeline/process id 100.
    Identity batchIdentity = new Identity();
    batchIdentity.setChannelId(100L);
    batchIdentity.setPipelineId(100L);
    batchIdentity.setProcessId(100L);
    RowBatch input = new RowBatch();
    input.setIdentity(batchIdentity);
    input.merge(updateEvent);

    Map<Class, BatchObject> transformed = otterTransformFactory.transform(input);
    RowBatch output = (RowBatch) transformed.get(EventData.class);
    want.number(output.getDatas().size()).isEqualTo(1);
}
Aggregations