Search in sources:

Example 1 with DbBatch

use of com.alibaba.otter.shared.etl.model.DbBatch in project otter by alibaba.

In the class TransformTask, method run.

/**
 * Main loop of the transform task.
 *
 * <p>While {@code running} is set, waits for the next transform event of this pipeline, wraps the
 * handling of that event in a worker and submits the worker to {@code executorService} through a
 * {@code SetlFuture}. The loop returns when an interrupt is detected; any other failure is logged,
 * reported via {@code sendRollbackTermin}, and the loop continues.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            final EtlEventData etlEventData = arbitrateEventService.transformEvent().await(pipelineId);
            Runnable worker = new Runnable() {

                @Override
                public void run() {
                    // Record the start time only when profiling is switched on.
                    final boolean profiling = isProfiling();
                    final Long startedAt = profiling ? Long.valueOf(System.currentTimeMillis()) : null;
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Rename the worker thread for the duration of the task; restored in finally.
                    final Thread self = Thread.currentThread();
                    final String originalName = self.getName();
                    self.setName(createTaskName(pipelineId, "transformWorker"));
                    try {
                        // Later on this could check whether the synchronized data is rowData.
                        List<PipeKey> sourceKeys = (List<PipeKey>) etlEventData.getDesc();
                        DbBatch batch = rowDataPipeDelegate.get(sourceKeys);
                        // The batch may be null: with insufficient memory or a network failure, long
                        // blocking can make the fetch from the pipe fail, and the data may already
                        // have been deleted by the previous node.
                        if (batch == null) {
                            processMissData(pipelineId, "transform miss data with keys:" + sourceKeys.toString());
                            return;
                        }
                        // Convert to the target-side tid based on the corresponding tid; field-level
                        // processing could be added here later.
                        // For now rowBatchs and fileBatchs are assumed not to need heterogeneous
                        // conversion.
                        Map<Class, BatchObject> transformedRows = otterTransformerFactory.transform(batch.getRowBatch());
                        // A single pipeline may carry both Mq and Db synchronization types.
                        batch.setRowBatch((RowBatch) transformedRows.get(EventData.class));
                        if (batch.getFileBatch() != null) {
                            Map<Class, BatchObject> transformedFiles = otterTransformerFactory.transform(batch.getFileBatch());
                            batch.setFileBatch((FileBatch) transformedFiles.get(FileData.class));
                        }
                        // Hand the transformed batch over to the next stage.
                        List<PipeKey> forwardedKeys = rowDataPipeDelegate.put(batch, etlEventData.getNextNid());
                        etlEventData.setDesc(forwardedKeys);
                        if (profiling) {
                            Long finishedAt = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.TRANSFORM, new AggregationItem(startedAt, finishedAt));
                        }
                        // Processing is done: signal completion of this event.
                        arbitrateEventService.transformEvent().single(etlEventData);
                    } catch (Throwable e) {
                        if (isInterrupt(e)) {
                            logger.info(String.format("[%s] transformWork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
                        } else {
                            logger.error(String.format("[%s] transformWork executor is error! data:%s", pipelineId, etlEventData), e);
                            sendRollbackTermin(pipelineId, e);
                        }
                    } finally {
                        self.setName(originalName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Build a pending task so that it can be dropped when the thread is shut down.
            SetlFuture transformFuture = new SetlFuture(StageType.TRANSFORM, etlEventData.getProcessId(), pendingFuture, worker);
            executorService.execute(transformFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                logger.info(String.format("[%s] transformTask is interrupted!", pipelineId), e);
                return;
            }
            logger.error(String.format("[%s] transformTask is error!", pipelineId), e);
            sendRollbackTermin(pipelineId, e);
        }
    }
}
Also used : PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) BatchObject(com.alibaba.otter.shared.etl.model.BatchObject) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List) SetlFuture(com.alibaba.otter.node.etl.extract.SetlFuture)

Example 2 with DbBatch

use of com.alibaba.otter.shared.etl.model.DbBatch in project otter by alibaba.

In the class MemoryPipeTest, method test_timeout.

/**
 * Puts a batch into a {@code RowDataMemoryPipe} configured with a one-second timeout, waits 1.5
 * seconds, and expects the subsequent {@code get} to return {@code null}.
 */
@Test
public void test_timeout() {
    // Pipe with a timeout of one second.
    RowDataMemoryPipe memoryPipe = new RowDataMemoryPipe();
    memoryPipe.setTimeout(1000L);
    memoryPipe.setDownloadDir(tmp);
    try {
        memoryPipe.afterPropertiesSet();
    } catch (Exception e) {
        want.fail();
    }

    // Minimal batch: a row batch that carries only an identity.
    Identity batchIdentity = new Identity();
    batchIdentity.setChannelId(100L);
    batchIdentity.setPipelineId(100L);
    batchIdentity.setProcessId(100L);
    RowBatch rows = new RowBatch();
    rows.setIdentity(batchIdentity);
    DbBatch stored = new DbBatch();
    stored.setRowBatch(rows);

    MemoryPipeKey pipeKey = memoryPipe.put(stored);
    // Wait longer than the configured timeout before reading back.
    try {
        Thread.sleep(1500L);
    } catch (InterruptedException e) {
        want.fail();
    }

    // The lookup is expected to come back empty.
    DbBatch fetched = memoryPipe.get(pipeKey);
    want.bool(fetched == null).is(true);
}
Also used : RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) MemoryPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey) Identity(com.alibaba.otter.shared.etl.model.Identity) RowDataMemoryPipe(com.alibaba.otter.node.etl.common.pipe.impl.memory.RowDataMemoryPipe) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Test(org.testng.annotations.Test) BaseOtterTest(com.alibaba.otter.node.etl.BaseOtterTest)

Example 3 with DbBatch

use of com.alibaba.otter.shared.etl.model.DbBatch in project otter by alibaba.

In the class DatabaseExtractorTest, method test_global_row.

/**
 * Runs {@code databaseExtractor.extract} on a row batch for a pipeline configured with
 * {@code SyncMode.ROW} and {@code SyncConsistency.MEDIA}, then checks the number of rows left in
 * the batch.
 */
@Test
public void test_global_row() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);
    pipeline.getParameters().setSyncMode(SyncMode.ROW);
    // Use the global (media) consistency setting.
    pipeline.getParameters().setSyncConsistency(SyncConsistency.MEDIA);

    int start = RandomUtils.nextInt();
    int count = 10;
    List<DataMediaPair> mediaPairs = getDataMediaPairForMysql(start, count);
    pipeline.setPairs(mediaPairs);

    // The config client is mocked to return the pipeline built above.
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(100L);
            returns(pipeline);
        }
    };

    // Build the input: count x count events, all on the same schema and table.
    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);
    final int end = start + count;
    for (int tableId = start; tableId < end; tableId++) {
        for (int rowId = start; rowId < end; rowId++) {
            EventData event = getEventData(tableId, rowId);
            event.setSchemaName("srf");
            event.setTableName("columns");
            rowBatch.merge(event);
        }
    }

    DbBatch batch = new DbBatch(rowBatch);
    databaseExtractor.extract(batch);
    want.number(rowBatch.getDatas().size()).isEqualTo(count);
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) EventData(com.alibaba.otter.shared.etl.model.EventData) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) Test(org.testng.annotations.Test) BaseDbTest(com.alibaba.otter.node.etl.BaseDbTest)

Example 4 with DbBatch

use of com.alibaba.otter.shared.etl.model.DbBatch in project otter by alibaba.

In the class FreedomExtractorTest, method test_mysql.

/**
 * Runs {@code freedomExtractor.extract} on a batch of {@code count * count} events and checks that
 * the row batch still holds that many rows afterwards.
 */
@Test
public void test_mysql() {
    final Pipeline pipeline = new Pipeline();
    pipeline.setId(100L);

    int start = RandomUtils.nextInt();
    int count = 10;
    List<DataMediaPair> mediaPairs = getDataMediaPairForMysql(start, count);
    pipeline.setPairs(mediaPairs);

    // The config client is mocked to return the pipeline built above.
    new NonStrictExpectations() {

        {
            configClientService.findPipeline(100L);
            returns(pipeline);
        }
    };

    // Build the input: count x count events, all on the same schema and table.
    RowBatch rows = new RowBatch();
    rows.setIdentity(identity);
    final int end = start + count;
    for (int tableId = start; tableId < end; tableId++) {
        for (int rowId = start; rowId < end; rowId++) {
            EventData event = getEventData(tableId, rowId);
            event.setSchemaName("retl");
            event.setTableName("retl_buffer");
            rows.merge(event);
        }
    }

    DbBatch batch = new DbBatch(rows);
    freedomExtractor.extract(batch);
    want.collection(batch.getRowBatch().getDatas()).sizeEq(count * count);
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) EventData(com.alibaba.otter.shared.etl.model.EventData) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) Test(org.testng.annotations.Test) BaseDbTest(com.alibaba.otter.node.etl.BaseDbTest)

Example 5 with DbBatch

use of com.alibaba.otter.shared.etl.model.DbBatch in project otter by alibaba.

In the class OtterLoaderFactoryIntegration, method test_simple.

/**
 * Loads a {@code DbBatch} holding an empty row batch and an empty file batch through
 * {@code otterLoaderFactory} on a worker thread and waits for the load to finish.
 *
 * <p>The latch is released in a {@code finally} block so the test cannot hang when the load
 * throws, and a failure raised on the worker thread is rethrown on the test thread instead of
 * being lost inside the ignored {@code Future}. If the waiting thread is interrupted, its
 * interrupt status is restored and the test returns.
 */
@Test
public void test_simple() {
    Identity identity = new Identity();
    identity.setChannelId(100L);
    identity.setPipelineId(100L);
    identity.setProcessId(100L);
    RowBatch rowBatch = new RowBatch();
    rowBatch.setIdentity(identity);
    FileBatch fileBatch = new FileBatch();
    fileBatch.setIdentity(identity);
    final DbBatch dbBatch = new DbBatch();
    dbBatch.setRowBatch(rowBatch);
    dbBatch.setFileBatch(fileBatch);
    final CountDownLatch latch = new CountDownLatch(1);
    // Single-slot holder used to relay a worker-side failure to the test thread; the write is
    // made visible by the countDown()/await() pair.
    final Throwable[] failure = new Throwable[1];
    executorService.submit(new Runnable() {

        public void run() {
            System.out.println("first run!!!!!!");
            try {
                otterLoaderFactory.load(dbBatch);
            } catch (Throwable e) {
                failure[0] = e;
            } finally {
                // Always release the waiting test thread, even when the load fails.
                latch.countDown();
            }
        }
    });
    try {
        latch.await();
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller instead of silently dropping it.
        Thread.currentThread().interrupt();
        return;
    }
    if (failure[0] != null) {
        throw new IllegalStateException("otterLoaderFactory.load failed", failure[0]);
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) Identity(com.alibaba.otter.shared.etl.model.Identity) CountDownLatch(java.util.concurrent.CountDownLatch) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) Test(org.testng.annotations.Test) BaseDbTest(com.alibaba.otter.node.etl.BaseDbTest)

Aggregations

DbBatch (com.alibaba.otter.shared.etl.model.DbBatch): 16; RowBatch (com.alibaba.otter.shared.etl.model.RowBatch): 12; Test (org.testng.annotations.Test): 9; EventData (com.alibaba.otter.shared.etl.model.EventData): 7; Identity (com.alibaba.otter.shared.etl.model.Identity): 7; Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline): 6; BaseOtterTest (com.alibaba.otter.node.etl.BaseOtterTest): 5; PipeKey (com.alibaba.otter.node.etl.common.pipe.PipeKey): 5; BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest): 4; AggregationItem (com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem): 4; EtlEventData (com.alibaba.otter.shared.arbitrate.model.EtlEventData): 4; DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair): 4; FileBatch (com.alibaba.otter.shared.etl.model.FileBatch): 4; MemoryPipeKey (com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey): 3; RpcPipeKey (com.alibaba.otter.node.etl.common.pipe.impl.rpc.RpcPipeKey): 3; SetlFuture (com.alibaba.otter.node.etl.extract.SetlFuture): 3; File (java.io.File): 3; List (java.util.List): 3; NodeCommmunicationClient (com.alibaba.otter.node.common.communication.NodeCommmunicationClient): 2; PipeException (com.alibaba.otter.node.etl.common.pipe.exception.PipeException): 2