Search in sources :

Example 16 with EtlEventData

use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in project otter by alibaba.

the class SelectTask method processSelect.

/**
 * Main select loop: while {@code running} is true, repeatedly pulls a batch
 * from {@code otterSelector}, obtains an {@link EtlEventData} slot from the
 * arbitrate service, registers the batch in {@code batchBuffer}, and hands the
 * actual work (merging rows into a {@link RowBatch}, pushing it through
 * {@code rowDataPipeDelegate}, and signalling the select event) to
 * {@code executorService} as a {@link SetlFuture}.
 *
 * <p>Any non-interrupt failure is logged and reported through
 * {@code sendRollbackTermin}; an interrupt ends the loop by returning.
 */
private void processSelect() {
    while (running) {
        try {
            // Wait for ProcessTermin to be drained; this call blocks.
            // When ProcessTermin detects a rollback it signals a pause immediately,
            // which is more timely than the distributed permit.
            canStartSelector.get();
            // Check whether this node is the current working node: the S (select) module
            // must never run on two nodes at once, otherwise the selector can easily
            // produce corrupted data.
            if (needCheck) {
                checkContinueWork();
            }
            // If the channel is blocked/suspended, wait for the manager to finish handling
            // it and resume synchronization.
            // This also lets us stop promptly after a rollback.
            arbitrateEventService.toolEvent().waitForPermit(pipelineId);
            // Problem that startVersion is meant to solve: when a rollback happens, decide as
            // well as possible whether the fetched data predates or postdates the rollback,
            // and discard the pre-rollback data.
            // (After a rollback, the batches fetched earlier did not actually succeed, and a
            // get returns the batch that follows them. If that data is not discarded, later
            // data is executed first, then processing returns to the failure point and runs
            // it again.)
            // int startVersion = rversion.get();
            Message gotMessage = otterSelector.selector();
            // Modified by ljh on 2012-09-10: startVersion must be read AFTER the data is
            // obtained.
            // Reading it before the fetch (hit a concurrency bug):
            // // a. startVersion is read first, then data is fetched; a rollback starts and
            // //    completes during the fetch, so when the selector returns the data cursor
            // //    is already at the end.
            // // b. The version check then sees a change and triggers another fetch; the get
            // //    cursor is already at the end of the queue and nothing can be fetched,
            // //    so the loop waits forever.
            // Reading it after the fetch (a small flaw):
            // // a. When rollback and selector run concurrently and old pre-rollback data is
            // //    returned, startVersion is not initialized yet, so the rollback cannot be
            // //    detected and later changes are synchronized early.
            // //    (The probability is fairly high; it depends on the time gap between the
            // //    selector call and the startVersion initialization.)
            int startVersion = rversion.get();
            if (canStartSelector.state() == false) {
                // Did a failure occur?
                // Roll back the data fetched at the moment of the failure:
                // otterSelector.selector() loops internally, so a rollback may have happened
                // that it has not noticed yet.
                rollback(gotMessage.getId());
                continue;
            }
            if (CollectionUtils.isEmpty(gotMessage.getDatas())) {
                // Handle empty data: the cursor still has to be advanced; the data may have
                // been loop-back data that was filtered out.
                // Add it to the pending-response buffer. No need to await a termin signal
                // because no s/e/t/l flow was started for it.
                batchBuffer.put(new BatchTermin(gotMessage.getId(), false));
                continue;
            }
            final EtlEventData etlEventData = arbitrateEventService.selectEvent().await(pipelineId);
            if (rversion.get() != startVersion) {
                // The version changed, i.e. a rollback happened in between; this data must
                // be discarded.
                logger.warn("rollback happend , should skip this data and get new message.");
                // Make sure the rollback has completed.
                canStartSelector.get();
                // At this point one s/e/t/l round must be executed whether or not there is data.
                gotMessage = otterSelector.selector();
            }
            final Message message = gotMessage;
            final BatchTermin batchTermin = new BatchTermin(message.getId(), etlEventData.getProcessId());
            // Add to the pending-response buffer list.
            batchBuffer.put(batchTermin);
            Runnable task = new Runnable() {

                public void run() {
                    // Set up profiling information.
                    boolean profiling = isProfiling();
                    Long profilingStartTime = null;
                    if (profiling) {
                        profilingStartTime = System.currentTimeMillis();
                    }
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // Remember the worker thread's name so it can be restored in finally.
                    String currentName = Thread.currentThread().getName();
                    Thread.currentThread().setName(createTaskName(pipelineId, "SelectWorker"));
                    try {
                        pipeline = configClientService.findPipeline(pipelineId);
                        List<EventData> eventData = message.getDatas();
                        // Default to the slot's start time; overridden below by the execute
                        // time of the first record when the batch is not empty.
                        long startTime = etlEventData.getStartTime();
                        if (!CollectionUtils.isEmpty(eventData)) {
                            startTime = eventData.get(0).getExecuteTime();
                        }
                        Channel channel = configClientService.findChannelByPipelineId(pipelineId);
                        RowBatch rowBatch = new RowBatch();
                        // Build the unique identity.
                        Identity identity = new Identity();
                        identity.setChannelId(channel.getId());
                        identity.setPipelineId(pipelineId);
                        identity.setProcessId(etlEventData.getProcessId());
                        rowBatch.setIdentity(identity);
                        // Merge the data.
                        for (EventData data : eventData) {
                            rowBatch.merge(data);
                        }
                        long nextNodeId = etlEventData.getNextNid();
                        List<PipeKey> pipeKeys = rowDataPipeDelegate.put(new DbBatch(rowBatch), nextNodeId);
                        etlEventData.setDesc(pipeKeys);
                        etlEventData.setNumber((long) eventData.size());
                        // Use the first record of the original data.
                        etlEventData.setFirstTime(startTime);
                        etlEventData.setBatchId(message.getId());
                        if (profiling) {
                            Long profilingEndTime = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.SELECT, new AggregationItem(profilingStartTime, profilingEndTime));
                        }
                        arbitrateEventService.selectEvent().single(etlEventData);
                    } catch (Throwable e) {
                        // Non-interrupt failures are logged and reported as a rollback
                        // termin; interrupts are only logged.
                        if (!isInterrupt(e)) {
                            logger.error(String.format("[%s] selectwork executor is error! data:%s", pipelineId, etlEventData), e);
                            sendRollbackTermin(pipelineId, e);
                        } else {
                            logger.info(String.format("[%s] selectwork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
                        }
                    } finally {
                        Thread.currentThread().setName(currentName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // Build a pending task so that it can be exited when the thread is shut down.
            SetlFuture extractFuture = new SetlFuture(StageType.SELECT, etlEventData.getProcessId(), pendingFuture, task);
            executorService.execute(extractFuture);
        } catch (Throwable e) {
            // Non-interrupt failures trigger a rollback termin and the loop continues;
            // an interrupt terminates the select loop.
            if (!isInterrupt(e)) {
                logger.error(String.format("[%s] selectTask is error!", pipelineId), e);
                sendRollbackTermin(pipelineId, e);
            } else {
                logger.info(String.format("[%s] selectTask is interrrupt!", pipelineId), e);
                return;
            }
        }
    }
}
Also used : Message(com.alibaba.otter.node.etl.select.selector.Message) Channel(com.alibaba.otter.shared.common.model.config.channel.Channel) PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) TerminEventData(com.alibaba.otter.shared.arbitrate.model.TerminEventData) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) EventData(com.alibaba.otter.shared.etl.model.EventData) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) Identity(com.alibaba.otter.shared.etl.model.Identity) SetlFuture(com.alibaba.otter.node.etl.extract.SetlFuture)

Example 17 with EtlEventData

use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in project otter by alibaba.

the class SelectRpcArbitrateEvent method await.

/**
 * Waits for the next process id that the select stage may work on and returns
 * a freshly populated {@link EtlEventData} for it.
 *
 * <p>Each attempt waits for the channel permit, waits for a process id, then
 * re-reads the channel status. If the channel is not in START state the
 * process id is deleted from ZooKeeper and the attempt is repeated; a
 * {@link ZkNoNodeException} also causes a retry. Retries are done in a loop
 * rather than by recursion so that a long run of ignored process ids cannot
 * grow the call stack without bound.
 *
 * @param pipelineId pipeline to wait on; must not be null
 * @return event data carrying the pipeline id, process id, start time and next node id
 * @throws InterruptedException propagated from the blocking waits
 * @throws ArbitrateException if no next extract node is available or a ZooKeeper error occurs
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);
    while (true) {
        // Look the monitors up on every attempt, exactly as each recursive call did.
        PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
        // Block until the permit is granted.
        permitMonitor.waitForPermit();
        SelectProcessListener selectProcessListener = ArbitrateFactory.getInstance(pipelineId, SelectProcessListener.class);
        // A process id that satisfies the conditions.
        Long processId = selectProcessListener.waitForProcess();
        // Re-query the current status right away; it may change at any time.
        ChannelStatus status = permitMonitor.getChannelPermit();
        if (!status.isStart()) {
            logger.warn("pipelineId[{}] select ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
            // Added by ljh 2013-02-01 after hitting a bug:
            // a. One machine issued a RESTART command and began deleting the process list.
            // b. Another machine (the select working node) had not yet received the PAUSE
            //    push, so it created one more process node.
            // c. When the PAUSE command arrived later the processId was dropped, leaving
            //    an unused processId behind.
            // Deleting here can race with the RESTART command deleting the same
            // processId in parallel; so far this has not been a problem.
            String path = StagePathUtils.getProcess(pipelineId, processId);
            // A failed delete is ignored.
            zookeeper.delete(path);
            // Retry from the top.
            continue;
        }
        try {
            EtlEventData eventData = new EtlEventData();
            eventData.setPipelineId(pipelineId);
            eventData.setProcessId(processId);
            // Use the current time as the start time.
            eventData.setStartTime(new Date().getTime());
            // Look up the next processing node.
            Node node = LoadBalanceFactory.getNextExtractNode(pipelineId);
            if (node == null) {
                // terminEvent.single(termin);
                throw new ArbitrateException("Select_single", "no next node");
            }
            eventData.setNextNid(node.getId());
            // Mark the process as used.
            markUsed(eventData);
            // This is the only path that returns.
            return eventData;
        } catch (ZkNoNodeException e) {
            logger.error("pipeline[{}] processId[{}] is invalid , retry again", pipelineId, processId);
            // The node does not exist, which means an error occurred; fall through and
            // run another attempt.
        } catch (ZkException e) {
            throw new ArbitrateException("Select_await", e.getMessage(), e);
        }
    }
}
Also used : ZkNoNodeException(org.I0Itec.zkclient.exception.ZkNoNodeException) ZkException(org.I0Itec.zkclient.exception.ZkException) PermitMonitor(com.alibaba.otter.shared.arbitrate.impl.setl.monitor.PermitMonitor) Node(com.alibaba.otter.shared.common.model.config.node.Node) SelectProcessListener(com.alibaba.otter.shared.arbitrate.impl.setl.rpc.monitor.SelectProcessListener) ArbitrateException(com.alibaba.otter.shared.arbitrate.exception.ArbitrateException) ChannelStatus(com.alibaba.otter.shared.common.model.config.channel.ChannelStatus) Date(java.util.Date) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData)

Example 18 with EtlEventData

use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in project otter by alibaba.

the class TransformRpcArbitrateEvent method await.

/**
 * Waits for the next process id that is ready for the transform stage and
 * returns the last stage data recorded for it, with the next node set to the
 * current node.
 *
 * <p>Each attempt waits for the channel permit, waits for a process id, then
 * re-reads the channel status. If the status is neither START nor PAUSE the
 * process id is ignored and the attempt is repeated. Retries are done in a
 * loop rather than by recursion so that a long run of ignored process ids
 * cannot grow the call stack without bound.
 *
 * @param pipelineId pipeline to wait on; must not be null
 * @return the event data of the selected process, with its next node id set
 * @throws InterruptedException propagated from the blocking waits
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);
    while (true) {
        // Look the monitors up on every attempt, exactly as each recursive call did.
        PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
        // Block until the permit is granted.
        permitMonitor.waitForPermit();
        RpcStageController stageController = ArbitrateFactory.getInstance(pipelineId, RpcStageController.class);
        // A process id that satisfies the conditions.
        Long processId = stageController.waitForProcess(StageType.TRANSFORM);
        ChannelStatus status = permitMonitor.getChannelPermit();
        if (status.isStart() || status.isPause()) {
            // PAUSE is processed as well, to avoid wrongly dropping a processId that is
            // in pause state and thereby hanging the channel.
            EtlEventData eventData = stageController.getLastData(processId);
            // The next node is this node itself.
            eventData.setNextNid(ArbitrateConfigUtils.getCurrentNid());
            return eventData;
        }
        logger.warn("pipelineId[{}] transform ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
        // Not START or PAUSE: ignore this process id and run another attempt.
    }
}
Also used : PermitMonitor(com.alibaba.otter.shared.arbitrate.impl.setl.monitor.PermitMonitor) ChannelStatus(com.alibaba.otter.shared.common.model.config.channel.ChannelStatus) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData)

Example 19 with EtlEventData

use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in project otter by alibaba.

the class ExtractZooKeeperArbitrateEvent method await.

// private TerminArbitrateEvent terminEvent;
/**
 * Waits for the next process id that is ready for the extract stage and
 * returns the event data published by the select stage.
 *
 * <pre>
 * Algorithm:
 * 1. Check the current permit and block until it is granted (handles the channel's pause state).
 * 2. Block until a process id that satisfies the conditions is available.
 * 3. Re-check the permit status at that moment (error signals may arrive while waiting
 *    for the process id, in which case the process node is deleted).
 * 4. Read the EventData passed on by select, add the next-node information and return it.
 * </pre>
 *
 * <p>Retries (status not START, or the stage node no longer exists) are done
 * in a loop rather than by recursion so that a long run of retries cannot
 * grow the call stack without bound.
 *
 * @param pipelineId pipeline to wait on; must not be null
 * @return the select-stage event data with its next node id set
 * @throws InterruptedException propagated from the blocking waits
 * @throws ArbitrateException if no next transform node is available or a ZooKeeper error occurs
 */
public EtlEventData await(Long pipelineId) throws InterruptedException {
    Assert.notNull(pipelineId);
    while (true) {
        // Look the monitors up on every attempt, exactly as each recursive call did.
        PermitMonitor permitMonitor = ArbitrateFactory.getInstance(pipelineId, PermitMonitor.class);
        // Block until the permit is granted.
        permitMonitor.waitForPermit();
        ExtractStageListener extractStageListener = ArbitrateFactory.getInstance(pipelineId, ExtractStageListener.class);
        // A process id that satisfies the conditions.
        Long processId = extractStageListener.waitForProcess();
        // Re-query the current status right away; it may change at any time.
        ChannelStatus status = permitMonitor.getChannelPermit();
        if (!status.isStart()) {
            logger.warn("pipelineId[{}] extract ignore processId[{}] by status[{}]", new Object[] { pipelineId, processId, status });
            // Ignore this process id and run another attempt.
            continue;
        }
        // Build the ZooKeeper path from pipelineId + processId.
        String path = StagePathUtils.getSelectStage(pipelineId, processId);
        try {
            byte[] data = zookeeper.readData(path);
            EtlEventData eventData = JsonUtils.unmarshalFromByte(data, EtlEventData.class);
            // Look up the next processing node.
            Node node = LoadBalanceFactory.getNextTransformNode(pipelineId);
            if (node == null) {
                // terminEvent.single(termin);
                throw new ArbitrateException("Extract_single", "no next node");
            }
            eventData.setNextNid(node.getId());
            // This is the only path that returns.
            return eventData;
        } catch (ZkNoNodeException e) {
            logger.error("pipeline[{}] processId[{}] is invalid , retry again", pipelineId, processId);
            // The node does not exist, which means an error occurred; fall through and
            // run another attempt.
        } catch (ZkException e) {
            throw new ArbitrateException("Extract_await", e.getMessage(), e);
        }
    }
}
Also used : ZkNoNodeException(org.I0Itec.zkclient.exception.ZkNoNodeException) ZkException(org.I0Itec.zkclient.exception.ZkException) PermitMonitor(com.alibaba.otter.shared.arbitrate.impl.setl.monitor.PermitMonitor) Node(com.alibaba.otter.shared.common.model.config.node.Node) ArbitrateException(com.alibaba.otter.shared.arbitrate.exception.ArbitrateException) ChannelStatus(com.alibaba.otter.shared.common.model.config.channel.ChannelStatus) ExtractStageListener(com.alibaba.otter.shared.arbitrate.impl.setl.zookeeper.monitor.ExtractStageListener) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData)

Example 20 with EtlEventData

use of com.alibaba.otter.shared.arbitrate.model.EtlEventData in project otter by alibaba.

the class MemoryStageController method termin.

/**
 * Terminates every process currently tracked in {@code progress} with the
 * given termin type.
 *
 * <p>Process ids are handled in ascending order. For each one a
 * {@link TerminEventData} is built (copying batch/time/size details from the
 * process's event data when it has any), offered via {@code offerTermin}, and
 * the process is removed from {@code progress}. Finally the select scheduling
 * is re-initialized.
 *
 * @param type termin type to stamp on every generated termin signal
 */
public synchronized void termin(TerminType type) {
    // Snapshot the tracked process ids and order them ascending.
    List<Long> orderedIds = new ArrayList<Long>(progress.keySet());
    Collections.sort(orderedIds);

    for (int i = 0; i < orderedIds.size(); i++) {
        Long pid = orderedIds.get(i);
        EtlEventData lastData = progress.get(pid).getData();

        // Build the termin signal for this process.
        TerminEventData termin = new TerminEventData();
        termin.setPipelineId(getPipelineId());
        termin.setType(type);
        termin.setCode("channel");
        termin.setDesc(type.toString());
        termin.setProcessId(pid);

        // Copy the batch details only when the process has event data.
        if (lastData != null) {
            termin.setBatchId(lastData.getBatchId());
            termin.setCurrNid(lastData.getCurrNid());
            termin.setStartTime(lastData.getStartTime());
            termin.setEndTime(lastData.getEndTime());
            termin.setFirstTime(lastData.getFirstTime());
            termin.setNumber(lastData.getNumber());
            termin.setSize(lastData.getSize());
            termin.setExts(lastData.getExts());
        }

        offerTermin(termin);
        progress.remove(pid);
    }

    // Re-initialize the select scheduling.
    initSelect();
}
Also used : TerminEventData(com.alibaba.otter.shared.arbitrate.model.TerminEventData) ArrayList(java.util.ArrayList) AtomicLong(java.util.concurrent.atomic.AtomicLong) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData)

Aggregations

EtlEventData (com.alibaba.otter.shared.arbitrate.model.EtlEventData)25 ArrayList (java.util.ArrayList)10 Mock (mockit.Mock)8 PermitMonitor (com.alibaba.otter.shared.arbitrate.impl.setl.monitor.PermitMonitor)7 TerminEventData (com.alibaba.otter.shared.arbitrate.model.TerminEventData)7 ChannelStatus (com.alibaba.otter.shared.common.model.config.channel.ChannelStatus)7 ZkException (org.I0Itec.zkclient.exception.ZkException)7 ZkNoNodeException (org.I0Itec.zkclient.exception.ZkNoNodeException)7 ArbitrateException (com.alibaba.otter.shared.arbitrate.exception.ArbitrateException)6 BaseArbitrateEventTest (com.alibaba.otter.shared.arbitrate.setl.event.BaseArbitrateEventTest)6 Test (org.testng.annotations.Test)6 SelectZooKeeperArbitrateEvent (com.alibaba.otter.shared.arbitrate.impl.setl.zookeeper.SelectZooKeeperArbitrateEvent)5 AggregationItem (com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem)4 PipeKey (com.alibaba.otter.node.etl.common.pipe.PipeKey)4 ExtractZooKeeperArbitrateEvent (com.alibaba.otter.shared.arbitrate.impl.setl.zookeeper.ExtractZooKeeperArbitrateEvent)4 Node (com.alibaba.otter.shared.common.model.config.node.Node)4 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)4 SetlFuture (com.alibaba.otter.node.etl.extract.SetlFuture)3 TransformZooKeeperArbitrateEvent (com.alibaba.otter.shared.arbitrate.impl.setl.zookeeper.TransformZooKeeperArbitrateEvent)3 Date (java.util.Date)3