Search in sources :

Example 6 with Pipeline

use of com.alibaba.otter.shared.common.model.config.pipeline.Pipeline in project otter by alibaba.

the class RowDataPipeDelegate method put.

/**
 * Delivers the given batch to the node identified by {@code nid}.
 *
 * <p>When {@code isLocal(nid)} is true the batch is put on the in-memory pipe. Otherwise the
 * row data is sent through the RPC or HTTP pipe, chosen by the pipeline's pipe-choose mode
 * (in automatic mode, by comparing {@code calculateSize(data)} with {@code sizeThresold}).
 * If the batch carries files, they are sent concurrently through the attachment HTTP pipe and
 * the resulting key is appended after the row-data key.
 *
 * @param data the batch to deliver
 * @param nid  id of the target node
 * @return the pipe keys produced by the delivery: row-data key first, attachment key (if any) second
 * @throws PipeException if the pipe-choose mode is not recognised or any step of the remote
 *                       delivery fails; the underlying exception is wrapped as the cause
 */
public List<PipeKey> put(final DbBatch data, Long nid) throws PipeException {
    List<PipeKey> keys = new ArrayList<PipeKey>();
    if (isLocal(nid)) {
        keys.add(rowDataMemoryPipe.put(data));
        return keys;
    }

    Future<PipeKey> future = null;
    Pipeline pipeline = configClientService.findPipeline(data.getRowBatch().getIdentity().getPipelineId());
    if (data.getFileBatch() != null && !CollectionUtils.isEmpty(data.getFileBatch().getFiles())) {
        // Send the attachments in the background while the row data is being transferred.
        future = executorService.submit(new Callable<PipeKey>() {

            public PipeKey call() throws Exception {
                try {
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(data.getFileBatch().getIdentity().getPipelineId()));
                    return attachmentHttpPipe.put(data.getFileBatch());
                } finally {
                    MDC.remove(OtterConstants.splitPipelineLogFileKey);
                }
            }
        });
    }

    boolean succeeded = false;
    try {
        PipeChooseMode pipeChooseMode = pipeline.getParameters().getPipeChooseType();
        if (pipeChooseMode.isAutomatic()) {
            if (calculateSize(data) <= sizeThresold) {
                keys.add(rowDataRpcPipe.put(data));
            } else {
                keys.add(rowDataHttpPipe.put(data));
            }
        } else if (pipeChooseMode.isRpc()) {
            keys.add(rowDataRpcPipe.put(data));
        } else if (pipeChooseMode.isHttp()) {
            keys.add(rowDataHttpPipe.put(data));
        } else {
            throw new PipeException("pipeChooseMode is error!" + pipeChooseMode);
        }
        // Wait for the attachment transfer to finish.
        if (future != null) {
            keys.add(future.get());
        }
        succeeded = true;
    } catch (Exception e) {
        throw new PipeException(e);
    } finally {
        // If the row-data transfer failed, nobody will ever collect the attachment result:
        // cancel the background task instead of leaving it running. Cancelling a task that
        // has already completed (e.g. when future.get() itself failed) is a no-op.
        if (!succeeded && future != null) {
            future.cancel(true);
        }
    }
    return keys;
}
Also used : PipeChooseMode(com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter.PipeChooseMode) ArrayList(java.util.ArrayList) MemoryPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.memory.MemoryPipeKey) RpcPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.rpc.RpcPipeKey) HttpPipeKey(com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey) PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) Callable(java.util.concurrent.Callable) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 7 with Pipeline

use of com.alibaba.otter.shared.common.model.config.pipeline.Pipeline in project otter by alibaba.

the class DatabaseExtractor method extract.

/**
 * Processes every non-DDL event of the batch: decides per event whether a
 * {@code DatabaseExtractWorker} must be run for it, submits those workers through a
 * completion service, waits for all of them, and finally removes from the batch's event
 * list every item whose {@code filter} flag is set.
 *
 * @param dbBatch the batch to process; it and its row batch must be non-null
 * @throws ExtractException if a submitted worker fails or the waiting thread is interrupted;
 *                          outstanding tasks are cancelled via {@code cancel(futures)} first
 */
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    // Load the pipeline configuration for this batch.
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    // Pipeline-level switch: when true, every insert/update event is handed to a worker.
    boolean mustDb = pipeline.getParameters().getSyncConsistency().isMedia();
    // In row sync mode a database lookup may be required (decided per event below).
    boolean isRow = pipeline.getParameters().getSyncMode().isRow();
    // Configuration is read once here.
    // Resize the thread pool from the pipeline setting (original note: extractors are pooled).
    adjustPoolSize(pipeline.getParameters().getExtractPoolSize());
    ExecutorCompletionService completionService = new ExecutorCompletionService(executor);
    // Holds the failure (if any) observed while draining the completion service.
    ExtractException exception = null;
    // One DataItem per non-DDL event, kept in the original event order.
    List<DataItem> items = new ArrayList<DataItem>();
    List<Future> futures = new ArrayList<Future>();
    List<EventData> eventDatas = dbBatch.getRowBatch().getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        DataItem item = new DataItem(eventData);
        // The lookup is forced either by the pipeline-level setting or by the event's own
        // sync-consistency setting. (Original note: in row mode we need to check whether the
        // event already carries all columns of the row and query the database if it does not.)
        boolean flag = mustDb || (eventData.getSyncConsistency() != null && eventData.getSyncConsistency().isMedia());
        // Extra case: with Oracle (erosa) the event sometimes carries only the primary key and
        // no changed columns, so a lookup is needed as well.
        if (!flag && CollectionUtils.isEmpty(eventData.getUpdatedColumns())) {
            DataMedia dataMedia = ConfigHelper.findDataMedia(pipeline, eventData.getTableId());
            if (dataMedia.getSource().getType().isOracle()) {
                flag |= true;
                // Such events are uniformly treated as remedy operations; the record may not
                // exist in the database either when erosa performed its own lookup.
                eventData.setRemedy(true);
            }
        }
        if (isRow && !flag) {
            // Decide up front to avoid contention inside the worker threads.
            // (Original note: for views there is a further check later on.)
            flag = checkNeedDbForRowMode(pipeline, eventData);
        }
        if (flag && (eventData.getEventType().isInsert() || eventData.getEventType().isUpdate())) {
            // A lookup is needed: submit the worker for parallel execution.
            Future future = completionService.submit(new DatabaseExtractWorker(pipeline, item), null);
            if (future.isDone()) {
                // Check right away: per the original comment a caller-runs policy is in use, so
                // the task may already have finished here. This surfaces a failure immediately
                // instead of after all remaining tasks have been submitted and run.
                try {
                    future.get();
                } catch (InterruptedException e) {
                    // Cancel what was submitted so far and bail out immediately.
                    // NOTE(review): the thread's interrupt flag is not restored here — confirm
                    // callers do not rely on it.
                    cancel(futures);
                    throw new ExtractException(e);
                } catch (ExecutionException e) {
                    // Cancel what was submitted so far and bail out immediately.
                    cancel(futures);
                    throw new ExtractException(e);
                }
            }
            // Remember the submitted task.
            futures.add(future);
        }
        // Append in the original order.
        items.add(item);
    }
    // Drain the results: one take() per submitted task.
    int index = 0;
    while (index < futures.size()) {
        // Loop over all tasks that were submitted.
        try {
            // take() blocks and may itself be interrupted.
            Future future = completionService.take();
            future.get();
        } catch (InterruptedException e) {
            exception = new ExtractException(e);
            // Stop as soon as any future fails.
            // NOTE(review): interrupt flag is not restored here either — confirm intent.
            break;
        } catch (ExecutionException e) {
            exception = new ExtractException(e);
            // Stop as soon as any future fails.
            break;
        }
        index++;
    }
    if (index < futures.size()) {
        // Fewer completions than submissions means the loop above broke out on an error:
        // cancel the outstanding tasks and rethrow.
        // NOTE(review): the original comment also mentions collecting already-completed results
        // for duplicate-entry filtering; no such collection happens in this method.
        cancel(futures);
        throw exception;
    } else {
        // Everything succeeded; walk the items in their original order.
        for (int i = 0; i < items.size(); i++) {
            DataItem item = items.get(i);
            if (item.filter) {
                // Drop events flagged for filtering, e.g. when the lookup found that the record
                // no longer exists. List.remove(Object) removes the first element that equals
                // the item's event data.
                eventDatas.remove(item.getEventData());
            }
        }
    }
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) ArrayList(java.util.ArrayList) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException) DataMedia(com.alibaba.otter.shared.common.model.config.data.DataMedia)

Example 8 with Pipeline

use of com.alibaba.otter.shared.common.model.config.pipeline.Pipeline in project otter by alibaba.

the class FileExtractor method doFileExtract.

/**
 * Builds the list of {@code FileData} that corresponds to the change events of a row batch.
 *
 * <p>DDL events are skipped. For every other event, each data-media pair configured for the
 * event's table that carries a usable file-resolver definition is resolved to a
 * {@code FileResolver}, which is then used to collect the files of that row. The result
 * contains no duplicates (by {@code equals}) and keeps first-seen order. If the pipeline's
 * file-detect parameter is enabled, {@code doFileDetectCollector} is run on the result.
 *
 * @param rowBatch the batch of row changes to inspect
 * @return the de-duplicated file data for the batch (possibly empty)
 * @throws ExtractException if no data-media pair list exists for an event's table, or if a
 *                          configured resolver cannot be instantiated
 */
private List<FileData> doFileExtract(RowBatch rowBatch) {
    List<FileData> fileDatas = new ArrayList<FileData>();
    Pipeline pipeline = getPipeline(rowBatch.getIdentity().getPipelineId());
    List<EventData> eventDatas = rowBatch.getDatas();
    for (EventData eventData : eventDatas) {
        if (eventData.getEventType().isDdl()) {
            continue;
        }
        List<DataMediaPair> dataMediaPairs = ConfigHelper.findDataMediaPairByMediaId(pipeline, eventData.getTableId());
        if (dataMediaPairs == null) {
            throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " dataMediaPair is null,please check");
        }
        for (DataMediaPair dataMediaPair : dataMediaPairs) {
            if (!hasUsableFileResolverData(dataMediaPair)) {
                continue;
            }
            // Resolver data is known to be non-null here, so no second null check is needed.
            FileResolver fileResolver = extensionFactory.getExtension(FileResolver.class, dataMediaPair.getResolverData());
            if (fileResolver == null) {
                throw new ExtractException("ERROR ## the dataMediaId = " + eventData.getTableId() + " the fileResolver className  = " + dataMediaPair.getResolverData().getClazzPath() + " is null ,please check the class");
            }
            if (fileResolver instanceof RemoteDirectoryFetcherAware) {
                RemoteDirectoryFetcherAware remoteDirectoryFetcherAware = (RemoteDirectoryFetcherAware) fileResolver;
                remoteDirectoryFetcherAware.setRemoteDirectoryFetcher(arandaRemoteDirectoryFetcher);
            }
            List<FileData> singleRowFileDatas = getSingleRowFileInfos(dataMediaPair.getId(), fileResolver, eventData);
            // De-duplicate while preserving first-seen order.
            for (FileData data : singleRowFileDatas) {
                if (!fileDatas.contains(data)) {
                    fileDatas.add(data);
                }
            }
        }
    }
    // Run the duplicate-sync detection (original note: for images) when the pipeline enables it.
    if (pipeline.getParameters().getFileDetect()) {
        doFileDetectCollector(pipeline, fileDatas);
    }
    return fileDatas;
}

/**
 * Tells whether the pair carries a file-resolver definition that can actually be loaded:
 * resolver data and its extension type are present, and the class path (for class-type
 * extensions) or the source text (for source-type extensions) is not blank.
 */
private boolean hasUsableFileResolverData(DataMediaPair dataMediaPair) {
    if (dataMediaPair.getResolverData() == null || dataMediaPair.getResolverData().getExtensionDataType() == null) {
        return false;
    }
    if (dataMediaPair.getResolverData().getExtensionDataType().isClazz() && StringUtils.isBlank(dataMediaPair.getResolverData().getClazzPath())) {
        return false;
    }
    if (dataMediaPair.getResolverData().getExtensionDataType().isSource() && StringUtils.isBlank(dataMediaPair.getResolverData().getSourceText())) {
        return false;
    }
    return true;
}
Also used : ExtractException(com.alibaba.otter.node.etl.extract.exceptions.ExtractException) RemoteDirectoryFetcherAware(com.alibaba.otter.shared.etl.extend.fileresolver.support.RemoteDirectoryFetcherAware) DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) ArrayList(java.util.ArrayList) FileResolver(com.alibaba.otter.shared.etl.extend.fileresolver.FileResolver) FileData(com.alibaba.otter.shared.etl.model.FileData) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 9 with Pipeline

use of com.alibaba.otter.shared.common.model.config.pipeline.Pipeline in project otter by alibaba.

the class GroupExtractor method extract.

/**
 * Applies the configured column groups to every non-DDL event of the batch.
 *
 * <p>The pipeline's data-media pairs are first indexed by source table id; each event is then
 * passed to {@code groupFilter} once per non-null column group of its table that has column
 * pairs.
 *
 * @param dbBatch the batch to process; it and its row batch must be non-null
 * @throws ExtractException declared by the overridden method
 */
@Override
public void extract(DbBatch dbBatch) throws ExtractException {
    Assert.notNull(dbBatch);
    Assert.notNull(dbBatch.getRowBatch());
    Pipeline pipeline = getPipeline(dbBatch.getRowBatch().getIdentity().getPipelineId());
    List<DataMediaPair> pairs = pipeline.getPairs();

    // Key = table id, value = the column groups that have to be synchronised for that table.
    Map<Long, List<ColumnGroup>> groupsByTableId = new HashMap<Long, List<ColumnGroup>>();
    for (DataMediaPair pair : pairs) {
        List<ColumnGroup> pairGroups = pair.getColumnGroups();
        if (CollectionUtils.isEmpty(pairGroups)) {
            continue;
        }
        groupsByTableId.put(pair.getSource().getId(), pairGroups);
    }

    List<EventData> events = dbBatch.getRowBatch().getDatas();
    for (EventData event : events) {
        if (event.getEventType().isDdl()) {
            continue;
        }
        List<ColumnGroup> tableGroups = groupsByTableId.get(event.getTableId());
        if (CollectionUtils.isEmpty(tableGroups)) {
            continue;
        }
        for (ColumnGroup group : tableGroups) {
            if (group == null || CollectionUtils.isEmpty(group.getColumnPairs())) {
                continue;
            }
            groupFilter(event, group);
        }
    }
}
Also used : DataMediaPair(com.alibaba.otter.shared.common.model.config.data.DataMediaPair) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) ColumnGroup(com.alibaba.otter.shared.common.model.config.data.ColumnGroup) EventData(com.alibaba.otter.shared.etl.model.EventData) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Example 10 with Pipeline

use of com.alibaba.otter.shared.common.model.config.pipeline.Pipeline in project otter by alibaba.

the class RemoteUrlBuilder method getUrl.

/**
 * Builds the download URL of {@code filePath} on the current node.
 *
 * <p>The host is the node's ip, or the node's external ip when either the node or the pipeline
 * is configured to use it. The port is the node's download port, falling back to
 * {@code defaultDownloadPort} when it is unset or negative.
 *
 * @param pipelineId id of the pipeline whose parameters are consulted
 * @param filePath   path component inserted into the url format
 * @return {@code urlFormat} filled in with host, port and file path
 * @throws ArchiveException if the external ip is requested but not configured on the node
 */
public String getUrl(Long pipelineId, String filePath) {
    Node currentNode = configClientService.currentNode();
    Pipeline pipeline = configClientService.findPipeline(pipelineId);
    String host = currentNode.getIp();
    boolean externalRequested = currentNode.getParameters().getUseExternalIp() || pipeline.getParameters().getUseExternalIp();
    if (externalRequested) {
        host = currentNode.getParameters().getExternalIp();
        if (StringUtils.isEmpty(host)) {
            throw new ArchiveException(String.format("pipelineId:%s useExternalIp by nid[%s] has no external ip", String.valueOf(pipelineId), String.valueOf(currentNode.getId())));
        }
    }
    // Note: this is the node's download port, not its service port.
    Integer port = currentNode.getParameters().getDownloadPort();
    boolean portConfigured = port != null && port >= 0;
    if (!portConfigured) {
        port = defaultDownloadPort;
    }
    return MessageFormat.format(urlFormat, host, String.valueOf(port), filePath);
}
Also used : Node(com.alibaba.otter.shared.common.model.config.node.Node) ArchiveException(com.alibaba.otter.node.etl.common.pipe.impl.http.archive.ArchiveException) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)

Aggregations

Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline): 105; Channel (com.alibaba.otter.shared.common.model.config.channel.Channel): 38; ArrayList (java.util.ArrayList): 37; Node (com.alibaba.otter.shared.common.model.config.node.Node): 22; Test (org.testng.annotations.Test): 20; DataMediaPair (com.alibaba.otter.shared.common.model.config.data.DataMediaPair): 19; EventData (com.alibaba.otter.shared.etl.model.EventData): 19; Mock (mockit.Mock): 19; ManagerException (com.alibaba.otter.manager.biz.common.exceptions.ManagerException): 17; RepeatConfigureException (com.alibaba.otter.manager.biz.common.exceptions.RepeatConfigureException): 17; Identity (com.alibaba.otter.shared.etl.model.Identity): 12; RowBatch (com.alibaba.otter.shared.etl.model.RowBatch): 12; BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest): 10; ChannelArbitrateEvent (com.alibaba.otter.shared.arbitrate.impl.manage.ChannelArbitrateEvent): 10; PipelineArbitrateEvent (com.alibaba.otter.shared.arbitrate.impl.manage.PipelineArbitrateEvent): 9; PipelineParameter (com.alibaba.otter.shared.common.model.config.pipeline.PipelineParameter): 9; FileBatch (com.alibaba.otter.shared.etl.model.FileBatch): 9; FileData (com.alibaba.otter.shared.etl.model.FileData): 9; HashMap (java.util.HashMap): 9; BeforeClass (org.testng.annotations.BeforeClass): 9