Search in sources :

Example 6 with FileBatch

use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.

the class RowDataHttpPipe method saveDbBatch.

// ======================== help method ===================
/**
 * Serializes the given {@link DbBatch} into a file under {@code htdocsDir} and returns the
 * {@link HttpPipeKey} describing where it was written.
 *
 * <p>File layout, in order: a length prefix for the row batch
 * ({@code ByteUtils.int2bytes} of its serialized size), the row batch protobuf, a length
 * prefix for the file batch, then the file batch protobuf.
 *
 * @param dbBatch batch holding both the row batch and the file batch to persist
 * @return pipe key carrying the url, data type and identity; the key and crc are only set
 *         when the pipeline has file encryption enabled
 * @throws PipeException ("write_byte_error") when writing the file fails with an IOException
 */
private HttpPipeKey saveDbBatch(DbBatch dbBatch) {
    RowBatch rows = dbBatch.getRowBatch();

    // ---- row batch -> protobuf ----
    BatchProto.RowBatch.Builder rowsProto = BatchProto.RowBatch.newBuilder();
    rowsProto.setIdentity(build(rows.getIdentity()));
    for (EventData event : rows.getDatas()) {
        BatchProto.RowData.Builder rowProto = BatchProto.RowData.newBuilder();
        rowProto.setPairId(event.getPairId());
        rowProto.setTableId(event.getTableId());
        // schema name is only written when present
        if (event.getSchemaName() != null) {
            rowProto.setSchemaName(event.getSchemaName());
        }
        rowProto.setTableName(event.getTableName());
        rowProto.setEventType(event.getEventType().getValue());
        rowProto.setExecuteTime(event.getExecuteTime());

        // sync mode / consistency are only written when present (add by ljh at 2012-10-31)
        if (event.getSyncMode() != null) {
            rowProto.setSyncMode(event.getSyncMode().getValue());
        }
        if (event.getSyncConsistency() != null) {
            rowProto.setSyncConsistency(event.getSyncConsistency().getValue());
        }

        // key columns
        for (EventColumn key : event.getKeys()) {
            rowProto.addKeys(buildColumn(key));
        }
        // old key columns, if any
        if (!CollectionUtils.isEmpty(event.getOldKeys())) {
            for (EventColumn oldKey : event.getOldKeys()) {
                rowProto.addOldKeys(buildColumn(oldKey));
            }
        }
        // remaining (non-key) columns
        for (EventColumn other : event.getColumns()) {
            rowProto.addColumns(buildColumn(other));
        }

        rowProto.setRemedy(event.isRemedy());
        rowProto.setSize(event.getSize());
        // sql / ddl schema name / hint are skipped when empty
        if (StringUtils.isNotEmpty(event.getSql())) {
            rowProto.setSql(event.getSql());
        }
        if (StringUtils.isNotEmpty(event.getDdlSchemaName())) {
            rowProto.setDdlSchemaName(event.getDdlSchemaName());
        }
        if (StringUtils.isNotEmpty(event.getHint())) {
            rowProto.setHint(event.getHint());
        }
        rowProto.setWithoutSchema(event.isWithoutSchema());

        // append the finished row record
        rowsProto.addRows(rowProto.build());
    }

    // ---- file batch -> protobuf ----
    FileBatch files = dbBatch.getFileBatch();
    BatchProto.FileBatch.Builder filesProto = BatchProto.FileBatch.newBuilder();
    filesProto.setIdentity(build(files.getIdentity()));
    for (FileData item : files.getFiles()) {
        BatchProto.FileData.Builder itemProto = BatchProto.FileData.newBuilder();
        itemProto.setPairId(item.getPairId());
        itemProto.setTableId(item.getTableId());
        if (item.getNameSpace() != null) {
            itemProto.setNamespace(item.getNameSpace());
        }
        if (item.getPath() != null) {
            itemProto.setPath(item.getPath());
        }
        itemProto.setEventType(item.getEventType().getValue());
        itemProto.setSize(item.getSize());
        itemProto.setLastModifiedTime(item.getLastModifiedTime());
        // append the finished file record
        filesProto.addFiles(itemProto.build());
    }

    // ---- write both messages to the target file ----
    String filename = buildFileName(rows.getIdentity(), ClassUtils.getShortClassName(dbBatch.getClass()));
    File file = new File(htdocsDir, filename);
    OutputStream output = null;
    try {
        output = new BufferedOutputStream(new FileOutputStream(file));

        // length prefix followed by the row batch
        BatchProto.RowBatch builtRows = rowsProto.build();
        output.write(ByteUtils.int2bytes(builtRows.getSerializedSize()));
        builtRows.writeTo(output);

        // length prefix followed by the file batch
        BatchProto.FileBatch builtFiles = filesProto.build();
        output.write(ByteUtils.int2bytes(builtFiles.getSerializedSize()));
        builtFiles.writeTo(output);

        output.flush();
    } catch (IOException e) {
        throw new PipeException("write_byte_error", e);
    } finally {
        IOUtils.closeQuietly(output);
    }

    // ---- describe the written file for the consumer ----
    HttpPipeKey key = new HttpPipeKey();
    key.setUrl(remoteUrlBuilder.getUrl(rows.getIdentity().getPipelineId(), filename));
    key.setDataType(PipeDataType.DB_BATCH);
    key.setIdentity(rows.getIdentity());

    Pipeline pipeline = configClientService.findPipeline(rows.getIdentity().getPipelineId());
    if (!pipeline.getParameters().getUseFileEncrypt()) {
        return key;
    }
    // encryption enabled: encrypt the file and hand the key/crc to the consumer
    EncryptedData encrypted = encryptFile(file);
    key.setKey(encrypted.getKey());
    key.setCrc(encrypted.getCrc());
    return key;
}
Also used : EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) EventData(com.alibaba.otter.shared.etl.model.EventData) EncryptedData(com.alibaba.otter.node.etl.common.io.EncryptedData) FileData(com.alibaba.otter.shared.etl.model.FileData) BufferedOutputStream(java.io.BufferedOutputStream) FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) IOException(java.io.IOException) BatchProto(com.alibaba.otter.node.etl.model.protobuf.BatchProto) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) FileOutputStream(java.io.FileOutputStream) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) File(java.io.File)

Example 7 with FileBatch

use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.

the class FileBatchConflictDetectServiceImpl method onFileConflictDetect.

/**
 * Performs the actual file conflict detection for the batch carried by the event.
 *
 * <p>Delete events are always kept. For every other file a task is submitted to the executor
 * template: a file with a blank namespace is looked up on the local file system and compared
 * against the source through {@code accept}; a file whose local state could not be determined
 * (missing file, non-blank namespace, or an exception on every retry) is kept as well.
 *
 * @param event event carrying the file batch to inspect
 * @return the original batch when it has no files, otherwise a new batch with the same
 *         identity holding only the files that should still be synchronized
 */
private FileBatch onFileConflictDetect(FileConflictDetectEvent event) {
    final FileBatch batch = event.getFileBatch();
    if (CollectionUtils.isEmpty(batch.getFiles())) {
        return batch;
    }
    ExecutorTemplate template = executorTemplateGetter.get();
    try {
        MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(batch.getIdentity().getPipelineId()));
        template.start();
        // re-apply the pool size configured on the pipeline
        Pipeline pipeline = configClientService.findPipeline(batch.getIdentity().getPipelineId());
        template.adjustPoolSize(pipeline.getParameters().getFileLoadPoolSize());
        // both lists are written from worker threads, hence the synchronized wrappers
        final List<FileData> kept = Collections.synchronizedList(new ArrayList<FileData>());
        final List<FileData> dropped = Collections.synchronizedList(new ArrayList<FileData>());
        for (final FileData source : batch.getFiles()) {
            EventType eventType = source.getEventType();
            if (eventType.isDelete()) {
                // deletes are passed through without any check
                kept.add(source);
                continue;
            }
            template.submit(new Runnable() {

                public void run() {
                    MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(batch.getIdentity().getPipelineId()));
                    // handle the non-delete (update) case
                    String namespace = source.getNameSpace();
                    String path = source.getPath();
                    FileData local = null;
                    // retry loop: stop at the first attempt that finishes without an exception
                    for (int attempt = 0; attempt < retry; attempt++) {
                        try {
                            if (!StringUtils.isBlank(namespace)) {
                                // remote file
                                throw new RuntimeException(source + " is not support!");
                            }
                            // local file
                            java.io.File localFile = new java.io.File(path);
                            if (localFile.exists()) {
                                // modified time cost
                                long modifiedAt = localFile.lastModified();
                                long length = localFile.length();
                                // capture the current local state
                                local = new FileData();
                                local.setLastModifiedTime(modifiedAt);
                                local.setSize(length);
                            }
                            break;
                        } catch (Exception ex) {
                            local = null;
                        }
                    }
                    // unknown local state => sync; otherwise let accept() decide
                    if (local == null || accept(local, source)) {
                        kept.add(source);
                    } else {
                        dropped.add(source);
                    }
                }
            });
        }
        // wait until every submitted task has finished
        template.waitForResult();
        if (pipeline.getParameters().getDumpEvent() && logger.isInfoEnabled()) {
            logger.info(FileloadDumper.dumpFilterFileDatas(batch.getIdentity(), batch.getFiles().size(), kept.size(), dropped));
        }
        // assemble the filtered batch
        FileBatch filtered = new FileBatch();
        filtered.setIdentity(batch.getIdentity());
        filtered.setFiles(kept);
        return filtered;
    } finally {
        if (template != null) {
            executorTemplateGetter.release(template);
        }
        MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) ExecutorTemplate(com.alibaba.otter.shared.common.utils.thread.ExecutorTemplate) EventType(com.alibaba.otter.shared.etl.model.EventType) ConflictEventType(com.alibaba.otter.node.etl.conflict.model.ConflictEventType) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 8 with FileBatch

use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.

the class ExtractTask method run.

/**
 * Main loop of the extract task: while {@code running} is true, awaits the next extract event
 * for this pipeline, wraps its processing in a {@link Runnable} and hands it to the executor
 * service as a {@code SetlFuture}.
 *
 * <p>An interrupt (as judged by {@code isInterrupt}) ends the outer loop; any other error in
 * the loop or in a worker is logged and answered with {@code sendRollbackTermin}.
 */
public void run() {
    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
    while (running) {
        try {
            // NOTE(review): presumably blocks until an extract event is available — confirm against the arbitrate service
            final EtlEventData etlEventData = arbitrateEventService.extractEvent().await(pipelineId);
            Runnable task = new Runnable() {

                public void run() {
                    // set up profiling information
                    boolean profiling = isProfiling();
                    Long profilingStartTime = null;
                    if (profiling) {
                        profilingStartTime = System.currentTimeMillis();
                    }
                    MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(pipelineId));
                    // rename the worker thread for the duration of the task; restored in finally
                    String currentName = Thread.currentThread().getName();
                    Thread.currentThread().setName(createTaskName(pipelineId, "ExtractWorker"));
                    try {
                        pipeline = configClientService.findPipeline(pipelineId);
                        // the event description is expected to hold the pipe keys (unchecked cast)
                        List<PipeKey> keys = (List<PipeKey>) etlEventData.getDesc();
                        long nextNodeId = etlEventData.getNextNid();
                        DbBatch dbBatch = rowDataPipeDelegate.get(keys);
                        // may be null: with insufficient memory or a network failure causing a long block,
                        // fetching from the pipe can fail and the data may already have been deleted by the previous node
                        if (dbBatch == null) {
                            processMissData(pipelineId, "extract miss data with keys:" + keys.toString());
                            return;
                        }
                        // re-assemble the data
                        otterExtractorFactory.extract(dbBatch);
                        if (dbBatch.getFileBatch() != null && !CollectionUtils.isEmpty(dbBatch.getFileBatch().getFiles()) && pipeline.getParameters().getFileDetect()) {
                            // there are files to synchronize and file comparison is enabled:
                            // check whether the images differ between the China and US sides
                            FileBatch fileBatch = fileBatchConflictDetectService.detect(dbBatch.getFileBatch(), nextNodeId);
                            dbBatch.setFileBatch(fileBatch);
                        }
                        // store the processed batch for the next node and publish the new pipe keys on the event
                        List<PipeKey> pipeKeys = rowDataPipeDelegate.put(dbBatch, nextNodeId);
                        etlEventData.setDesc(pipeKeys);
                        if (profiling) {
                            Long profilingEndTime = System.currentTimeMillis();
                            stageAggregationCollector.push(pipelineId, StageType.EXTRACT, new AggregationItem(profilingStartTime, profilingEndTime));
                        }
                        // NOTE(review): presumably signals completion of the extract stage for this event — confirm
                        arbitrateEventService.extractEvent().single(etlEventData);
                    } catch (Throwable e) {
                        // non-interrupt failures are logged as errors and trigger a rollback; interrupts are only logged
                        if (!isInterrupt(e)) {
                            logger.error(String.format("[%d] extractwork executor is error! data:%s", pipelineId, etlEventData), e);
                            sendRollbackTermin(pipelineId, e);
                        } else {
                            logger.info(String.format("[%d] extractwork executor is interrrupt! data:%s", pipelineId, etlEventData), e);
                        }
                    } finally {
                        Thread.currentThread().setName(currentName);
                        MDC.remove(OtterConstants.splitPipelineLogFileKey);
                    }
                }
            };
            // build a pending task so it can be cancelled when the thread is shut down
            SetlFuture extractFuture = new SetlFuture(StageType.EXTRACT, etlEventData.getProcessId(), pendingFuture, task);
            executorService.execute(extractFuture);
        } catch (Throwable e) {
            if (isInterrupt(e)) {
                // interrupted: leave the loop and end the task
                logger.info(String.format("[%s] extractTask is interrupted!", pipelineId), e);
                return;
            } else {
                // any other failure: log, request a rollback and keep looping
                logger.error(String.format("[%s] extractTask is error!", pipelineId), e);
                sendRollbackTermin(pipelineId, e);
            }
        }
    }
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) PipeKey(com.alibaba.otter.node.etl.common.pipe.PipeKey) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EtlEventData(com.alibaba.otter.shared.arbitrate.model.EtlEventData) AggregationItem(com.alibaba.otter.node.etl.common.jmx.StageAggregation.AggregationItem) List(java.util.List)

Example 9 with FileBatch

use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.

the class FileExtractor method extract.

/**
 * Extracts the file data referenced by the row batch and attaches it to {@code dbBatch} as a
 * new {@link FileBatch}.
 *
 * <p>The file batch gets its own {@link Identity} instance carrying the channel id, pipeline
 * id and process id of the row batch identity.
 *
 * @param dbBatch batch whose row batch is scanned and whose file batch is replaced
 * @throws ExtractException declared by the method signature
 */
public void extract(DbBatch dbBatch) throws ExtractException {
    List<FileData> extracted = doFileExtract(dbBatch.getRowBatch());

    // copy the identifying ids field by field instead of sharing the row batch's Identity
    Identity sourceIdentity = dbBatch.getRowBatch().getIdentity();
    Identity copiedIdentity = new Identity();
    copiedIdentity.setChannelId(sourceIdentity.getChannelId());
    copiedIdentity.setPipelineId(sourceIdentity.getPipelineId());
    copiedIdentity.setProcessId(sourceIdentity.getProcessId());

    FileBatch files = new FileBatch();
    files.setFiles(extracted);
    files.setIdentity(copiedIdentity);
    dbBatch.setFileBatch(files);
}
Also used : FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) Identity(com.alibaba.otter.shared.etl.model.Identity) FileData(com.alibaba.otter.shared.etl.model.FileData)

Example 10 with FileBatch

use of com.alibaba.otter.shared.etl.model.FileBatch in project otter by alibaba.

the class RowDataHttpPipe method getDbBatch.

/**
 * Downloads the file referenced by the pipe key and deserializes it into a {@link DbBatch}.
 *
 * <p>The file is expected to contain, in order: a 4-byte length prefix, a row batch protobuf
 * of that length, a second 4-byte length prefix, and a file batch protobuf of that length.
 * When the key carries both an encryption key and a crc, the downloaded file is decoded
 * before it is parsed.
 *
 * @param key pipe key carrying the data url, the identity and the optional key/crc pair
 * @return the reconstructed batch holding both the row batch and the file batch
 * @throws PipeException ("download_error") when retrieving the file fails, or
 *         ("deserial_error") when the file cannot be read or is truncated
 */
private DbBatch getDbBatch(HttpPipeKey key) {
    String dataUrl = key.getUrl();
    Pipeline pipeline = configClientService.findPipeline(key.getIdentity().getPipelineId());
    DataRetriever dataRetriever = dataRetrieverFactory.createRetriever(pipeline.getParameters().getRetriever(), dataUrl, downloadDir);
    File archiveFile = null;
    try {
        dataRetriever.connect();
        dataRetriever.doRetrieve();
        archiveFile = dataRetriever.getDataAsFile();
    } catch (Exception e) {
        dataRetriever.abort();
        throw new PipeException("download_error", e);
    } finally {
        dataRetriever.disconnect();
    }
    // decode the file when the producer encrypted it
    if (StringUtils.isNotEmpty(key.getKey()) && StringUtils.isNotEmpty(key.getCrc())) {
        decodeFile(archiveFile, key.getKey(), key.getCrc());
    }
    InputStream input = null;
    try {
        input = new BufferedInputStream(new FileInputStream(archiveFile));
        DbBatch dbBatch = new DbBatch();
        // first section: length-prefixed row batch
        int length = readLengthPrefix(input);
        BatchProto.RowBatch rowbatchProto = BatchProto.RowBatch.parseFrom(new LimitedInputStream(input, length));
        // rebuild the original model objects
        RowBatch rowBatch = new RowBatch();
        rowBatch.setIdentity(build(rowbatchProto.getIdentity()));
        for (BatchProto.RowData rowDataProto : rowbatchProto.getRowsList()) {
            EventData eventData = new EventData();
            eventData.setPairId(rowDataProto.getPairId());
            eventData.setTableId(rowDataProto.getTableId());
            eventData.setTableName(rowDataProto.getTableName());
            eventData.setSchemaName(rowDataProto.getSchemaName());
            eventData.setEventType(EventType.valuesOf(rowDataProto.getEventType()));
            eventData.setExecuteTime(rowDataProto.getExecuteTime());
            // add by ljh at 2012-10-31
            if (StringUtils.isNotEmpty(rowDataProto.getSyncMode())) {
                eventData.setSyncMode(SyncMode.valuesOf(rowDataProto.getSyncMode()));
            }
            if (StringUtils.isNotEmpty(rowDataProto.getSyncConsistency())) {
                eventData.setSyncConsistency(SyncConsistency.valuesOf(rowDataProto.getSyncConsistency()));
            }
            // primary key columns
            List<EventColumn> keys = new ArrayList<EventColumn>();
            for (BatchProto.Column columnProto : rowDataProto.getKeysList()) {
                keys.add(buildColumn(columnProto));
            }
            eventData.setKeys(keys);
            // old primary key columns, only set when present
            if (CollectionUtils.isEmpty(rowDataProto.getOldKeysList()) == false) {
                List<EventColumn> oldKeys = new ArrayList<EventColumn>();
                for (BatchProto.Column columnProto : rowDataProto.getOldKeysList()) {
                    oldKeys.add(buildColumn(columnProto));
                }
                eventData.setOldKeys(oldKeys);
            }
            // remaining column values
            List<EventColumn> columns = new ArrayList<EventColumn>();
            for (BatchProto.Column columnProto : rowDataProto.getColumnsList()) {
                columns.add(buildColumn(columnProto));
            }
            eventData.setColumns(columns);
            eventData.setRemedy(rowDataProto.getRemedy());
            eventData.setSize(rowDataProto.getSize());
            eventData.setSql(rowDataProto.getSql());
            eventData.setDdlSchemaName(rowDataProto.getDdlSchemaName());
            eventData.setHint(rowDataProto.getHint());
            eventData.setWithoutSchema(rowDataProto.getWithoutSchema());
            // add to the overall row batch
            rowBatch.merge(eventData);
        }
        dbBatch.setRowBatch(rowBatch);
        // second section: length-prefixed file batch
        length = readLengthPrefix(input);
        BatchProto.FileBatch filebatchProto = BatchProto.FileBatch.parseFrom(new LimitedInputStream(input, length));
        // rebuild the original model objects
        FileBatch fileBatch = new FileBatch();
        fileBatch.setIdentity(build(filebatchProto.getIdentity()));
        for (BatchProto.FileData fileDataProto : filebatchProto.getFilesList()) {
            FileData fileData = new FileData();
            fileData.setPairId(fileDataProto.getPairId());
            fileData.setTableId(fileDataProto.getTableId());
            fileData.setEventType(EventType.valuesOf(fileDataProto.getEventType()));
            fileData.setLastModifiedTime(fileDataProto.getLastModifiedTime());
            fileData.setNameSpace(fileDataProto.getNamespace());
            fileData.setPath(fileDataProto.getPath());
            fileData.setSize(fileDataProto.getSize());
            // add to the file batch
            fileBatch.getFiles().add(fileData);
        }
        dbBatch.setFileBatch(fileBatch);
        return dbBatch;
    } catch (IOException e) {
        throw new PipeException("deserial_error", e);
    } finally {
        // close the stream that was actually opened; previously a never-assigned reader was
        // closed here and the file stream leaked
        IOUtils.closeQuietly(input);
    }
}

/**
 * Reads the 4-byte length prefix that precedes each serialized section.
 *
 * <p>{@link InputStream#read(byte[], int, int)} may return fewer bytes than requested, so the
 * read is repeated until the prefix is complete.
 *
 * @param input stream positioned at the start of a length prefix
 * @return the decoded length
 * @throws IOException when the stream ends before four bytes could be read
 */
private int readLengthPrefix(InputStream input) throws IOException {
    byte[] lengthBytes = new byte[4];
    int offset = 0;
    while (offset < lengthBytes.length) {
        int count = input.read(lengthBytes, offset, lengthBytes.length - offset);
        if (count < 0) {
            throw new IOException("unexpected end of stream while reading length prefix");
        }
        offset += count;
    }
    return ByteUtils.bytes2int(lengthBytes);
}
Also used : EventColumn(com.alibaba.otter.shared.etl.model.EventColumn) ArrayList(java.util.ArrayList) DbBatch(com.alibaba.otter.shared.etl.model.DbBatch) EventData(com.alibaba.otter.shared.etl.model.EventData) BufferedInputStream(java.io.BufferedInputStream) FileData(com.alibaba.otter.shared.etl.model.FileData) FileBatch(com.alibaba.otter.shared.etl.model.FileBatch) BufferedInputStream(java.io.BufferedInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) DataRetriever(com.alibaba.otter.node.etl.common.io.download.DataRetriever) IOException(java.io.IOException) BatchProto(com.alibaba.otter.node.etl.model.protobuf.BatchProto) IOException(java.io.IOException) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) FileInputStream(java.io.FileInputStream) Pipeline(com.alibaba.otter.shared.common.model.config.pipeline.Pipeline) RowBatch(com.alibaba.otter.shared.etl.model.RowBatch) PipeException(com.alibaba.otter.node.etl.common.pipe.exception.PipeException) JSONReader(com.alibaba.fastjson.JSONReader) File(java.io.File)

Aggregations

FileBatch (com.alibaba.otter.shared.etl.model.FileBatch)18 Pipeline (com.alibaba.otter.shared.common.model.config.pipeline.Pipeline)9 FileData (com.alibaba.otter.shared.etl.model.FileData)9 Identity (com.alibaba.otter.shared.etl.model.Identity)9 File (java.io.File)8 Test (org.testng.annotations.Test)8 RowBatch (com.alibaba.otter.shared.etl.model.RowBatch)7 BaseDbTest (com.alibaba.otter.node.etl.BaseDbTest)5 DbBatch (com.alibaba.otter.shared.etl.model.DbBatch)4 IOException (java.io.IOException)4 BaseOtterTest (com.alibaba.otter.node.etl.BaseOtterTest)3 WeightController (com.alibaba.otter.node.etl.load.loader.weight.WeightController)3 Node (com.alibaba.otter.shared.common.model.config.node.Node)3 EventColumn (com.alibaba.otter.shared.etl.model.EventColumn)3 EventData (com.alibaba.otter.shared.etl.model.EventData)3 ArrayList (java.util.ArrayList)3 PipeException (com.alibaba.otter.node.etl.common.pipe.exception.PipeException)2 HttpPipeKey (com.alibaba.otter.node.etl.common.pipe.impl.http.HttpPipeKey)2 LoadException (com.alibaba.otter.node.etl.load.exception.LoadException)2 FileLoadContext (com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext)2