
Example 1 with Reader

use of com.dtstack.taier.develop.common.template.Reader in project Taier by DTStack.

the class ImpalaSyncBuilder method syncWriterBuild.

@Override
public Writer syncWriterBuild(List<Long> targetIds, Map<String, Object> targetMap, Reader reader) {
    TableLocationType tableLocationType = TableLocationType.getTableLocationType((String) targetMap.get(TableLocationType.key()));
    if (tableLocationType == null) {
        throw new RdosDefineException("不支持的表存储类型"); // "unsupported table storage type"
    }
    if (tableLocationType == TableLocationType.HIVE) {
        Map<String, Object> clone = new HashMap<>(targetMap);
        String writeMode = (String) clone.get("writeMode");
        writeMode = writeMode != null && writeMode.trim().length() != 0 ? SyncWriteMode.tranferHiveMode(writeMode) : SyncWriteMode.HIVE_OVERWRITE.getMode();
        clone.put("writeMode", writeMode);
        // set the hdfs "index" field on each selected column
        List column = (List) clone.get("column");
        List<Column> allColumns = (List<Column>) clone.get("allColumns");
        List<Column> partitionColumns = (List<Column>) clone.get("partitionColumns");
        Map<String, Column> allColumnsMap = allColumns.stream().collect(Collectors.toMap(Column::getName, item -> item));
        for (Object col : column) {
            String name = (String) ((Map<String, Object>) col).get("key");
            ((Map<String, Object>) col).put("index", allColumnsMap.get(name).getIndex());
        }
        // set fullColumnNames and fullColumnTypes, which are needed when recording dirty data
        // partition columns must be excluded
        Set<String> partitionColumnNameSet = CollectionUtils.isEmpty(partitionColumns) ? new HashSet<>() : partitionColumns.stream().map(pColumn -> pColumn.getName()).collect(Collectors.toSet());
        List<String> fullColumnNames = new ArrayList<>();
        List<String> fullColumnTypes = new ArrayList<>();
        for (Column allColumn : allColumns) {
            if (!partitionColumnNameSet.contains(allColumn.getName())) {
                fullColumnNames.add(allColumn.getName());
                fullColumnTypes.add(allColumn.getType());
            }
        }
        clone.put("fullColumnNames", fullColumnNames);
        clone.put("fullColumnTypes", fullColumnTypes);
        String partition = (String) clone.get("partition");
        // fileName logic follows HiveWriter
        String fileName = StringUtils.isNotEmpty(partition) ? partition : "";
        clone.put("fileName", fileName);
        return objToObject(clone, ImpalaHdfsWriter.class);
    } else if (tableLocationType == TableLocationType.KUDU) {
        KuduWriter kuduWriter = objToObject(targetMap, KuduWriter.class);
        String kuduTableName = (String) targetMap.get("kuduTableName");
        LOGGER.info("syncWriterBuild format impala  kuduTableName :{} ", kuduTableName);
        kuduWriter.setTable(kuduTableName);
        return kuduWriter;
    }
    return null;
}
Also used : ImpalaUtils(com.dtstack.taier.develop.utils.develop.sync.util.ImpalaUtils) StringUtils(org.apache.commons.lang.StringUtils) java.util(java.util) SourceDTOType(com.dtstack.taier.develop.enums.develop.SourceDTOType) ImpalaHdfsWriter(com.dtstack.taier.develop.utils.develop.sync.template.ImpalaHdfsWriter) Column(com.dtstack.taier.pluginapi.pojo.Column) LoggerFactory(org.slf4j.LoggerFactory) ISourceDTO(com.dtstack.dtcenter.loader.dto.source.ISourceDTO) RdosDefineException(com.dtstack.taier.common.exception.RdosDefineException) SyncWriteMode(com.dtstack.taier.develop.enums.develop.SyncWriteMode) CustomThreadRunsPolicy(com.dtstack.taier.common.CustomThreadRunsPolicy) CollectionUtils(org.apache.commons.collections.CollectionUtils) Writer(com.dtstack.taier.develop.common.template.Writer) DataSourceType(com.dtstack.dtcenter.loader.source.DataSourceType) ImpalaHdfsReader(com.dtstack.taier.develop.utils.develop.sync.template.ImpalaHdfsReader) KuduWriter(com.dtstack.taier.develop.utils.develop.sync.template.KuduWriter) KuduReader(com.dtstack.taier.develop.utils.develop.sync.template.KuduReader) Logger(org.slf4j.Logger) Reader(com.dtstack.taier.develop.common.template.Reader) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) TableLocationType(com.dtstack.taier.develop.enums.develop.TableLocationType) Component(org.springframework.stereotype.Component) JSON(com.alibaba.fastjson.JSON) JSONObject(com.alibaba.fastjson.JSONObject) Assert(org.springframework.util.Assert)
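
The subtle step in this builder is the index mapping: each selected column arrives as a loosely typed map and gets stamped with the position of the matching entry in allColumns, so the HDFS writer can line fields up by index. Below is a minimal, self-contained sketch of just that step. SimpleColumn is a hypothetical stand-in for com.dtstack.taier.pluginapi.pojo.Column, and the null guard is an addition; the original code above would throw a NullPointerException on an unknown column name.

import java.util.*;
import java.util.stream.Collectors;

public class ColumnIndexSketch {

    // hypothetical stand-in for com.dtstack.taier.pluginapi.pojo.Column
    record SimpleColumn(String name, int index, String type) {}

    public static void main(String[] args) {
        List<SimpleColumn> allColumns = List.of(
                new SimpleColumn("id", 0, "bigint"),
                new SimpleColumn("name", 1, "string"),
                new SimpleColumn("dt", 2, "string"));
        // selected columns arrive as loosely typed maps, as in the builder above
        List<Map<String, Object>> column = new ArrayList<>();
        column.add(new HashMap<>(Map.of("key", "name")));

        // name -> column lookup, mirroring allColumnsMap in the builder
        Map<String, SimpleColumn> byName = allColumns.stream()
                .collect(Collectors.toMap(SimpleColumn::name, c -> c));
        for (Map<String, Object> col : column) {
            String name = (String) col.get("key");
            SimpleColumn match = byName.get(name);
            if (match == null) {
                // explicit guard; the original would NPE here on an unknown name
                throw new IllegalStateException("column not found in allColumns: " + name);
            }
            col.put("index", match.index());
        }
        System.out.println(column); // e.g. [{key=name, index=1}]
    }
}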

Example 2 with Reader

use of com.dtstack.taier.develop.common.template.Reader in project Taier by DTStack.

the class DatasourceService method syncReaderBuild.

private Reader syncReaderBuild(final Integer sourceType, final Map<String, Object> sourceMap, final List<Long> sourceIds) throws IOException {
    Reader reader = null;
    if (Objects.nonNull(RDBMSSourceType.getByDataSourceType(sourceType))
            && !DataSourceType.HIVE.getVal().equals(sourceType)
            && !DataSourceType.HIVE1X.getVal().equals(sourceType)
            && !DataSourceType.HIVE3X.getVal().equals(sourceType)
            && !DataSourceType.CarbonData.getVal().equals(sourceType)
            && !DataSourceType.IMPALA.getVal().equals(sourceType)
            && !DataSourceType.SparkThrift2_1.getVal().equals(sourceType)) {
        reader = PublicUtil.objectToObject(sourceMap, RDBReader.class);
        ((RDBBase) reader).setSourceIds(sourceIds);
        return reader;
    }
    if (DataSourceType.HDFS.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, HDFSReader.class);
    }
    if (DataSourceType.HIVE.getVal().equals(sourceType)
            || DataSourceType.HIVE3X.getVal().equals(sourceType)
            || DataSourceType.HIVE1X.getVal().equals(sourceType)
            || DataSourceType.SparkThrift2_1.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, HiveReader.class);
    }
    if (DataSourceType.HBASE.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, HBaseReader.class);
    }
    if (DataSourceType.FTP.getVal().equals(sourceType)) {
        reader = PublicUtil.objectToObject(sourceMap, FtpReader.class);
        if (sourceMap.containsKey("isFirstLineHeader") && (Boolean) sourceMap.get("isFirstLineHeader")) {
            ((FtpReader) reader).setFirstLineHeader(true);
        } else {
            ((FtpReader) reader).setFirstLineHeader(false);
        }
        return reader;
    }
    if (DataSourceType.MAXCOMPUTE.getVal().equals(sourceType)) {
        reader = PublicUtil.objectToObject(sourceMap, OdpsReader.class);
        ((OdpsBase) reader).setSourceId(sourceIds.get(0));
        return reader;
    }
    if (DataSourceType.ES.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, EsReader.class);
    }
    if (DataSourceType.MONGODB.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, MongoDbReader.class);
    }
    if (DataSourceType.CarbonData.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, CarbonDataReader.class);
    }
    if (DataSourceType.Kudu.getVal().equals(sourceType)) {
        return syncBuilderFactory.getSyncBuilder(DataSourceType.Kudu.getVal()).syncReaderBuild(sourceMap, sourceIds);
    }
    if (DataSourceType.INFLUXDB.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, InfluxDBReader.class);
    }
    if (DataSourceType.IMPALA.getVal().equals(sourceType)) {
        // hdfsConfig and sftpConf are set when setSftpConf is called
        if (sourceMap.containsKey(HADOOP_CONFIG)) {
            Object impalaConfig = sourceMap.get(HADOOP_CONFIG);
            if (impalaConfig instanceof Map) {
                sourceMap.put(HADOOP_CONFIG, impalaConfig);
                sourceMap.put("sftpConf", ((Map) impalaConfig).get("sftpConf"));
            }
        }
        return syncBuilderFactory.getSyncBuilder(DataSourceType.IMPALA.getVal()).syncReaderBuild(sourceMap, sourceIds);
    }
    if (DataSourceType.AWS_S3.getVal().equals(sourceType)) {
        return PublicUtil.objectToObject(sourceMap, AwsS3Reader.class);
    }
    throw new RdosDefineException("暂不支持" + DataSourceType.getSourceType(sourceType).name() + "作为数据同步的源"); // "{type} is not yet supported as a data sync source"
}
Also used : RDBReader(com.dtstack.taier.develop.utils.develop.sync.template.RDBReader) OdpsReader(com.dtstack.taier.develop.utils.develop.sync.template.OdpsReader) RdosDefineException(com.dtstack.taier.common.exception.RdosDefineException) FtpReader(com.dtstack.taier.develop.utils.develop.sync.template.FtpReader) OdpsBase(com.dtstack.taier.develop.utils.develop.sync.template.OdpsBase) HBaseReader(com.dtstack.taier.develop.utils.develop.sync.template.HBaseReader) AwsS3Reader(com.dtstack.taier.develop.utils.develop.sync.template.AwsS3Reader) MongoDbReader(com.dtstack.taier.develop.utils.develop.sync.template.MongoDbReader) EsReader(com.dtstack.taier.develop.utils.develop.sync.template.EsReader) HiveReader(com.dtstack.taier.develop.utils.develop.sync.template.HiveReader) HDFSReader(com.dtstack.taier.develop.utils.develop.sync.template.HDFSReader) CarbonDataReader(com.dtstack.taier.develop.utils.develop.sync.template.CarbonDataReader) Reader(com.dtstack.taier.develop.common.template.Reader) InfluxDBReader(com.dtstack.taier.develop.utils.develop.sync.template.InfluxDBReader) JSONObject(com.alibaba.fastjson.JSONObject) RDBBase(com.dtstack.taier.develop.utils.develop.sync.template.RDBBase) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)
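
This method is a type-code dispatch: each DataSourceType value selects one Reader template class, with small per-type fixups (source ids for RDB, the first-line-header flag for FTP, sftpConf for Impala). If the if-chain keeps growing, the same dispatch can be expressed as a lookup table. The sketch below shows that alternative shape under stated assumptions: the local types are hypothetical stand-ins, not the real Taier classes, and the integer keys are illustrative rather than the real getVal() values.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class ReaderDispatchSketch {

    // hypothetical stand-ins for the real Reader hierarchy
    interface Reader {}
    static class HdfsReader implements Reader {}
    static class HiveReader implements Reader {}

    // type code -> factory; the keys are illustrative, not real DataSourceType values
    static final Map<Integer, Function<Map<String, Object>, Reader>> REGISTRY = new HashMap<>();
    static {
        REGISTRY.put(1, sourceMap -> new HdfsReader());
        REGISTRY.put(2, sourceMap -> new HiveReader());
    }

    static Reader syncReaderBuild(int sourceType, Map<String, Object> sourceMap) {
        Function<Map<String, Object>, Reader> factory = REGISTRY.get(sourceType);
        if (factory == null) {
            throw new IllegalArgumentException("unsupported source type: " + sourceType);
        }
        return factory.apply(sourceMap);
    }

    public static void main(String[] args) {
        Reader reader = syncReaderBuild(1, new HashMap<>());
        System.out.println(reader.getClass().getSimpleName()); // HdfsReader
    }
}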

Example 3 with Reader

use of com.dtstack.taier.develop.common.template.Reader in project Taier by DTStack.

the class DatasourceService method getSyncSql.

/**
 * Configure or modify an offline (data sync) task.
 *
 * @param isFilter whether to filter the username/password out of the returned data sync script
 * @return the sync task script as a JSON string
 */
public String getSyncSql(final TaskResourceParam param, boolean isFilter) {
    // source configuration
    final Map<String, Object> sourceMap = param.getSourceMap();
    // target configuration
    final Map<String, Object> targetMap = param.getTargetMap();
    // rate-limiting and error-record settings
    final Map<String, Object> settingMap = param.getSettingMap();
    try {
        this.setReaderJson(sourceMap, param.getId(), param.getTenantId(), isFilter);
        this.setWriterJson(targetMap, param.getId(), param.getTenantId(), isFilter);
        Reader reader = null;
        Writer writer = null;
        Setting setting = null;
        final Integer sourceType = Integer.parseInt(sourceMap.get("dataSourceType").toString());
        final Integer targetType = Integer.parseInt(targetMap.get("dataSourceType").toString());
        if (!this.checkDataSourcePermission(sourceType, EDataSourcePermission.READ.getType())) {
            throw new RdosDefineException(ErrorCode.SOURCE_CAN_NOT_AS_INPUT);
        }
        if (!this.checkDataSourcePermission(targetType, EDataSourcePermission.WRITE.getType())) {
            throw new RdosDefineException(ErrorCode.SOURCE_CAN_NOT_AS_OUTPUT);
        }
        final List<Long> sourceIds = (List<Long>) sourceMap.get("sourceIds");
        final List<Long> targetIds = (List<Long>) targetMap.get("sourceIds");
        reader = this.syncReaderBuild(sourceType, sourceMap, sourceIds);
        writer = this.syncWriterBuild(targetType, targetIds, targetMap, reader);
        setting = PublicUtil.objectToObject(settingMap, DefaultSetting.class);
        // validity check
        if (writer instanceof HiveWriter) {
            final HiveWriter hiveWriter = (HiveWriter) writer;
            if (!hiveWriter.isValid()) {
                throw new RdosDefineException(hiveWriter.getErrMsg());
            }
        }
        if (param.getCreateModel() == TaskCreateModelType.TEMPLATE.getType()) {
            // script (template) mode returns directly
            return this.getJobText(this.putDefaultEmptyValueForReader(sourceType, reader), this.putDefaultEmptyValueForWriter(targetType, writer), this.putDefaultEmptyValueForSetting(setting));
        }
        // get the data sync job.xml configuration
        final String jobXml = this.getJobText(reader, writer, setting);
        final String parserXml = this.getParserText(sourceMap, targetMap, settingMap);
        final JSONObject sql = new JSONObject(3);
        sql.put("job", jobXml);
        sql.put("parser", parserXml);
        sql.put("createModel", TaskCreateModelType.GUIDE.getType());
        this.batchTaskParamService.checkParams(this.batchTaskParamService.checkSyncJobParams(sql.toJSONString()), param.getTaskVariables());
        return sql.toJSONString();
    } catch (final Exception e) {
        LOGGER.error("", e);
        throw new RdosDefineException("解析同步任务失败: " + e.getMessage(), ErrorCode.SERVER_EXCEPTION); // "failed to parse the sync task: ..."
    }
}
Also used : HiveWriter(com.dtstack.taier.develop.utils.develop.sync.template.HiveWriter) RdosDefineException(com.dtstack.taier.common.exception.RdosDefineException) DefaultSetting(com.dtstack.taier.develop.utils.develop.sync.template.DefaultSetting) Setting(com.dtstack.taier.develop.common.template.Setting) HBaseReader(com.dtstack.taier.develop.utils.develop.sync.template.HBaseReader) AwsS3Reader(com.dtstack.taier.develop.utils.develop.sync.template.AwsS3Reader) MongoDbReader(com.dtstack.taier.develop.utils.develop.sync.template.MongoDbReader) OdpsReader(com.dtstack.taier.develop.utils.develop.sync.template.OdpsReader) FtpReader(com.dtstack.taier.develop.utils.develop.sync.template.FtpReader) EsReader(com.dtstack.taier.develop.utils.develop.sync.template.EsReader) HiveReader(com.dtstack.taier.develop.utils.develop.sync.template.HiveReader) HDFSReader(com.dtstack.taier.develop.utils.develop.sync.template.HDFSReader) CarbonDataReader(com.dtstack.taier.develop.utils.develop.sync.template.CarbonDataReader) Reader(com.dtstack.taier.develop.common.template.Reader) InfluxDBReader(com.dtstack.taier.develop.utils.develop.sync.template.InfluxDBReader) RDBReader(com.dtstack.taier.develop.utils.develop.sync.template.RDBReader) SftpException(com.jcraft.jsch.SftpException) IOException(java.io.IOException) PubSvcDefineException(com.dtstack.taier.common.exception.PubSvcDefineException) DtCenterDefException(com.dtstack.taier.common.exception.DtCenterDefException) JSONObject(com.alibaba.fastjson.JSONObject) ArrayList(java.util.ArrayList) List(java.util.List) HDFSWriter(com.dtstack.taier.develop.utils.develop.sync.template.HDFSWriter) CarbonDataWriter(com.dtstack.taier.develop.utils.develop.sync.template.CarbonDataWriter) EsWriter(com.dtstack.taier.develop.utils.develop.sync.template.EsWriter) OdpsWriter(com.dtstack.taier.develop.utils.develop.sync.template.OdpsWriter) RedisWriter(com.dtstack.taier.develop.utils.develop.sync.template.RedisWriter) AwsS3Writer(com.dtstack.taier.develop.utils.develop.sync.template.AwsS3Writer) HBaseWriter(com.dtstack.taier.develop.utils.develop.sync.template.HBaseWriter) RDBWriter(com.dtstack.taier.develop.utils.develop.sync.template.RDBWriter) InceptorWriter(com.dtstack.taier.develop.utils.develop.sync.template.InceptorWriter) MongoDbWriter(com.dtstack.taier.develop.utils.develop.sync.template.MongoDbWriter) FtpWriter(com.dtstack.taier.develop.utils.develop.sync.template.FtpWriter) Writer(com.dtstack.taier.develop.common.template.Writer)
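
On the happy path, getSyncSql ends by packing the generated job and parser texts into a small JSON envelope. Below is a minimal sketch of just that envelope, using the same fastjson calls as the source; the jobXml and parserXml strings are placeholders for getJobText(...) and getParserText(...), and 0 stands in for TaskCreateModelType.GUIDE.getType().

import com.alibaba.fastjson.JSONObject;

public class SyncSqlEnvelopeSketch {
    public static void main(String[] args) {
        String jobXml = "{\"job\":{}}"; // placeholder for getJobText(reader, writer, setting)
        String parserXml = "{}"; // placeholder for getParserText(sourceMap, targetMap, settingMap)
        JSONObject sql = new JSONObject(3); // initial capacity, as in the source
        sql.put("job", jobXml);
        sql.put("parser", parserXml);
        sql.put("createModel", 0); // TaskCreateModelType.GUIDE.getType() in the source; 0 is illustrative
        System.out.println(sql.toJSONString());
    }
}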

Example 4 with Reader

use of com.dtstack.taier.develop.common.template.Reader in project Taier by DTStack.

the class JobTemplate method toJobJsonString.

public String toJobJsonString() {
    Reader reader = newReader();
    Writer writer = newWrite();
    Setting setting = newSetting();
    JSONObject content = new JSONObject(2);
    content.put("reader", reader.toReaderJson());
    content.put("writer", writer.toWriterJson());
    JSONObject jobJson = new JSONObject(2);
    jobJson.put("content", Lists.newArrayList(content));
    jobJson.put("setting", setting.toSettingJson());
    StringBuilder job = new StringBuilder();
    job.append("{ \"job\":");
    job.append(jobJson.toJSONString());
    job.append(" }");
    return job.toString();
}
Also used : JSONObject(com.alibaba.fastjson.JSONObject) Setting(com.dtstack.taier.develop.common.template.Setting) Reader(com.dtstack.taier.develop.common.template.Reader) Writer(com.dtstack.taier.develop.common.template.Writer)
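
toJobJsonString is a template method: subclasses supply the three parts via newReader, newWrite, and newSetting, and the base class assembles the { "job": ... } envelope. A compact sketch of that assembly follows; the local Reader, Writer, and Setting interfaces are hypothetical stand-ins mirroring the com.dtstack.taier.develop.common.template types, and the empty-JSON suppliers in main are placeholders.

import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;

public class JobTemplateSketch {

    // hypothetical stand-ins for the com.dtstack.taier.develop.common.template interfaces
    interface Reader { JSONObject toReaderJson(); }
    interface Writer { JSONObject toWriterJson(); }
    interface Setting { JSONObject toSettingJson(); }

    // same assembly as JobTemplate.toJobJsonString above, with the parts passed in
    static String toJobJsonString(Reader reader, Writer writer, Setting setting) {
        JSONObject content = new JSONObject(2);
        content.put("reader", reader.toReaderJson());
        content.put("writer", writer.toWriterJson());
        JSONObject jobJson = new JSONObject(2);
        jobJson.put("content", Lists.newArrayList(content));
        jobJson.put("setting", setting.toSettingJson());
        return "{ \"job\":" + jobJson.toJSONString() + " }";
    }

    public static void main(String[] args) {
        // each part renders as an empty JSON object in this sketch
        String job = toJobJsonString(JSONObject::new, JSONObject::new, JSONObject::new);
        System.out.println(job); // prints the { "job": { "content": [...], "setting": {...} } } envelope
    }
}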

Aggregations

JSONObject (com.alibaba.fastjson.JSONObject) 4
Reader (com.dtstack.taier.develop.common.template.Reader) 4
RdosDefineException (com.dtstack.taier.common.exception.RdosDefineException) 3
Writer (com.dtstack.taier.develop.common.template.Writer) 3
Setting (com.dtstack.taier.develop.common.template.Setting) 2
AwsS3Reader (com.dtstack.taier.develop.utils.develop.sync.template.AwsS3Reader) 2
CarbonDataReader (com.dtstack.taier.develop.utils.develop.sync.template.CarbonDataReader) 2
EsReader (com.dtstack.taier.develop.utils.develop.sync.template.EsReader) 2
FtpReader (com.dtstack.taier.develop.utils.develop.sync.template.FtpReader) 2
HBaseReader (com.dtstack.taier.develop.utils.develop.sync.template.HBaseReader) 2
HDFSReader (com.dtstack.taier.develop.utils.develop.sync.template.HDFSReader) 2
HiveReader (com.dtstack.taier.develop.utils.develop.sync.template.HiveReader) 2
InfluxDBReader (com.dtstack.taier.develop.utils.develop.sync.template.InfluxDBReader) 2
MongoDbReader (com.dtstack.taier.develop.utils.develop.sync.template.MongoDbReader) 2
OdpsReader (com.dtstack.taier.develop.utils.develop.sync.template.OdpsReader) 2
RDBReader (com.dtstack.taier.develop.utils.develop.sync.template.RDBReader) 2
JSON (com.alibaba.fastjson.JSON) 1
ISourceDTO (com.dtstack.dtcenter.loader.dto.source.ISourceDTO) 1
DataSourceType (com.dtstack.dtcenter.loader.source.DataSourceType) 1
CustomThreadRunsPolicy (com.dtstack.taier.common.CustomThreadRunsPolicy) 1