Example 1 with KuduWriter

Use of com.dtstack.taier.develop.utils.develop.sync.template.KuduWriter in the project Taier by DTStack.

The syncWriterBuild method of the class ImpalaSyncBuilder:

@Override
public Writer syncWriterBuild(List<Long> targetIds, Map<String, Object> targetMap, Reader reader) {
    TableLocationType tableLocationType = TableLocationType.getTableLocationType((String) targetMap.get(TableLocationType.key()));
    if (tableLocationType == null) {
        throw new RdosDefineException("不支持的表存储类型"); // "unsupported table storage type"
    }
    if (tableLocationType == TableLocationType.HIVE) {
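        // Hive/HDFS-backed target: build an ImpalaHdfsWriter from a copy of the target configuration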
        Map<String, Object> clone = new HashMap<>(targetMap);
        String writeMode = (String) clone.get("writeMode");
        writeMode = writeMode != null && writeMode.trim().length() != 0 ? SyncWriteMode.tranferHiveMode(writeMode) : SyncWriteMode.HIVE_OVERWRITE.getMode();
        clone.put("writeMode", writeMode);
        // set the hdfs index field for each mapped column
        List column = (List) clone.get("column");
        List<Column> allColumns = (List<Column>) clone.get("allColumns");
        List<Column> partitionColumns = (List<Column>) clone.get("partitionColumns");
        Map<String, Column> allColumnsMap = allColumns.stream().collect(Collectors.toMap(Column::getName, item -> item));
        for (Object col : column) {
            String name = (String) ((Map<String, Object>) col).get("key");
            ((Map<String, Object>) col).put("index", allColumnsMap.get(name).getIndex());
        }
        // set fullColumnNames and fullColumnTypes, needed when recording dirty data
        // the partition columns have to be excluded
        Set<String> partitionColumnNameSet = CollectionUtils.isEmpty(partitionColumns) ? new HashSet<>() : partitionColumns.stream().map(pColumn -> pColumn.getName()).collect(Collectors.toSet());
        List<String> fullColumnNames = new ArrayList<>();
        List<String> fullColumnTypes = new ArrayList<>();
        for (Column allColumn : allColumns) {
            if (!partitionColumnNameSet.contains(allColumn.getName())) {
                fullColumnNames.add(allColumn.getName());
                fullColumnTypes.add(allColumn.getType());
            }
        }
        clone.put("fullColumnNames", fullColumnNames);
        clone.put("fullColumnTypes", fullColumnTypes);
        String partition = (String) clone.get("partition");
        // fileName logic follows HiveWriter
        String fileName = StringUtils.isNotEmpty(partition) ? partition : "";
        clone.put("fileName", fileName);
        return objToObject(clone, ImpalaHdfsWriter.class);
    } else if (tableLocationType == TableLocationType.KUDU) {
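        // Kudu-backed target: convert the target configuration into a KuduWriter and set the physical table name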
        KuduWriter kuduWriter = objToObject(targetMap, KuduWriter.class);
        String kuduTableName = (String) targetMap.get("kuduTableName");
        LOGGER.info("syncWriterBuild format impala  kuduTableName :{} ", kuduTableName);
        kuduWriter.setTable(kuduTableName);
        return kuduWriter;
    }
    return null;
}
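
The helper objToObject is not shown on this page. A minimal sketch of it, assuming it simply does a fastjson round trip from the configuration map to the target writer class (the name and call shape come from the method above; the body is an assumption, not the Taier implementation):

private <T> T objToObject(Object source, Class<T> targetClass) {
    // assumption: serialize the source map to JSON, then parse it back as the target writer type
    return JSON.parseObject(JSON.toJSONString(source), targetClass);
}

Under that assumption, keys such as "writeMode", "fullColumnNames" and "fileName" placed into the clone map would surface as fields of ImpalaHdfsWriter.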
Also used : ImpalaUtils (com.dtstack.taier.develop.utils.develop.sync.util.ImpalaUtils), StringUtils (org.apache.commons.lang.StringUtils), java.util, SourceDTOType (com.dtstack.taier.develop.enums.develop.SourceDTOType), ImpalaHdfsWriter (com.dtstack.taier.develop.utils.develop.sync.template.ImpalaHdfsWriter), Column (com.dtstack.taier.pluginapi.pojo.Column), LoggerFactory (org.slf4j.LoggerFactory), ISourceDTO (com.dtstack.dtcenter.loader.dto.source.ISourceDTO), RdosDefineException (com.dtstack.taier.common.exception.RdosDefineException), SyncWriteMode (com.dtstack.taier.develop.enums.develop.SyncWriteMode), CustomThreadRunsPolicy (com.dtstack.taier.common.CustomThreadRunsPolicy), CollectionUtils (org.apache.commons.collections.CollectionUtils), Writer (com.dtstack.taier.develop.common.template.Writer), DataSourceType (com.dtstack.dtcenter.loader.source.DataSourceType), ImpalaHdfsReader (com.dtstack.taier.develop.utils.develop.sync.template.ImpalaHdfsReader), KuduWriter (com.dtstack.taier.develop.utils.develop.sync.template.KuduWriter), KuduReader (com.dtstack.taier.develop.utils.develop.sync.template.KuduReader), Logger (org.slf4j.Logger), Reader (com.dtstack.taier.develop.common.template.Reader), Maps (com.google.common.collect.Maps), Collectors (java.util.stream.Collectors), TableLocationType (com.dtstack.taier.develop.enums.develop.TableLocationType), Component (org.springframework.stereotype.Component), JSON (com.alibaba.fastjson.JSON), JSONObject (com.alibaba.fastjson.JSONObject), Assert (org.springframework.util.Assert)
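
For the Kudu path specifically, a hypothetical call could look like this; only the map keys TableLocationType.key() and "kuduTableName" come from the code above, while the location value "kudu", the table name, and the surrounding impalaSyncBuilder/reader variables are illustrative assumptions:

Map<String, Object> targetMap = new HashMap<>();
targetMap.put(TableLocationType.key(), "kudu");                  // assumed value for a Kudu-backed table
targetMap.put("kuduTableName", "impala::default.demo_table");    // assumed Impala-managed Kudu table name
// impalaSyncBuilder and reader are assumed to exist in the calling context
Writer writer = impalaSyncBuilder.syncWriterBuild(Collections.singletonList(1L), targetMap, reader);
// the KUDU branch returns a KuduWriter whose table is set to the "kuduTableName" value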
