
Example 1 with Column

Use of com.dtstack.taier.pluginapi.pojo.Column in project Taier by DTStack.

The class ImpalaSyncBuilder, method syncWriterBuild.

@Override
public Writer syncWriterBuild(List<Long> targetIds, Map<String, Object> targetMap, Reader reader) {
    TableLocationType tableLocationType = TableLocationType.getTableLocationType((String) targetMap.get(TableLocationType.key()));
    if (tableLocationType == null) {
        throw new RdosDefineException("Unsupported table storage type");
    }
    if (tableLocationType == TableLocationType.HIVE) {
        Map<String, Object> clone = new HashMap<>(targetMap);
        String writeMode = (String) clone.get("writeMode");
        writeMode = writeMode != null && writeMode.trim().length() != 0 ? SyncWriteMode.tranferHiveMode(writeMode) : SyncWriteMode.HIVE_OVERWRITE.getMode();
        clone.put("writeMode", writeMode);
        // Set the hdfs index field on each column
        List<?> column = (List<?>) clone.get("column");
        List<Column> allColumns = (List<Column>) clone.get("allColumns");
        List<Column> partitionColumns = (List<Column>) clone.get("partitionColumns");
        Map<String, Column> allColumnsMap = allColumns.stream().collect(Collectors.toMap(Column::getName, item -> item));
        for (Object col : column) {
            Map<String, Object> colMap = (Map<String, Object>) col;
            String name = (String) colMap.get("key");
            colMap.put("index", allColumnsMap.get(name).getIndex());
        }
        // Set fullColumnNames and fullColumnTypes; they are needed when recording dirty data.
        // Partition columns must be excluded.
        Set<String> partitionColumnNameSet = CollectionUtils.isEmpty(partitionColumns) ? new HashSet<>() : partitionColumns.stream().map(Column::getName).collect(Collectors.toSet());
        List<String> fullColumnNames = new ArrayList<>();
        List<String> fullColumnTypes = new ArrayList<>();
        for (Column allColumn : allColumns) {
            if (!partitionColumnNameSet.contains(allColumn.getName())) {
                fullColumnNames.add(allColumn.getName());
                fullColumnTypes.add(allColumn.getType());
            }
        }
        clone.put("fullColumnNames", fullColumnNames);
        clone.put("fullColumnTypes", fullColumnTypes);
        String partition = (String) clone.get("partition");
        // The fileName logic follows HiveWriter.
        String fileName = StringUtils.isNotEmpty(partition) ? partition : "";
        clone.put("fileName", fileName);
        return objToObject(clone, ImpalaHdfsWriter.class);
    } else if (tableLocationType == TableLocationType.KUDU) {
        KuduWriter kuduWriter = objToObject(targetMap, KuduWriter.class);
        String kuduTableName = (String) targetMap.get("kuduTableName");
        LOGGER.info("syncWriterBuild format impala kuduTableName: {}", kuduTableName);
        kuduWriter.setTable(kuduTableName);
        return kuduWriter;
    }
    return null;
}
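
The interesting step above is back-filling each front-end column map with its index from allColumns. Below is a minimal, self-contained sketch of that enrichment; the simplified Column class and the sample data are assumptions for illustration, not the actual Taier types. Note that allColumnsMap.get(name) throws a NullPointerException if a front-end key is not present in allColumns, an assumption the production code makes as well.

import java.util.*;
import java.util.stream.Collectors;

public class ColumnIndexDemo {

    // Simplified stand-in for com.dtstack.taier.pluginapi.pojo.Column (assumed shape).
    static class Column {
        private final String name;
        private final int index;
        Column(String name, int index) { this.name = name; this.index = index; }
        String getName() { return name; }
        int getIndex() { return index; }
    }

    public static void main(String[] args) {
        List<Column> allColumns = Arrays.asList(new Column("id", 0), new Column("name", 1));

        // Front-end style column maps carry only the "key"; the index is missing.
        List<Map<String, Object>> column = new ArrayList<>();
        column.add(new HashMap<>(Collections.singletonMap("key", "name")));

        // Same enrichment as in syncWriterBuild: look up each key in allColumns.
        Map<String, Column> allColumnsMap =
                allColumns.stream().collect(Collectors.toMap(Column::getName, c -> c));
        for (Map<String, Object> col : column) {
            col.put("index", allColumnsMap.get((String) col.get("key")).getIndex());
        }
        System.out.println(column); // e.g. [{key=name, index=1}]
    }
}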

Example 2 with Column

Use of com.dtstack.taier.pluginapi.pojo.Column in project Taier by DTStack.

The class ImpalaHdfsReader, method toReaderJson.

@Override
public JSONObject toReaderJson() {
    HDFSReader hdfsReader = new HDFSReader();
    hdfsReader.setHadoopConfig(hadoopConfig);
    hdfsReader.setFieldDelimiter(fieldDelimiter);
    // The column parameter passed from the front end has no index; the HDFS reader requires it.
    Map<String, Column> allColumnsMap = allColumns.stream().collect(Collectors.toMap(Column::getName, item -> item));
    for (Object col : column) {
        Map<String, Object> colMap = (Map<String, Object>) col;
        String name = (String) colMap.get("key");
        colMap.put("index", allColumnsMap.get(name).getIndex());
    }
    hdfsReader.setColumn(column);
    hdfsReader.setDefaultFS(defaultFS);
    hdfsReader.setEncoding(encoding);
    hdfsReader.setExtralConfig(super.getExtralConfig());
    hdfsReader.setFileType(fileType);
    if (StringUtils.isNotEmpty(partition)) {
        hdfsReader.setPath(path + "/" + partition);
    } else {
        hdfsReader.setPath(path);
    }
    if (MapUtils.isNotEmpty(sftpConf)) {
        hdfsReader.setSftpConf(sftpConf);
    }
    if (StringUtils.isNotEmpty(remoteDir)) {
        hdfsReader.setRemoteDir(remoteDir);
    }
    hdfsReader.setPath(hdfsReader.getPath().trim());
    return hdfsReader.toReaderJson();
}
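
Aside from the index back-fill shared with Example 1, the only branching here is the path construction: a non-empty partition is appended as a subdirectory and the final path is trimmed. A minimal sketch of that rule, under the assumption that partition arrives as a pre-rendered string such as "pt=20220101"; the helper name is illustrative, not Taier code:

import org.apache.commons.lang.StringUtils;

public class HdfsPathDemo {

    // Mirrors toReaderJson's path rule: append the partition as a subdirectory, then trim.
    static String resolvePath(String path, String partition) {
        String resolved = StringUtils.isNotEmpty(partition) ? path + "/" + partition : path;
        return resolved.trim();
    }

    public static void main(String[] args) {
        System.out.println(resolvePath("/user/hive/warehouse/t", "pt=20220101"));
        // -> /user/hive/warehouse/t/pt=20220101
        System.out.println(resolvePath(" /user/hive/warehouse/t ", null));
        // -> /user/hive/warehouse/t  (trim only strips outer whitespace)
    }
}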

Example 3 with Column

Use of com.dtstack.taier.pluginapi.pojo.Column in project Taier by DTStack.

The class ImpalaUtils, method getImpalaHiveTableDetailInfo.

public static Map<String, Object> getImpalaHiveTableDetailInfo(ISourceDTO iSourceDTO, String tableName) {
    IClient client = ClientCache.getClient(DataSourceType.IMPALA.getVal());
    SqlQueryDTO sqlQueryDTO = SqlQueryDTO.builder().tableName(tableName).build();
    com.dtstack.dtcenter.loader.dto.Table tableInfo = client.getTable(iSourceDTO, sqlQueryDTO);
    List<ColumnMetaDTO> columnMetaDTOList = tableInfo.getColumns();
    List<Column> columns = new ArrayList<>();
    List<Column> partitionColumns = new ArrayList<>();
    for (int i = 0; i < columnMetaDTOList.size(); i++) {
        ColumnMetaDTO columnMetaDTO = columnMetaDTOList.get(i);
        Column column = new Column();
        column.setName(columnMetaDTO.getKey());
        column.setType(columnMetaDTO.getType());
        column.setComment(columnMetaDTO.getComment());
        column.setIndex(i);
        columns.add(column);
        if (columnMetaDTO.getPart()) {
            partitionColumns.add(column);
        }
    }
    Map<String, Object> map = new HashMap<>();
    map.put("allColumns", columns);
    map.put("partitionColumns", partitionColumns);
    map.put("path", tableInfo.getPath());
    map.put("fieldDelimiter", tableInfo.getDelim());
    return map;
}
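
The four keys placed into the map here are exactly what Example 1 reads back out of targetMap. A hedged sketch of consuming the result follows; it assumes the Taier and dtcenter-loader dependencies are on the classpath and that the caller already holds a configured ISourceDTO (the demo class and method names are illustrative):

import com.dtstack.dtcenter.loader.dto.source.ISourceDTO;
import com.dtstack.taier.develop.utils.develop.sync.util.ImpalaUtils;
import com.dtstack.taier.pluginapi.pojo.Column;

import java.util.List;
import java.util.Map;

public class TableDetailDemo {

    // Unpacks the four keys that getImpalaHiveTableDetailInfo stores in the map.
    @SuppressWarnings("unchecked")
    static void printTableDetail(ISourceDTO sourceDTO, String tableName) {
        Map<String, Object> detail = ImpalaUtils.getImpalaHiveTableDetailInfo(sourceDTO, tableName);
        List<Column> allColumns = (List<Column>) detail.get("allColumns");
        List<Column> partitionColumns = (List<Column>) detail.get("partitionColumns");
        System.out.println("path = " + detail.get("path"));
        System.out.println("fieldDelimiter = " + detail.get("fieldDelimiter"));
        System.out.println(allColumns.size() + " columns, "
                + partitionColumns.size() + " of them partition columns");
    }
}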

Aggregations

Column (com.dtstack.taier.pluginapi.pojo.Column) 3
JSONObject (com.alibaba.fastjson.JSONObject) 2
Reader (com.dtstack.taier.develop.common.template.Reader) 2
Collectors (java.util.stream.Collectors) 2
StringUtils (org.apache.commons.lang.StringUtils) 2
JSON (com.alibaba.fastjson.JSON) 1
IClient (com.dtstack.dtcenter.loader.client.IClient) 1
ColumnMetaDTO (com.dtstack.dtcenter.loader.dto.ColumnMetaDTO) 1
SqlQueryDTO (com.dtstack.dtcenter.loader.dto.SqlQueryDTO) 1
ISourceDTO (com.dtstack.dtcenter.loader.dto.source.ISourceDTO) 1
DataSourceType (com.dtstack.dtcenter.loader.source.DataSourceType) 1
CustomThreadRunsPolicy (com.dtstack.taier.common.CustomThreadRunsPolicy) 1
RdosDefineException (com.dtstack.taier.common.exception.RdosDefineException) 1
Writer (com.dtstack.taier.develop.common.template.Writer) 1
SourceDTOType (com.dtstack.taier.develop.enums.develop.SourceDTOType) 1
SyncWriteMode (com.dtstack.taier.develop.enums.develop.SyncWriteMode) 1
TableLocationType (com.dtstack.taier.develop.enums.develop.TableLocationType) 1
ImpalaHdfsReader (com.dtstack.taier.develop.utils.develop.sync.template.ImpalaHdfsReader) 1
ImpalaHdfsWriter (com.dtstack.taier.develop.utils.develop.sync.template.ImpalaHdfsWriter) 1
KuduReader (com.dtstack.taier.develop.utils.develop.sync.template.KuduReader) 1