Search in sources :

Example 1 with HiveFileFormat

use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat in project incubator-inlong by apache.

In the class HiveSinkHelper, method createBulkWriterFactory:

/**
 * Creates the bulk writer factory that matches the file format configured on the given Hive sink.
 *
 * <p>The row type is built from the sink's field names and logical types. Parquet, text and ORC
 * formats are dispatched to their respective writer factories; every other format is rejected.
 *
 * @param hiveSinkInfo sink description supplying the field definitions and the file format
 * @param config configuration handed to the text and ORC writer factories
 * @return a writer factory for the configured file format
 * @throws IllegalArgumentException if the file format is null or is not Parquet, text or ORC
 */
public static BulkWriter.Factory<Row> createBulkWriterFactory(HiveSinkInfo hiveSinkInfo, Configuration config) {
    String[] fieldNames = getFieldNames(hiveSinkInfo).toArray(new String[0]);
    LogicalType[] fieldTypes = getFieldLogicalTypes(hiveSinkInfo).toArray(new LogicalType[0]);
    RowType rowType = RowType.of(fieldTypes, fieldNames);
    HiveFileFormat hiveFileFormat = hiveSinkInfo.getHiveFileFormat();
    // Guard explicitly: without this, a null format would reach the fallback branch and fail
    // with a bare NullPointerException while building the error message.
    if (hiveFileFormat == null) {
        throw new IllegalArgumentException("Hive file format must not be null");
    }
    if (hiveFileFormat instanceof ParquetFileFormat) {
        return ParquetRowWriterBuilder.createWriterFactory(rowType, (ParquetFileFormat) hiveFileFormat);
    } else if (hiveFileFormat instanceof TextFileFormat) {
        return new TextRowWriter.Factory((TextFileFormat) hiveFileFormat, fieldTypes, config);
    } else if (hiveFileFormat instanceof OrcFileFormat) {
        return OrcBulkWriterFactory.createWriterFactory(rowType, fieldTypes, config);
    } else {
        throw new IllegalArgumentException("Unsupported hive file format " + hiveFileFormat.getClass().getName());
    }
}
Also used : TextRowWriter(org.apache.inlong.sort.flink.hive.formats.TextRowWriter) OrcFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.OrcFileFormat) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) HiveFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat) ParquetFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.ParquetFileFormat) TextFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat)

Example 2 with HiveFileFormat

use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat in project incubator-inlong by apache.

In the class SinkInfoUtils, method createHiveSinkInfo:

/**
 * Create Hive sink info.
 *
 * <p>Translates the Hive sink response into a {@link HiveSinkInfo}: picks the file format from
 * the configured format name (falling back to text file), collects the primary (time) and
 * secondary (field) partitions, and derives the table data path from the HDFS default FS,
 * the warehouse directory, the database name and the table name.
 *
 * @param hiveInfo Hive sink configuration to translate
 * @param sinkFields fields written by the sink
 * @return the assembled Hive sink info
 * @throws RuntimeException if the JDBC URL is empty, the field list is empty, or the HDFS
 *         default FS / warehouse directory is missing
 * @throws NumberFormatException if the data separator is not a parsable integer code
 */
private static HiveSinkInfo createHiveSinkInfo(HiveSinkResponse hiveInfo, List<FieldInfo> sinkFields) {
    // The message promises "cannot be empty", so reject the empty string as well as null.
    if (StringUtils.isEmpty(hiveInfo.getJdbcUrl())) {
        throw new RuntimeException(String.format("HiveSink={%s} server url cannot be empty", hiveInfo));
    }
    if (CollectionUtils.isEmpty(hiveInfo.getFieldList())) {
        throw new RuntimeException(String.format("HiveSink={%s} fields cannot be empty", hiveInfo));
    }
    // Use the field separator in Hive, the default is TextFile.
    // The separator is configured as the decimal character code, not as the character itself.
    Character separator = (char) Integer.parseInt(hiveInfo.getDataSeparator());
    HiveFileFormat fileFormat;
    String format = hiveInfo.getFileFormat();
    if (Constant.FILE_FORMAT_ORC.equalsIgnoreCase(format)) {
        fileFormat = new HiveSinkInfo.OrcFileFormat(1000);
    } else if (Constant.FILE_FORMAT_SEQUENCE.equalsIgnoreCase(format)) {
        fileFormat = new HiveSinkInfo.SequenceFileFormat(separator, 100);
    } else if (Constant.FILE_FORMAT_PARQUET.equalsIgnoreCase(format)) {
        fileFormat = new HiveSinkInfo.ParquetFileFormat();
    } else {
        fileFormat = new HiveSinkInfo.TextFileFormat(separator);
    }
    // The primary partition field, in Sink must be HiveTimePartitionInfo
    List<HivePartitionInfo> partitionList = new ArrayList<>();
    String primary = hiveInfo.getPrimaryPartition();
    if (StringUtils.isNotEmpty(primary)) {
        // Hive partitions are by day, hour, and minute
        String unit = hiveInfo.getPartitionUnit();
        HiveTimePartitionInfo timePartitionInfo = new HiveTimePartitionInfo(primary, PARTITION_TIME_FORMAT_MAP.get(unit));
        partitionList.add(timePartitionInfo);
    }
    // TODO the type be set according to the type of the field itself.
    if (StringUtils.isNotEmpty(hiveInfo.getSecondaryPartition())) {
        partitionList.add(new HiveSinkInfo.HiveFieldPartitionInfo(hiveInfo.getSecondaryPartition()));
    }
    String hdfsUrl = hiveInfo.getHdfsDefaultFs();
    String warehouseDir = hiveInfo.getWarehouseDir();
    // Fail with a descriptive message instead of a bare NullPointerException further down.
    if (hdfsUrl == null || warehouseDir == null) {
        throw new RuntimeException(
                String.format("HiveSink={%s} hdfs default fs and warehouse dir cannot be null", hiveInfo));
    }
    String dataPath = buildHiveDataPath(hdfsUrl, warehouseDir, hiveInfo.getDbName(), hiveInfo.getTableName());
    return new HiveSinkInfo(sinkFields.toArray(new FieldInfo[0]), hiveInfo.getJdbcUrl(), hiveInfo.getDbName(), hiveInfo.getTableName(), hiveInfo.getUsername(), hiveInfo.getPassword(), dataPath, partitionList.toArray(new HiveSinkInfo.HivePartitionInfo[0]), fileFormat);
}

/**
 * Builds {@code hdfsUrl + / + warehouseDir + / + dbName + .db/ + tableName}.
 *
 * <p>One trailing slash is dropped from {@code hdfsUrl} and from {@code warehouseDir}, and a
 * slash is inserted between the two only when the warehouse directory does not already start
 * with one, so the segments are never glued together without a separator.
 */
private static String buildHiveDataPath(String hdfsUrl, String warehouseDir, String dbName, String tableName) {
    String base = hdfsUrl.endsWith("/") ? hdfsUrl.substring(0, hdfsUrl.length() - 1) : hdfsUrl;
    String warehouse = warehouseDir.endsWith("/")
            ? warehouseDir.substring(0, warehouseDir.length() - 1)
            : warehouseDir;
    StringBuilder dataPathBuilder = new StringBuilder(base);
    // An empty warehouse segment needs no separator of its own; the one before dbName suffices.
    if (!warehouse.isEmpty() && !warehouse.startsWith("/")) {
        dataPathBuilder.append('/');
    }
    return dataPathBuilder.append(warehouse)
            .append("/").append(dbName)
            .append(".db/").append(tableName)
            .toString();
}
Also used : HiveTimePartitionInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveTimePartitionInfo) ArrayList(java.util.ArrayList) HivePartitionInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HivePartitionInfo) HiveSinkInfo(org.apache.inlong.sort.protocol.sink.HiveSinkInfo) HiveFileFormat(org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat) FieldInfo(org.apache.inlong.sort.protocol.FieldInfo)

Aggregations

HiveFileFormat (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat): 2, ArrayList (java.util.ArrayList): 1, LogicalType (org.apache.flink.table.types.logical.LogicalType): 1, RowType (org.apache.flink.table.types.logical.RowType): 1, TextRowWriter (org.apache.inlong.sort.flink.hive.formats.TextRowWriter): 1, FieldInfo (org.apache.inlong.sort.protocol.FieldInfo): 1, HiveSinkInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo): 1, HivePartitionInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HivePartitionInfo): 1, HiveTimePartitionInfo (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveTimePartitionInfo): 1, OrcFileFormat (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.OrcFileFormat): 1, ParquetFileFormat (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.ParquetFileFormat): 1, TextFileFormat (org.apache.inlong.sort.protocol.sink.HiveSinkInfo.TextFileFormat): 1