Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat in the apache/incubator-inlong project.
From the class HiveSinkHelper, method createBulkWriterFactory:
public static BulkWriter.Factory<Row> createBulkWriterFactory(HiveSinkInfo hiveSinkInfo, Configuration config) {
    String[] fieldNames = getFieldNames(hiveSinkInfo).toArray(new String[0]);
    LogicalType[] fieldTypes = getFieldLogicalTypes(hiveSinkInfo).toArray(new LogicalType[0]);
    RowType rowType = RowType.of(fieldTypes, fieldNames);
    HiveFileFormat hiveFileFormat = hiveSinkInfo.getHiveFileFormat();
    if (hiveFileFormat instanceof ParquetFileFormat) {
        return ParquetRowWriterBuilder.createWriterFactory(rowType, (ParquetFileFormat) hiveFileFormat);
    } else if (hiveFileFormat instanceof TextFileFormat) {
        return new TextRowWriter.Factory((TextFileFormat) hiveFileFormat, fieldTypes, config);
    } else if (hiveFileFormat instanceof OrcFileFormat) {
        return OrcBulkWriterFactory.createWriterFactory(rowType, fieldTypes, config);
    } else {
        throw new IllegalArgumentException("Unsupported hive file format " + hiveFileFormat.getClass().getName());
    }
}
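For orientation, here is a minimal, hypothetical sketch of how this factory might be obtained for a Parquet-backed sink. The HiveSinkInfo constructor argument order mirrors the call at the end of createHiveSinkInfo below; the field definition, connection details, and paths are made up, and the StringFormatInfo no-arg constructor and HiveSinkHelper's package are assumptions, not confirmed by this page:

import org.apache.flink.api.common.serialization.BulkWriter;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.types.Row;
import org.apache.inlong.sort.formats.common.StringFormatInfo;
import org.apache.inlong.sort.protocol.FieldInfo;
import org.apache.inlong.sort.protocol.sink.HiveSinkInfo;
// import of HiveSinkHelper omitted; its package may vary by InLong version

public class HiveSinkHelperSketch {
    public static void main(String[] args) {
        // Hypothetical sink description; argument order follows the
        // HiveSinkInfo constructor call in createHiveSinkInfo below.
        HiveSinkInfo sinkInfo = new HiveSinkInfo(
                new FieldInfo[] {new FieldInfo("name", new StringFormatInfo())},
                "jdbc:hive2://localhost:10000",        // jdbcUrl
                "test_db",                             // dbName
                "test_table",                          // tableName
                "hive",                                // username
                "hive",                                // password
                "hdfs://namenode:8020/user/hive/warehouse/test_db.db/test_table",
                new HiveSinkInfo.HivePartitionInfo[0], // no partitions
                new HiveSinkInfo.ParquetFileFormat());

        // Dispatches on ParquetFileFormat, so a Parquet writer factory is returned.
        BulkWriter.Factory<Row> factory =
                HiveSinkHelper.createBulkWriterFactory(sinkInfo, new Configuration());
    }
}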
Use of org.apache.inlong.sort.protocol.sink.HiveSinkInfo.HiveFileFormat in the apache/incubator-inlong project.
From the class SinkInfoUtils, method createHiveSinkInfo:
/**
 * Create Hive sink info.
 */
private static HiveSinkInfo createHiveSinkInfo(HiveSinkResponse hiveInfo, List<FieldInfo> sinkFields) {
    if (hiveInfo.getJdbcUrl() == null) {
        throw new RuntimeException(String.format("HiveSink={%s} server url cannot be empty", hiveInfo));
    }
    if (CollectionUtils.isEmpty(hiveInfo.getFieldList())) {
        throw new RuntimeException(String.format("HiveSink={%s} fields cannot be empty", hiveInfo));
    }

    // The field separator is stored as the character's ASCII code in string
    // form, e.g. "124" decodes to '|'. The file format defaults to TextFile.
    Character separator = (char) Integer.parseInt(hiveInfo.getDataSeparator());
    HiveFileFormat fileFormat;
    String format = hiveInfo.getFileFormat();
    if (Constant.FILE_FORMAT_ORC.equalsIgnoreCase(format)) {
        fileFormat = new HiveSinkInfo.OrcFileFormat(1000);
    } else if (Constant.FILE_FORMAT_SEQUENCE.equalsIgnoreCase(format)) {
        fileFormat = new HiveSinkInfo.SequenceFileFormat(separator, 100);
    } else if (Constant.FILE_FORMAT_PARQUET.equalsIgnoreCase(format)) {
        fileFormat = new HiveSinkInfo.ParquetFileFormat();
    } else {
        fileFormat = new HiveSinkInfo.TextFileFormat(separator);
    }

    // The primary partition field; in the sink it must be a HiveTimePartitionInfo
    List<HivePartitionInfo> partitionList = new ArrayList<>();
    String primary = hiveInfo.getPrimaryPartition();
    if (StringUtils.isNotEmpty(primary)) {
        // Hive partitions are by day, hour, or minute
        String unit = hiveInfo.getPartitionUnit();
        HiveTimePartitionInfo timePartitionInfo = new HiveTimePartitionInfo(
                primary, PARTITION_TIME_FORMAT_MAP.get(unit));
        partitionList.add(timePartitionInfo);
    }

    // TODO: the partition type should be set according to the type of the field itself
    if (StringUtils.isNotEmpty(hiveInfo.getSecondaryPartition())) {
        partitionList.add(new HiveSinkInfo.HiveFieldPartitionInfo(hiveInfo.getSecondaryPartition()));
    }

    // dataPath = hdfsUrl + warehouseDir + "/" + dbName + ".db/" + tableName,
    // with any trailing slash stripped from hdfsUrl and warehouseDir
    StringBuilder dataPathBuilder = new StringBuilder();
    String hdfsUrl = hiveInfo.getHdfsDefaultFs();
    String warehouseDir = hiveInfo.getWarehouseDir();
    if (hdfsUrl.endsWith("/")) {
        dataPathBuilder.append(hdfsUrl, 0, hdfsUrl.length() - 1);
    } else {
        dataPathBuilder.append(hdfsUrl);
    }
    if (warehouseDir.endsWith("/")) {
        dataPathBuilder.append(warehouseDir, 0, warehouseDir.length() - 1);
    } else {
        dataPathBuilder.append(warehouseDir);
    }
    String dataPath = dataPathBuilder.append("/").append(hiveInfo.getDbName())
            .append(".db/").append(hiveInfo.getTableName()).toString();

    return new HiveSinkInfo(
            sinkFields.toArray(new FieldInfo[0]),
            hiveInfo.getJdbcUrl(),
            hiveInfo.getDbName(),
            hiveInfo.getTableName(),
            hiveInfo.getUsername(),
            hiveInfo.getPassword(),
            dataPath,
            partitionList.toArray(new HiveSinkInfo.HivePartitionInfo[0]),
            fileFormat);
}
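As a quick sanity check on the two pieces of string handling above, the following self-contained sketch reproduces the separator decoding and the dataPath assembly with made-up values. Note that no separator is inserted between hdfsUrl and warehouseDir, so warehouseDir is expected to start with "/":

public class HivePathSketch {
    public static void main(String[] args) {
        // dataSeparator carries the delimiter's ASCII code as a string
        char separator = (char) Integer.parseInt("124");
        System.out.println(separator); // prints: |

        // Made-up values; same trailing-slash handling as createHiveSinkInfo
        String hdfsUrl = "hdfs://namenode:8020/";
        String warehouseDir = "/user/hive/warehouse";
        String dbName = "test_db";
        String tableName = "test_table";

        StringBuilder dataPathBuilder = new StringBuilder();
        dataPathBuilder.append(hdfsUrl.endsWith("/")
                ? hdfsUrl.substring(0, hdfsUrl.length() - 1) : hdfsUrl);
        dataPathBuilder.append(warehouseDir.endsWith("/")
                ? warehouseDir.substring(0, warehouseDir.length() - 1) : warehouseDir);
        String dataPath = dataPathBuilder.append("/").append(dbName)
                .append(".db/").append(tableName).toString();

        // prints: hdfs://namenode:8020/user/hive/warehouse/test_db.db/test_table
        System.out.println(dataPath);
    }
}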