Use of org.apache.iceberg.MetricsConfig in project hive by apache.
The example below is taken from the class HiveTableUtil, method importFiles.
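
For context, MetricsConfig controls which column-level metrics (value counts, null counts, lower/upper bounds and their truncation) Iceberg records for newly added data files, and it is derived from the table's write.metadata.metrics.* properties. A minimal standalone sketch, assuming the standard Iceberg MetricsConfig API and illustrative property values that are not taken from the Hive source:

// Minimal sketch: derive a MetricsConfig from Iceberg table properties.
// The property values below are illustrative assumptions, not from the Hive source.
import java.util.HashMap;
import java.util.Map;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.TableProperties;

public class MetricsConfigSketch {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<>();
    // default mode applied to all columns: keep bounds truncated to 16 characters
    props.put(TableProperties.DEFAULT_WRITE_METRICS_MODE, "truncate(16)");
    // per-column override: only record value/null counts for the hypothetical column "payload"
    props.put(TableProperties.METRICS_MODE_COLUMN_CONF_PREFIX + "payload", "counts");
    MetricsConfig metricsConfig = MetricsConfig.fromProperties(props);
    System.out.println(metricsConfig.columnMode("payload"));
  }
}

In importFiles below, the same fromProperties call is made against the freshly created Iceberg table's properties, so the metrics modes configured on the destination table drive what is collected for the imported files.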
/**
* Import files from given partitions to an Iceberg table.
* @param sourceLocation location of the HMS table
* @param format inputformat class name of the HMS table
* @param partitionSpecProxy list of HMS table partitions wrapped in partitionSpecProxy
* @param partitionKeys list of partition keys
* @param icebergTableProperties destination iceberg table properties
* @param conf a Hadoop configuration
*/
public static void importFiles(String sourceLocation, String format, PartitionSpecProxy partitionSpecProxy,
    List<FieldSchema> partitionKeys, Properties icebergTableProperties, Configuration conf) throws MetaException {
  RemoteIterator<LocatedFileStatus> filesIterator = null;
  // this operation must be done before the iceberg table is created
  if (partitionSpecProxy.size() == 0) {
    filesIterator = getFilesIterator(new Path(sourceLocation), conf);
  }
  Table icebergTable = Catalogs.createTable(conf, icebergTableProperties);
  AppendFiles append = icebergTable.newAppend();
  PartitionSpec spec = icebergTable.spec();
  MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTable.properties());
  String nameMappingString = icebergTable.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
  NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
  try {
    if (partitionSpecProxy.size() == 0) {
      // unpartitioned table: every file under the table root becomes a DataFile in a single append
      List<DataFile> dataFiles =
          getDataFiles(filesIterator, Collections.emptyMap(), format, spec, metricsConfig, nameMapping, conf);
      dataFiles.forEach(append::appendFile);
    } else {
      // partitioned table: list and convert each partition's files in parallel
      PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator();
      List<Callable<Void>> tasks = new ArrayList<>();
      while (partitionIterator.hasNext()) {
        Partition partition = partitionIterator.next();
        Callable<Void> task = () -> {
          Path partitionPath = new Path(partition.getSd().getLocation());
          String partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
          Map<String, String> partitionSpec = Warehouse.makeSpecFromName(partitionName);
          RemoteIterator<LocatedFileStatus> iterator = getFilesIterator(partitionPath, conf);
          List<DataFile> dataFiles =
              getDataFiles(iterator, partitionSpec, format.toLowerCase(), spec, metricsConfig, nameMapping, conf);
          // the shared AppendFiles is updated from multiple tasks, so guard concurrent appendFile calls
          synchronized (append) {
            dataFiles.forEach(append::appendFile);
          }
          return null;
        };
        tasks.add(task);
      }
      int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS);
      ExecutorService executor = Executors.newFixedThreadPool(numThreads,
          new ThreadFactoryBuilder().setNameFormat("iceberg-metadata-generator-%d").setDaemon(true).build());
      // invokeAll blocks until all partition tasks have completed
      executor.invokeAll(tasks);
      executor.shutdown();
    }
    append.commit();
  } catch (IOException | InterruptedException e) {
    throw new MetaException("Cannot import hive data into iceberg table.\n" + e.getMessage());
  }
}
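
The getDataFiles helper is not shown on this page. Below is a hedged sketch, under the assumption that each listed file is a Parquet file, of how a single file status could be turned into an Iceberg DataFile using the same MetricsConfig and NameMapping; the helper name toDataFile and its parameters are illustrative and are not the actual Hive implementation.

// Hypothetical helper (not the real HiveTableUtil.getDataFiles): convert one Parquet file
// into an Iceberg DataFile, reading column metrics according to the MetricsConfig.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Metrics;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.parquet.ParquetUtil;

class DataFileSketch {
  static DataFile toDataFile(LocatedFileStatus file, String partitionPath, PartitionSpec spec,
      MetricsConfig metricsConfig, NameMapping nameMapping, Configuration conf) {
    // read value counts, null counts and (possibly truncated) bounds from the Parquet footer,
    // honoring the metrics modes configured in metricsConfig
    Metrics metrics = ParquetUtil.fileMetrics(HadoopInputFile.fromStatus(file, conf), metricsConfig, nameMapping);
    DataFiles.Builder builder = DataFiles.builder(spec)
        .withPath(file.getPath().toString())
        .withFormat(FileFormat.PARQUET)
        .withFileSizeInBytes(file.getLen())
        .withMetrics(metrics);
    if (!spec.isUnpartitioned()) {
      // partitionPath is a Hive-style partition path such as "ds=2023-01-01"
      builder.withPartitionPath(partitionPath);
    }
    return builder.build();
  }
}

Note the design choice in importFiles: file listing and metrics extraction are fanned out across a thread pool per partition, while the single AppendFiles is the only shared mutable state, so one synchronized block per task is enough and the whole import is committed as one atomic append.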