Use of org.apache.iceberg.mapping.NameMapping in project hive by apache.
From the class HiveTableUtil, method importFiles:
/**
 * Import files from given partitions to an Iceberg table.
 * @param sourceLocation location of the HMS table
 * @param format the input format class name of the HMS table
 * @param partitionSpecProxy list of HMS table partitions wrapped in PartitionSpecProxy
 * @param partitionKeys list of partition keys
 * @param icebergTableProperties destination Iceberg table properties
 * @param conf a Hadoop configuration
 * @throws MetaException if the import fails
 */
public static void importFiles(String sourceLocation, String format,
    PartitionSpecProxy partitionSpecProxy, List<FieldSchema> partitionKeys,
    Properties icebergTableProperties, Configuration conf) throws MetaException {
  RemoteIterator<LocatedFileStatus> filesIterator = null;
  // this operation must be done before the iceberg table is created
  if (partitionSpecProxy.size() == 0) {
    filesIterator = getFilesIterator(new Path(sourceLocation), conf);
  }
  Table icebergTable = Catalogs.createTable(conf, icebergTableProperties);
  AppendFiles append = icebergTable.newAppend();
  PartitionSpec spec = icebergTable.spec();
  MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTable.properties());
  String nameMappingString = icebergTable.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
  NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
  try {
    if (partitionSpecProxy.size() == 0) {
      List<DataFile> dataFiles =
          getDataFiles(filesIterator, Collections.emptyMap(), format, spec, metricsConfig, nameMapping, conf);
      dataFiles.forEach(append::appendFile);
    } else {
      PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator();
      List<Callable<Void>> tasks = new ArrayList<>();
      while (partitionIterator.hasNext()) {
        Partition partition = partitionIterator.next();
        Callable<Void> task = () -> {
          Path partitionPath = new Path(partition.getSd().getLocation());
          String partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
          Map<String, String> partitionSpec = Warehouse.makeSpecFromName(partitionName);
          RemoteIterator<LocatedFileStatus> iterator = getFilesIterator(partitionPath, conf);
          List<DataFile> dataFiles =
              getDataFiles(iterator, partitionSpec, format.toLowerCase(), spec, metricsConfig, nameMapping, conf);
          synchronized (append) {
            dataFiles.forEach(append::appendFile);
          }
          return null;
        };
        tasks.add(task);
      }
      int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS);
      ExecutorService executor = Executors.newFixedThreadPool(numThreads,
          new ThreadFactoryBuilder().setNameFormat("iceberg-metadata-generator-%d").setDaemon(true).build());
      executor.invokeAll(tasks);
      executor.shutdown();
    }
    append.commit();
  } catch (IOException | InterruptedException e) {
    throw new MetaException("Cannot import hive data into iceberg table.\n" + e.getMessage());
  }
}
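The getDataFiles helper called above is not shown on this page. As a rough sketch of the per-file work it likely performs for the unpartitioned branch: read each file's Parquet footer, use the name mapping (when present) to resolve columns that were written without Iceberg field IDs, and wrap the result in a DataFile. The toDataFile helper name and the class wrapper below are ours, not Hive's; the Iceberg calls (ParquetUtil.fileMetrics, HadoopInputFile.fromStatus, DataFiles.builder) are standard public APIs.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Metrics;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.parquet.ParquetUtil;

class DataFileSketch {
  // Hypothetical helper approximating the per-file step of getDataFiles:
  static DataFile toDataFile(LocatedFileStatus file, PartitionSpec spec,
      MetricsConfig metricsConfig, NameMapping nameMapping, Configuration conf) {
    // fileMetrics reads the Parquet footer; the name mapping (possibly null)
    // resolves columns by name for files written without Iceberg field IDs
    Metrics metrics = ParquetUtil.fileMetrics(
        HadoopInputFile.fromStatus(file, conf), metricsConfig, nameMapping);
    return DataFiles.builder(spec)
        .withPath(file.getPath().toString())
        .withFileSizeInBytes(file.getLen())
        .withFormat(FileFormat.PARQUET)
        .withMetrics(metrics)
        .build();
  }
}

For the partitioned branch, the builder would additionally carry the partition values derived via Warehouse.makeSpecFromName, e.g. through DataFiles.Builder's withPartitionPath.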
Use of org.apache.iceberg.mapping.NameMapping in project hive by apache.
From the class HiveIcebergMetaHook, method preAlterTable:
@Override
public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
    throws MetaException {
  setupAlterOperationType(hmsTable, context);
  catalogProperties = getCatalogProperties(hmsTable);
  try {
    icebergTable = IcebergTableUtil.getTable(conf, catalogProperties);
  } catch (NoSuchTableException nte) {
    context.getProperties().put(MIGRATE_HIVE_TO_ICEBERG, "true");
    // If the iceberg table does not exist, then this is an ALTER command aimed at migrating the table to iceberg.
    // First we must check whether it's eligible for migration to iceberg.
    // If so, we will create the iceberg table in commitAlterTable and go ahead with the migration.
    assertTableCanBeMigrated(hmsTable);
    isTableMigration = true;
    StorageDescriptor sd = hmsTable.getSd();
    preAlterTableProperties = new PreAlterTableProperties();
    preAlterTableProperties.tableLocation = sd.getLocation();
    preAlterTableProperties.format = sd.getInputFormat();
    preAlterTableProperties.schema = schema(catalogProperties, hmsTable);
    preAlterTableProperties.partitionKeys = hmsTable.getPartitionKeys();
    context.getProperties().put(HiveMetaHook.ALLOW_PARTITION_KEY_CHANGE, "true");
    // If there are partition keys specified, remove them from the HMS table and add them to the column list
    if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
      List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
      if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
        throw new MetaException("Query state attached to Session state must be not null. " +
            "Partition transform metadata cannot be saved.");
      }
      hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
      hmsTable.setPartitionKeysIsSet(false);
    }
    preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, hmsTable);
    sd.setInputFormat(HiveIcebergInputFormat.class.getCanonicalName());
    sd.setOutputFormat(HiveIcebergOutputFormat.class.getCanonicalName());
    sd.setSerdeInfo(new SerDeInfo("icebergSerde", HiveIcebergSerDe.class.getCanonicalName(), Collections.emptyMap()));
    setCommonHmsTablePropertiesForIceberg(hmsTable);
    // Set an additional table prop to designate that this table has been migrated to Iceberg, i.e.
    // all or some of its data files have not been written out using the Iceberg writer, and therefore those data
    // files do not contain Iceberg field IDs. This makes certain schema evolution operations problematic, so we
    // want to disable these ops for now using this new table prop.
    hmsTable.getParameters().put(MIGRATED_TO_ICEBERG, "true");
    NameMapping nameMapping = MappingUtil.create(preAlterTableProperties.schema);
    hmsTable.getParameters().put(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(nameMapping));
  }
  if (AlterTableType.ADDCOLS.equals(currentAlterTableOp)) {
    handleAddColumns(hmsTable);
  } else if (AlterTableType.REPLACE_COLUMNS.equals(currentAlterTableOp)) {
    assertNotMigratedTable(hmsTable.getParameters(), currentAlterTableOp.getName().toUpperCase());
    handleReplaceColumns(hmsTable);
  } else if (AlterTableType.RENAME_COLUMN.equals(currentAlterTableOp)) {
    // Passing in the "CHANGE COLUMN" string instead, since RENAME COLUMN is not part of SQL syntax (not to mention
    // that users can change data types or reorder columns too with this alter op type, so its name is misleading...)
    assertNotMigratedTable(hmsTable.getParameters(), "CHANGE COLUMN");
    handleChangeColumn(hmsTable);
  }
}
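The last two lines inside the catch block are where the name mapping for a migrated table is produced: MappingUtil.create derives a name-to-field-ID mapping from the Iceberg schema, and its JSON form is stored under TableProperties.DEFAULT_NAME_MAPPING, which is exactly the property importFiles reads back with NameMappingParser.fromJson. A minimal, self-contained sketch of that round trip, using a made-up two-column schema (HiveSchemaUtil.convert is the standard Iceberg helper that assigns field IDs to HMS columns):

import java.util.Arrays;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.iceberg.Schema;
import org.apache.iceberg.hive.HiveSchemaUtil;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.mapping.NameMappingParser;

class NameMappingSketch {
  public static void main(String[] args) {
    // Hypothetical HMS columns; HiveSchemaUtil.convert assigns Iceberg field IDs
    Schema schema = HiveSchemaUtil.convert(Arrays.asList(
        new FieldSchema("id", "bigint", null),
        new FieldSchema("data", "string", null)));
    // Derive the name-based mapping and serialize it the same way
    // preAlterTable stores it in the table parameters
    NameMapping mapping = MappingUtil.create(schema);
    String json = NameMappingParser.toJson(mapping);
    // importFiles later recovers the mapping from this JSON
    NameMapping roundTripped = NameMappingParser.fromJson(json);
    System.out.println(json);
  }
}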