Usage example of org.apache.iceberg.mr.Catalogs in the apache/hive project: class HiveIcebergMetaHook, method getCatalogProperties.
/**
 * Calculates the properties we would like to send to the catalog.
 * <ul>
 * <li>The base of the properties is the properties stored at the Hive Metastore for the given table
 * <li>We add the {@link Catalogs#LOCATION} as the table location
 * <li>We add the {@link Catalogs#NAME} as TableIdentifier defined by the database name and table name
 * <li>We add the serdeProperties of the HMS table
 * <li>We remove some parameters that we don't want to push down to the Iceberg table props
 * </ul>
 * @param hmsTable Table for which we are calculating the properties
 * @return The properties we can provide for Iceberg functions, like {@link Catalogs}
 */
private static Properties getCatalogProperties(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
  Properties properties = new Properties();
  // Start from the HMS table parameters, skipping null keys/values (Properties is null-hostile)
  hmsTable.getParameters().entrySet().stream()
      .filter(e -> e.getKey() != null && e.getValue() != null)
      .forEach(e -> {
        // translate key names between HMS and Iceberg where needed
        String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
        properties.put(icebergKey, e.getValue());
      });

  // The storage descriptor may be absent (e.g. partially constructed HMS objects) — guard every access.
  // The original code checked getSd() for null here but dereferenced it unconditionally for the
  // serde info below, which could throw a NullPointerException.
  if (properties.get(Catalogs.LOCATION) == null &&
      hmsTable.getSd() != null && hmsTable.getSd().getLocation() != null) {
    properties.put(Catalogs.LOCATION, hmsTable.getSd().getLocation());
  }

  if (properties.get(Catalogs.NAME) == null) {
    properties.put(Catalogs.NAME, TableIdentifier.of(hmsTable.getDbName(), hmsTable.getTableName()).toString());
  }

  // Merge in the serde properties as well, with the same key translation and null filtering
  SerDeInfo serdeInfo = hmsTable.getSd() != null ? hmsTable.getSd().getSerdeInfo() : null;
  if (serdeInfo != null && serdeInfo.getParameters() != null) {
    serdeInfo.getParameters().entrySet().stream()
        .filter(e -> e.getKey() != null && e.getValue() != null)
        .forEach(e -> {
          String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
          properties.put(icebergKey, e.getValue());
        });
  }

  // Remove HMS table parameters we don't want to propagate to Iceberg
  PROPERTIES_TO_REMOVE.forEach(properties::remove);
  return properties;
}
Usage example of org.apache.iceberg.mr.Catalogs in the apache/hive project: class IcebergTableUtil, method getTable.
/**
 * Loads the Iceberg table, preferring the instance cached in the {@link QueryState}.
 * If no table object has been stored for this query yet, the table is loaded through the
 * {@link Catalogs} API using the configured catalog and then cached in the query state so that
 * subsequent lookups within the same query reuse it.
 * @param configuration a Hadoop configuration
 * @param properties controlling properties
 * @return an Iceberg table
 */
static Table getTable(Configuration configuration, Properties properties) {
  // A metadata table (e.g. snapshots, history) is addressed as "<db>.<table>.<metaTable>"
  String metaTable = properties.getProperty("metaTable");
  if (metaTable != null) {
    String baseName = properties.getProperty(Catalogs.NAME);
    properties.setProperty(Catalogs.NAME, baseName + "." + metaTable);
  }

  String identifier = properties.getProperty(Catalogs.NAME);
  return SessionStateUtil.getResource(configuration, identifier)
      .filter(Table.class::isInstance)
      .map(Table.class::cast)
      .orElseGet(() -> {
        LOG.debug("Iceberg table {} is not found in QueryState. Loading table from configured catalog", identifier);
        Table loaded = Catalogs.loadTable(configuration, properties);
        SessionStateUtil.addResource(configuration, identifier, loaded);
        return loaded;
      });
}
Aggregations