use of org.apache.iceberg.Table in project hive by apache.
the class TestHiveIcebergStatistics method testAnalyzeTableComputeStatisticsForColumns.
/**
 * Creates the {@code orders} test table, runs {@code ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS}
 * against it through the test shell, and then hands the table to {@code validateBasicStats}.
 */
@Test
public void testAnalyzeTableComputeStatisticsForColumns() throws IOException, TException, InterruptedException {
  final String database = "default";
  final String name = "orders";

  Table ordersTable = testTables.createTable(shell, name, ORDER_SCHEMA, fileFormat, ORDER_RECORDS);

  // Fully qualified table reference: <database>.<name>
  String analyzeStatement = "ANALYZE TABLE " + database + "." + name + " COMPUTE STATISTICS FOR COLUMNS";
  shell.executeStatement(analyzeStatement);

  validateBasicStats(ordersTable, database, name);
}
use of org.apache.iceberg.Table in project hive by apache.
the class TestCatalogs method testCreateDropTableToCatalog.
/**
 * Exercises {@code Catalogs.createTable} and {@code Catalogs.dropTable} against a custom Hadoop catalog:
 * property sets lacking a schema or an identifier must be rejected, a complete property set must
 * produce a table with the expected schema, partition spec and properties, and a dropped table
 * must no longer be loadable.
 */
@Test
public void testCreateDropTableToCatalog() throws IOException {
  final TableIdentifier tableId = TableIdentifier.of("test", "table");
  final String tableName = tableId.toString();
  final String catalogName = "default";
  final String warehouse = temp.newFolder("hadoop", "warehouse").toString();
  setCustomCatalogProperties(catalogName, warehouse);

  // Creation without a schema must fail.
  Properties noSchemaProps = new Properties();
  noSchemaProps.put("name", tableName);
  noSchemaProps.put(InputFormatConfig.CATALOG_NAME, catalogName);
  AssertHelpers.assertThrows("Should complain about table schema not set", NullPointerException.class,
      "schema not set", () -> Catalogs.createTable(conf, noSchemaProps));

  // Creation without a table identifier must fail.
  final String schemaJson = SchemaParser.toJson(SCHEMA);
  Properties noIdentifierProps = new Properties();
  noIdentifierProps.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
  noIdentifierProps.put(InputFormatConfig.CATALOG_NAME, catalogName);
  AssertHelpers.assertThrows("Should complain about table identifier not set", NullPointerException.class,
      "identifier not set", () -> Catalogs.createTable(conf, noIdentifierProps));

  // Creation with a complete property set, including one pass-through user property.
  final String specJson = PartitionSpecParser.toJson(SPEC);
  Properties createProps = new Properties();
  createProps.put("name", tableName);
  createProps.put(InputFormatConfig.TABLE_SCHEMA, schemaJson);
  createProps.put(InputFormatConfig.PARTITION_SPEC, specJson);
  createProps.put("dummy", "test");
  createProps.put(InputFormatConfig.CATALOG_NAME, catalogName);
  Catalogs.createTable(conf, createProps);

  // Read the table back directly through the catalog and compare it with what was requested.
  HadoopCatalog hadoopCatalog = new CustomHadoopCatalog(conf, warehouse);
  Table created = hadoopCatalog.loadTable(tableId);
  Assert.assertEquals(schemaJson, SchemaParser.toJson(created.schema()));
  Assert.assertEquals(specJson, PartitionSpecParser.toJson(created.spec()));
  Assert.assertEquals(Collections.singletonMap("dummy", "test"), created.properties());

  // Dropping without an identifier must fail.
  AssertHelpers.assertThrows("Should complain about table identifier not set", NullPointerException.class,
      "identifier not set", () -> Catalogs.dropTable(conf, new Properties()));

  // Drop the table, then confirm it can no longer be loaded.
  Properties dropProps = new Properties();
  dropProps.put("name", tableName);
  dropProps.put(InputFormatConfig.CATALOG_NAME, catalogName);
  Catalogs.dropTable(conf, dropProps);
  AssertHelpers.assertThrows("Should complain about table not found", NoSuchTableException.class,
      "Table does not exist", () -> Catalogs.loadTable(conf, dropProps));
}
use of org.apache.iceberg.Table in project hive by apache.
the class TestCatalogs method testLoadTableFromLocation.
/**
 * Checks location-based loading through {@code Catalogs.loadTable}: with the catalog set to
 * {@code Catalogs.LOCATION} and no table location configured the call must fail, and once the
 * location of a table created via {@code HadoopTables} is configured, that table must be returned.
 */
@Test
public void testLoadTableFromLocation() throws IOException {
  conf.set(InputFormatConfig.CATALOG, Catalogs.LOCATION);

  // No table location configured yet.
  AssertHelpers.assertThrows("Should complain about table location not set", IllegalArgumentException.class,
      "location not set", () -> Catalogs.loadTable(conf));

  String tableDir = temp.newFolder("hadoop_tables").toString();
  Table created = new HadoopTables().create(SCHEMA, tableDir);
  String expectedLocation = created.location();

  conf.set(InputFormatConfig.TABLE_LOCATION, expectedLocation);
  Table loaded = Catalogs.loadTable(conf);
  Assert.assertEquals(expectedLocation, loaded.location());
}
use of org.apache.iceberg.Table in project hive by apache.
the class TestCatalogs method testLoadTableFromCatalog.
/**
 * Checks catalog-based loading through {@code Catalogs.loadTable}: without a table identifier in
 * the configuration the call must fail, and once the identifier of a table created in the custom
 * Hadoop catalog is configured, the loaded table must report the same location.
 */
@Test
public void testLoadTableFromCatalog() throws IOException {
  final String catalogName = "default";
  final String warehouse = temp.newFolder("hadoop", "warehouse").toString();
  setCustomCatalogProperties(catalogName, warehouse);

  // No table identifier configured yet.
  AssertHelpers.assertThrows("Should complain about table identifier not set", IllegalArgumentException.class,
      "identifier not set", () -> Catalogs.loadTable(conf));

  HadoopCatalog hadoopCatalog = new CustomHadoopCatalog(conf, warehouse);
  Table created = hadoopCatalog.createTable(TableIdentifier.of("table"), SCHEMA);
  String expectedLocation = created.location();

  conf.set(InputFormatConfig.TABLE_IDENTIFIER, "table");
  Table loaded = Catalogs.loadTable(conf);
  Assert.assertEquals(expectedLocation, loaded.location());
}
use of org.apache.iceberg.Table in project hive by apache.
the class HiveTableUtil method importFiles.
/**
 * Import files from given partitions to an Iceberg table.
 * <p>
 * When {@code partitionSpecProxy} is empty, the files under {@code sourceLocation} are listed before the
 * Iceberg table is created and are then appended. Otherwise one task per partition is submitted to a
 * fixed-size thread pool; each task lists the files of its partition and adds them to a shared append
 * operation. The append is committed once, and only if every step succeeded.
 * @param sourceLocation location of the HMS table
 * @param format inputformat class name of the HMS table
 * @param partitionSpecProxy list of HMS table partitions wrapped in partitionSpecProxy
 * @param partitionKeys list of partition keys
 * @param icebergTableProperties destination iceberg table properties
 * @param conf a Hadoop configuration
 * @throws MetaException if collecting the data files fails, if any partition task fails, or if the
 *     calling thread is interrupted while waiting for the partition tasks; nothing is committed in
 *     these cases
 */
public static void importFiles(String sourceLocation, String format, PartitionSpecProxy partitionSpecProxy, List<FieldSchema> partitionKeys, Properties icebergTableProperties, Configuration conf) throws MetaException {
  RemoteIterator<LocatedFileStatus> filesIterator = null;
  // this operation must be done before the iceberg table is created
  if (partitionSpecProxy.size() == 0) {
    filesIterator = getFilesIterator(new Path(sourceLocation), conf);
  }
  Table icebergTable = Catalogs.createTable(conf, icebergTableProperties);
  AppendFiles append = icebergTable.newAppend();
  PartitionSpec spec = icebergTable.spec();
  MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTable.properties());
  String nameMappingString = icebergTable.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
  NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
  try {
    if (partitionSpecProxy.size() == 0) {
      List<DataFile> dataFiles = getDataFiles(filesIterator, Collections.emptyMap(), format, spec, metricsConfig, nameMapping, conf);
      dataFiles.forEach(append::appendFile);
    } else {
      PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator();
      List<Callable<Void>> tasks = new ArrayList<>();
      while (partitionIterator.hasNext()) {
        Partition partition = partitionIterator.next();
        Callable<Void> task = () -> {
          Path partitionPath = new Path(partition.getSd().getLocation());
          String partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
          Map<String, String> partitionSpec = Warehouse.makeSpecFromName(partitionName);
          RemoteIterator<LocatedFileStatus> iterator = getFilesIterator(partitionPath, conf);
          List<DataFile> dataFiles = getDataFiles(iterator, partitionSpec, format.toLowerCase(), spec, metricsConfig, nameMapping, conf);
          // The append operation is shared by all tasks, so additions are serialized on it.
          synchronized (append) {
            dataFiles.forEach(append::appendFile);
          }
          return null;
        };
        tasks.add(task);
      }
      int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS);
      ExecutorService executor = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setNameFormat("iceberg-metadata-generator-%d").setDaemon(true).build());
      try {
        // invokeAll returns once every task has completed, but a task that threw only reports its
        // failure through its Future. Each result is checked so that a failed partition aborts the
        // import instead of being silently left out of the commit.
        for (java.util.concurrent.Future<Void> result : executor.invokeAll(tasks)) {
          result.get();
        }
      } finally {
        // Release the pool on every path, including interruption and task failure.
        executor.shutdown();
      }
    }
    append.commit();
  } catch (IOException | InterruptedException | java.util.concurrent.ExecutionException e) {
    if (e instanceof InterruptedException) {
      // Restore the interrupt status so callers further up the stack can observe it.
      Thread.currentThread().interrupt();
    }
    // For a failed task, report the task's own exception rather than the ExecutionException wrapper.
    Throwable failure = e instanceof java.util.concurrent.ExecutionException && e.getCause() != null ? e.getCause() : e;
    MetaException metaException = new MetaException("Cannot import hive data into iceberg table.\n" + failure.getMessage());
    metaException.initCause(failure);
    throw metaException;
  }
}
Aggregations