use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.
the class HiveTableUtil method importFiles.
/**
* Import files from given partitions to an Iceberg table.
* @param sourceLocation location of the HMS table
* @param format the InputFormat class name of the HMS table
* @param partitionSpecProxy list of HMS table partitions wrapped in partitionSpecProxy
* @param partitionKeys list of partition keys
* @param icebergTableProperties destination iceberg table properties
* @param conf a Hadoop configuration
*/
public static void importFiles(String sourceLocation, String format, PartitionSpecProxy partitionSpecProxy,
    List<FieldSchema> partitionKeys, Properties icebergTableProperties, Configuration conf) throws MetaException {
  RemoteIterator<LocatedFileStatus> filesIterator = null;
  // this operation must be done before the iceberg table is created
  if (partitionSpecProxy.size() == 0) {
    filesIterator = getFilesIterator(new Path(sourceLocation), conf);
  }
  Table icebergTable = Catalogs.createTable(conf, icebergTableProperties);
  AppendFiles append = icebergTable.newAppend();
  PartitionSpec spec = icebergTable.spec();
  MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTable.properties());
  String nameMappingString = icebergTable.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
  NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
  try {
    if (partitionSpecProxy.size() == 0) {
      List<DataFile> dataFiles =
          getDataFiles(filesIterator, Collections.emptyMap(), format, spec, metricsConfig, nameMapping, conf);
      dataFiles.forEach(append::appendFile);
    } else {
      PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator();
      List<Callable<Void>> tasks = new ArrayList<>();
      while (partitionIterator.hasNext()) {
        Partition partition = partitionIterator.next();
        Callable<Void> task = () -> {
          Path partitionPath = new Path(partition.getSd().getLocation());
          String partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
          Map<String, String> partitionSpec = Warehouse.makeSpecFromName(partitionName);
          RemoteIterator<LocatedFileStatus> iterator = getFilesIterator(partitionPath, conf);
          List<DataFile> dataFiles =
              getDataFiles(iterator, partitionSpec, format.toLowerCase(), spec, metricsConfig, nameMapping, conf);
          synchronized (append) {
            dataFiles.forEach(append::appendFile);
          }
          return null;
        };
        tasks.add(task);
      }
      int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS);
      ExecutorService executor = Executors.newFixedThreadPool(numThreads,
          new ThreadFactoryBuilder().setNameFormat("iceberg-metadata-generator-%d").setDaemon(true).build());
      executor.invokeAll(tasks);
      executor.shutdown();
    }
    append.commit();
  } catch (IOException | InterruptedException e) {
    throw new MetaException("Cannot import hive data into iceberg table.\n" + e.getMessage());
  }
}
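getFilesIterator and getDataFiles are sibling helpers in HiveTableUtil that are not shown here. A minimal sketch of getFilesIterator, assuming it simply lists data files recursively through the Hadoop FileSystem API (the real helper may additionally skip hidden files):

private static RemoteIterator<LocatedFileStatus> getFilesIterator(Path path, Configuration conf) throws MetaException {
  try {
    // Recursively enumerate all files under the given location.
    FileSystem fs = path.getFileSystem(conf);
    return fs.listFiles(path, true);
  } catch (IOException e) {
    throw new MetaException("Cannot list files at " + path + ": " + e.getMessage());
  }
}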
use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.
the class TestHiveMetaStorePartitionSpecs method testFetchingPartitionsWithDifferentSchemas.
/**
* Test to confirm that Partition-grouping behaves correctly when Table-schemas evolve.
* Partitions must be grouped by location and schema.
*/
@Test
public void testFetchingPartitionsWithDifferentSchemas() {
  try {
    // Create the source table.
    HiveMetaStoreClient hmsc = new HiveMetaStoreClient(conf);
    clearAndRecreateDB(hmsc);
    createTable(hmsc, true);
    Table table = hmsc.getTable(dbName, tableName);
    populatePartitions(hmsc, table,
        // Blurb list.
        Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath"));
    // Modify the table schema: add a column.
    List<FieldSchema> fields = table.getSd().getCols();
    fields.add(new FieldSchema("goo", "string", "Entirely new column. Doesn't apply to older partitions."));
    table.getSd().setCols(fields);
    hmsc.alter_table(dbName, tableName, table);
    // Check that the change stuck.
    table = hmsc.getTable(dbName, tableName);
    Assert.assertEquals("Unexpected number of table columns.", 3, table.getSd().getColsSize());
    // Add partitions with the new schema, marked with a different blurb.
    populatePartitions(hmsc, table, Arrays.asList("hasNewColumn"));
    // Retrieve *all* partitions from the table.
    PartitionSpecProxy partitionSpecProxy = hmsc.listPartitionSpecs(dbName, tableName, -1);
    Assert.assertEquals("Unexpected number of partitions.", nDates * 3, partitionSpecProxy.size());
    // Confirm grouping.
    Assert.assertTrue("Unexpected type of PartitionSpecProxy.",
        partitionSpecProxy instanceof CompositePartitionSpecProxy);
    CompositePartitionSpecProxy compositePartitionSpecProxy = (CompositePartitionSpecProxy) partitionSpecProxy;
    List<PartitionSpec> partitionSpecs = compositePartitionSpecProxy.toPartitionSpec();
    Assert.assertTrue("PartitionSpec[0] should have been a SharedSDPartitionSpec.",
        partitionSpecs.get(0).isSetSharedSDPartitionSpec());
    Assert.assertEquals("PartitionSpec[0] should use the table-path as the common root location. ",
        table.getSd().getLocation(), partitionSpecs.get(0).getRootPath());
    Assert.assertTrue("PartitionSpec[1] should have been a SharedSDPartitionSpec.",
        partitionSpecs.get(1).isSetSharedSDPartitionSpec());
    Assert.assertEquals("PartitionSpec[1] should use the table-path as the common root location. ",
        table.getSd().getLocation(), partitionSpecs.get(1).getRootPath());
    Assert.assertTrue("PartitionSpec[2] should have been a ListComposingPartitionSpec.",
        partitionSpecs.get(2).isSetPartitionList());
    // Categorize the partitions returned, and confirm that all partitions are accounted for.
    PartitionSpecProxy.PartitionIterator iterator = partitionSpecProxy.getPartitionIterator();
    Map<String, List<Partition>> blurbToPartitionList = new HashMap<>(3);
    while (iterator.hasNext()) {
      Partition partition = iterator.next();
      String blurb = partition.getValues().get(1);
      if (!blurbToPartitionList.containsKey(blurb)) {
        blurbToPartitionList.put(blurb, new ArrayList<>(nDates));
      }
      blurbToPartitionList.get(blurb).add(partition);
    }
    // Partitions marked "isLocatedOutsideTablePath" must retain the old 2-column schema,
    // and must have locations outside the table directory.
    for (Partition partition : blurbToPartitionList.get("isLocatedOutsideTablePath")) {
      Assert.assertEquals("Unexpected number of columns.", 2, partition.getSd().getCols().size());
      Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName());
      Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName());
      String partitionLocation = partition.getSd().getLocation();
      String tableLocation = table.getSd().getLocation();
      Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". "
          + "Partition should have been outside table location: " + tableLocation,
          !partitionLocation.startsWith(tableLocation));
    }
    // Partitions marked "isLocatedInTablePath" must retain the old 2-column schema,
    // and must have locations within the table directory.
    for (Partition partition : blurbToPartitionList.get("isLocatedInTablePath")) {
      Assert.assertEquals("Unexpected number of columns.", 2, partition.getSd().getCols().size());
      Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName());
      Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName());
      String partitionLocation = partition.getSd().getLocation();
      String tableLocation = table.getSd().getLocation();
      Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". "
          + "Partition should have been within table location: " + tableLocation,
          partitionLocation.startsWith(tableLocation));
    }
    // Partitions marked "hasNewColumn" must have 3 columns. Also, the partition locations
    // must lie within the table directory.
    for (Partition partition : blurbToPartitionList.get("hasNewColumn")) {
      Assert.assertEquals("Unexpected number of columns.", 3, partition.getSd().getCols().size());
      Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName());
      Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName());
      Assert.assertEquals("Unexpected third column.", "goo", partition.getSd().getCols().get(2).getName());
      String partitionLocation = partition.getSd().getLocation();
      String tableLocation = table.getSd().getLocation();
      Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". "
          + "Partition should have been within table location: " + tableLocation,
          partitionLocation.startsWith(tableLocation));
    }
  } catch (Throwable t) {
    LOG.error("Unexpected Exception!", t);
    Assert.fail("Unexpected Exception!");
  }
}
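The populatePartitions, createTable, and clearAndRecreateDB helpers belong to the test class and are not shown. A rough sketch of what populatePartitions must do to satisfy the assertions above; the partition-key names, date values, and "_outside" root are all assumptions, and the real helper may differ:

private static void populatePartitions(HiveMetaStoreClient hmsc, Table table, List<String> blurbs) throws Exception {
  for (int i = 0; i < nDates; ++i) {
    String date = String.format("2008-01-%02d", i + 1); // hypothetical date values
    for (String blurb : blurbs) {
      StorageDescriptor sd = new StorageDescriptor(table.getSd());
      // Hypothetical layout: "isLocatedOutsideTablePath" partitions live under a separate root,
      // so they cannot share the table path as a common root location.
      String root = "isLocatedOutsideTablePath".equals(blurb)
          ? table.getSd().getLocation() + "_outside" : table.getSd().getLocation();
      sd.setLocation(root + "/dateColumn=" + date + "/blurbColumn=" + blurb);
      // Partition values are (date, blurb), matching partition.getValues().get(1) above.
      Partition partition = new Partition(Arrays.asList(date, blurb), table.getDbName(),
          table.getTableName(), (int) (System.currentTimeMillis() / 1000), 0, sd, null);
      hmsc.add_partition(partition);
    }
  }
}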
use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.
the class TestHiveMetaStorePartitionSpecs method testAddPartitions.
/**
* Test to confirm that partitions can be added using PartitionSpecs.
*/
@Test
public void testAddPartitions() {
  try {
    // Create the source table.
    HiveMetaStoreClient hmsc = new HiveMetaStoreClient(conf);
    clearAndRecreateDB(hmsc);
    createTable(hmsc, true);
    Table table = hmsc.getTable(dbName, tableName);
    Assert.assertTrue(table.isSetId());
    table.unsetId();
    populatePartitions(hmsc, table, Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath"));
    // Clone the table.
    String targetTableName = "cloned_" + tableName;
    Table targetTable = new Table(table);
    targetTable.setTableName(targetTableName);
    StorageDescriptor targetTableSd = new StorageDescriptor(targetTable.getSd());
    targetTableSd.setLocation(targetTableSd.getLocation().replace(tableName, targetTableName));
    hmsc.createTable(targetTable);
    // Get the partition-list from the source.
    PartitionSpecProxy partitionsForAddition =
        hmsc.listPartitionSpecsByFilter(dbName, tableName, "blurb = \"isLocatedInTablePath\"", -1);
    partitionsForAddition.setTableName(targetTableName);
    partitionsForAddition.setRootLocation(targetTableSd.getLocation());
    Assert.assertEquals("Unexpected number of partitions added. ",
        partitionsForAddition.size(), hmsc.add_partitions_pspec(partitionsForAddition));
    // Check that the added partitions are as expected.
    PartitionSpecProxy clonedPartitions = hmsc.listPartitionSpecs(dbName, targetTableName, -1);
    Assert.assertEquals("Unexpected number of partitions returned. ",
        partitionsForAddition.size(), clonedPartitions.size());
    PartitionSpecProxy.PartitionIterator sourceIterator = partitionsForAddition.getPartitionIterator(),
        targetIterator = clonedPartitions.getPartitionIterator();
    while (targetIterator.hasNext()) {
      Partition sourcePartition = sourceIterator.next(), targetPartition = targetIterator.next();
      Assert.assertEquals("Mismatched values.", sourcePartition.getValues(), targetPartition.getValues());
      Assert.assertEquals("Mismatched locations.",
          sourcePartition.getSd().getLocation(), targetPartition.getSd().getLocation());
    }
  } catch (Throwable t) {
    LOG.error("Unexpected Exception!", t);
    Assert.fail("Unexpected Exception!");
  }
}
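Stripped of its assertions, the copy pattern this test exercises reduces to four calls, all taken from the code above (a usage sketch; targetRootLocation stands in for the cloned table's location):

PartitionSpecProxy specs =
    hmsc.listPartitionSpecsByFilter(dbName, tableName, "blurb = \"isLocatedInTablePath\"", -1);
specs.setTableName(targetTableName);          // retarget the partitions at the clone
specs.setRootLocation(targetRootLocation);    // re-root the shared storage-descriptor paths
int added = hmsc.add_partitions_pspec(specs); // returns the number of partitions created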
use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.
the class NonCatCallsWithCatalog method listPartitions.
@Test
public void listPartitions() throws TException {
  String dbName = "list_partition_database_in_other_catalog";
  Database db = new DatabaseBuilder().setName(dbName).build(conf);
  db.unsetCatalogName();
  client.createDatabase(db);
  String tableName = "table_in_other_catalog";
  Table table = new TableBuilder().inDb(db).setTableName(tableName)
      .addCol("id", "int").addCol("name", "string").addPartCol("partcol", "string").build(conf);
  table.unsetCatName();
  client.createTable(table);
  Partition[] parts = new Partition[5];
  for (int i = 0; i < parts.length; i++) {
    parts[i] = new PartitionBuilder().inTable(table).addValue("a" + i).build(conf);
    parts[i].unsetCatName();
  }
  client.add_partitions(Arrays.asList(parts));
  List<Partition> fetched = client.listPartitions(dbName, tableName, (short) -1);
  Assert.assertEquals(parts.length, fetched.size());
  Assert.assertEquals(expectedCatalog(), fetched.get(0).getCatName());
  fetched = client.listPartitions(dbName, tableName, Collections.singletonList("a0"), (short) -1);
  Assert.assertEquals(1, fetched.size());
  Assert.assertEquals(expectedCatalog(), fetched.get(0).getCatName());
  PartitionSpecProxy proxy = client.listPartitionSpecs(dbName, tableName, -1);
  Assert.assertEquals(parts.length, proxy.size());
  Assert.assertEquals(expectedCatalog(), proxy.getCatName());
  fetched = client.listPartitionsByFilter(dbName, tableName, "partcol=\"a0\"", (short) -1);
  Assert.assertEquals(1, fetched.size());
  Assert.assertEquals(expectedCatalog(), fetched.get(0).getCatName());
  proxy = client.listPartitionSpecsByFilter(dbName, tableName, "partcol=\"a0\"", -1);
  Assert.assertEquals(1, proxy.size());
  Assert.assertEquals(expectedCatalog(), proxy.getCatName());
  Assert.assertEquals(1, client.getNumPartitionsByFilter(dbName, tableName, "partcol=\"a0\""));
  List<String> names = client.listPartitionNames(dbName, tableName, (short) 57);
  Assert.assertEquals(parts.length, names.size());
  names = client.listPartitionNames(dbName, tableName, Collections.singletonList("a0"), Short.MAX_VALUE);
  Assert.assertEquals(1, names.size());
  PartitionValuesRequest rqst = new PartitionValuesRequest(dbName, tableName,
      Lists.newArrayList(new FieldSchema("partcol", "string", "")));
  PartitionValuesResponse rsp = client.listPartitionValues(rqst);
  Assert.assertEquals(5, rsp.getPartitionValuesSize());
}
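expectedCatalog() is defined elsewhere in NonCatCallsWithCatalog. A plausible sketch, assuming catalog-less calls resolve against the client's configured default catalog; MetaStoreUtils.getDefaultCatalog is a real metastore helper, but treating it as this method's implementation is an assumption:

protected String expectedCatalog() {
  // Assumption: catalog-less metastore calls land in the configured default catalog,
  // usually "hive".
  return MetaStoreUtils.getDefaultCatalog(conf);
}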
use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.
the class TestListPartitions method otherCatalog.
@Test
@ConditionalIgnoreOnSessionHiveMetastoreClient
public void otherCatalog() throws TException {
  String catName = "list_partition_catalog";
  Catalog cat = new CatalogBuilder().setName(catName)
      .setLocation(MetaStoreTestUtils.getTestWarehouseDir(catName)).build();
  client.createCatalog(cat);
  String dbName = "list_partition_database_in_other_catalog";
  Database db = new DatabaseBuilder().setName(dbName).setCatalogName(catName).create(client, metaStore.getConf());
  String tableName = "table_in_other_catalog";
  Table table = new TableBuilder().inDb(db).setTableName(tableName)
      .addCol("id", "int").addCol("name", "string").addPartCol("partcol", "string")
      .create(client, metaStore.getConf());
  Partition[] parts = new Partition[5];
  for (int i = 0; i < parts.length; i++) {
    parts[i] = new PartitionBuilder().inTable(table).addValue("a" + i).build(metaStore.getConf());
  }
  client.add_partitions(Arrays.asList(parts));
  List<Partition> fetched = client.listPartitions(catName, dbName, tableName, -1);
  Assert.assertEquals(parts.length, fetched.size());
  Assert.assertEquals(catName, fetched.get(0).getCatName());
  fetched = client.listPartitions(catName, dbName, tableName, Collections.singletonList("a0"), -1);
  Assert.assertEquals(1, fetched.size());
  Assert.assertEquals(catName, fetched.get(0).getCatName());
  PartitionSpecProxy proxy = client.listPartitionSpecs(catName, dbName, tableName, -1);
  Assert.assertEquals(parts.length, proxy.size());
  Assert.assertEquals(catName, proxy.getCatName());
  fetched = client.listPartitionsByFilter(catName, dbName, tableName, "partcol=\"a0\"", -1);
  Assert.assertEquals(1, fetched.size());
  Assert.assertEquals(catName, fetched.get(0).getCatName());
  proxy = client.listPartitionSpecsByFilter(catName, dbName, tableName, "partcol=\"a0\"", -1);
  Assert.assertEquals(1, proxy.size());
  Assert.assertEquals(catName, proxy.getCatName());
  Assert.assertEquals(1, client.getNumPartitionsByFilter(catName, dbName, tableName, "partcol=\"a0\""));
  List<String> names = client.listPartitionNames(catName, dbName, tableName, 57);
  Assert.assertEquals(parts.length, names.size());
  names = client.listPartitionNames(catName, dbName, tableName, Collections.singletonList("a0"), Short.MAX_VALUE + 1);
  Assert.assertEquals(1, names.size());
  PartitionValuesRequest rqst = new PartitionValuesRequest(dbName, tableName,
      Lists.newArrayList(new FieldSchema("partcol", "string", "")));
  rqst.setCatName(catName);
  PartitionValuesResponse rsp = client.listPartitionValues(rqst);
  Assert.assertEquals(5, rsp.getPartitionValuesSize());
}
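For comparison with the catalog-less variant in NonCatCallsWithCatalog above: for a table created in the default catalog, the same listings simply drop the catName argument and the metastore resolves the catalog from the client configuration (a usage sketch reusing the identifiers above):

// Same calls, default catalog: no catName parameter, short/int limits as in the
// non-catalog overloads.
List<Partition> defaultCatParts = client.listPartitions(dbName, tableName, (short) -1);
PartitionSpecProxy defaultCatProxy = client.listPartitionSpecs(dbName, tableName, -1);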