Example 1 with PartitionSpecProxy

Use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.

From the class HiveTableUtil, the method importFiles:

/**
 * Import files from given partitions to an Iceberg table.
 * @param sourceLocation location of the HMS table
 * @param format inputformat class name of the HMS table
 * @param partitionSpecProxy list of HMS table partitions wrapped in a PartitionSpecProxy
 * @param partitionKeys list of partition keys
 * @param icebergTableProperties destination iceberg table properties
 * @param conf a Hadoop configuration
 */
public static void importFiles(String sourceLocation, String format, PartitionSpecProxy partitionSpecProxy, List<FieldSchema> partitionKeys, Properties icebergTableProperties, Configuration conf) throws MetaException {
    RemoteIterator<LocatedFileStatus> filesIterator = null;
    // This listing must be obtained before the Iceberg table is created; otherwise it could also pick up the metadata files that table creation writes under the same location.
    if (partitionSpecProxy.size() == 0) {
        filesIterator = getFilesIterator(new Path(sourceLocation), conf);
    }
    Table icebergTable = Catalogs.createTable(conf, icebergTableProperties);
    AppendFiles append = icebergTable.newAppend();
    PartitionSpec spec = icebergTable.spec();
    MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTable.properties());
    String nameMappingString = icebergTable.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
    NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
    try {
        if (partitionSpecProxy.size() == 0) {
            List<DataFile> dataFiles = getDataFiles(filesIterator, Collections.emptyMap(), format, spec, metricsConfig, nameMapping, conf);
            dataFiles.forEach(append::appendFile);
        } else {
            PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator();
            List<Callable<Void>> tasks = new ArrayList<>();
            while (partitionIterator.hasNext()) {
                Partition partition = partitionIterator.next();
                Callable<Void> task = () -> {
                    Path partitionPath = new Path(partition.getSd().getLocation());
                    String partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
                    Map<String, String> partitionSpec = Warehouse.makeSpecFromName(partitionName);
                    RemoteIterator<LocatedFileStatus> iterator = getFilesIterator(partitionPath, conf);
                    List<DataFile> dataFiles = getDataFiles(iterator, partitionSpec, format.toLowerCase(), spec, metricsConfig, nameMapping, conf);
                    synchronized (append) {
                        dataFiles.forEach(append::appendFile);
                    }
                    return null;
                };
                tasks.add(task);
            }
            int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS);
            ExecutorService executor = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setNameFormat("iceberg-metadata-generator-%d").setDaemon(true).build());
            try {
                // invokeAll waits for the tasks, but their failures only surface when the returned futures are inspected.
                for (Future<Void> future : executor.invokeAll(tasks)) {
                    future.get();
                }
            } finally {
                executor.shutdown();
            }
        }
        append.commit();
    } catch (IOException | InterruptedException | ExecutionException e) {
        throw new MetaException("Cannot import hive data into iceberg table.\n" + e.getMessage());
    }
}
Also used : NameMapping(org.apache.iceberg.mapping.NameMapping), AppendFiles(org.apache.iceberg.AppendFiles), ArrayList(java.util.ArrayList), MetricsConfig(org.apache.iceberg.MetricsConfig), Callable(java.util.concurrent.Callable), Future(java.util.concurrent.Future), ExecutionException(java.util.concurrent.ExecutionException), DataFile(org.apache.iceberg.DataFile), RemoteIterator(org.apache.hadoop.fs.RemoteIterator), ThreadFactoryBuilder(org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder), List(java.util.List), PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy), MetaException(org.apache.hadoop.hive.metastore.api.MetaException), Path(org.apache.hadoop.fs.Path), Partition(org.apache.hadoop.hive.metastore.api.Partition), Table(org.apache.iceberg.Table), LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus), IOException(java.io.IOException), PartitionSpec(org.apache.iceberg.PartitionSpec), ExecutorService(java.util.concurrent.ExecutorService), Map(java.util.Map)
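
For orientation, here is a minimal sketch of how importFiles might be driven from metastore metadata. This is not a call site from the project: the helper name, the assembled Iceberg table properties (including the "location" key), and the HiveTableUtil package are illustrative assumptions, so verify the property keys Catalogs actually reads in your Hive version.

import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
// Package assumed for the class shown in Example 1.
import org.apache.iceberg.mr.hive.HiveTableUtil;

public static void migrateToIceberg(Configuration conf, String dbName, String tableName) throws Exception {
    HiveMetaStoreClient hmsc = new HiveMetaStoreClient(conf);
    Table hmsTable = hmsc.getTable(dbName, tableName);
    // Fetch every partition in the compact PartitionSpecProxy representation.
    PartitionSpecProxy partitions = hmsc.listPartitionSpecs(dbName, tableName, -1);
    List<FieldSchema> partitionKeys = hmsTable.getPartitionKeys();
    Properties icebergTableProperties = new Properties();
    // Assumed property key: reuse the HMS table location for the new Iceberg table.
    icebergTableProperties.put("location", hmsTable.getSd().getLocation());
    HiveTableUtil.importFiles(hmsTable.getSd().getLocation(), hmsTable.getSd().getInputFormat(),
            partitions, partitionKeys, icebergTableProperties, conf);
}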

Example 2 with PartitionSpecProxy

Use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.

From the class TestHiveMetaStorePartitionSpecs, the method testFetchingPartitionsWithDifferentSchemas:

/**
 * Test to confirm that Partition-grouping behaves correctly when Table-schemas evolve.
 * Partitions must be grouped by location and schema.
 */
@Test
public void testFetchingPartitionsWithDifferentSchemas() {
    try {
        // Create source table.
        HiveMetaStoreClient hmsc = new HiveMetaStoreClient(conf);
        clearAndRecreateDB(hmsc);
        createTable(hmsc, true);
        Table table = hmsc.getTable(dbName, tableName);
        // Blurb list.
        populatePartitions(hmsc, table, Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath"));
        // Modify table schema. Add columns.
        List<FieldSchema> fields = table.getSd().getCols();
        fields.add(new FieldSchema("goo", "string", "Entirely new column. Doesn't apply to older partitions."));
        table.getSd().setCols(fields);
        hmsc.alter_table(dbName, tableName, table);
        // Check that the change stuck.
        table = hmsc.getTable(dbName, tableName);
        Assert.assertEquals("Unexpected number of table columns.", 3, table.getSd().getColsSize());
        // Add partitions with new schema.
        // Mark Partitions with new schema with different blurb.
        populatePartitions(hmsc, table, Arrays.asList("hasNewColumn"));
        // Retrieve *all* partitions from the table.
        PartitionSpecProxy partitionSpecProxy = hmsc.listPartitionSpecs(dbName, tableName, -1);
        Assert.assertEquals("Unexpected number of partitions.", nDates * 3, partitionSpecProxy.size());
        // Confirm grouping.
        Assert.assertTrue("Unexpected type of PartitionSpecProxy.", partitionSpecProxy instanceof CompositePartitionSpecProxy);
        CompositePartitionSpecProxy compositePartitionSpecProxy = (CompositePartitionSpecProxy) partitionSpecProxy;
        List<PartitionSpec> partitionSpecs = compositePartitionSpecProxy.toPartitionSpec();
        Assert.assertTrue("PartitionSpec[0] should have been a SharedSDPartitionSpec.", partitionSpecs.get(0).isSetSharedSDPartitionSpec());
        Assert.assertEquals("PartitionSpec[0] should use the table-path as the common root location. ", table.getSd().getLocation(), partitionSpecs.get(0).getRootPath());
        Assert.assertTrue("PartitionSpec[1] should have been a SharedSDPartitionSpec.", partitionSpecs.get(1).isSetSharedSDPartitionSpec());
        Assert.assertEquals("PartitionSpec[1] should use the table-path as the common root location. ", table.getSd().getLocation(), partitionSpecs.get(1).getRootPath());
        Assert.assertTrue("PartitionSpec[2] should have been a ListComposingPartitionSpec.", partitionSpecs.get(2).isSetPartitionList());
        // Categorize the partitions returned, and confirm that all partitions are accounted for.
        PartitionSpecProxy.PartitionIterator iterator = partitionSpecProxy.getPartitionIterator();
        Map<String, List<Partition>> blurbToPartitionList = new HashMap<>(3);
        while (iterator.hasNext()) {
            Partition partition = iterator.next();
            String blurb = partition.getValues().get(1);
            if (!blurbToPartitionList.containsKey(blurb)) {
                blurbToPartitionList.put(blurb, new ArrayList<>(nDates));
            }
            blurbToPartitionList.get(blurb).add(partition);
        }
        // Partitions bearing the blurb "isLocatedOutsideTablePath" must still have the original 2 columns,
        // and must have locations outside the table directory.
        for (Partition partition : blurbToPartitionList.get("isLocatedOutsideTablePath")) {
            Assert.assertEquals("Unexpected number of columns.", 2, partition.getSd().getCols().size());
            Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName());
            Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName());
            String partitionLocation = partition.getSd().getLocation();
            String tableLocation = table.getSd().getLocation();
            Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". " + "Partition should have been outside table location: " + tableLocation, !partitionLocation.startsWith(tableLocation));
        }
        // Partitions bearing the blurb "isLocatedInTablePath" must still have the original 2 columns,
        // and must have locations within the table directory.
        for (Partition partition : blurbToPartitionList.get("isLocatedInTablePath")) {
            Assert.assertEquals("Unexpected number of columns.", 2, partition.getSd().getCols().size());
            Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName());
            Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName());
            String partitionLocation = partition.getSd().getLocation();
            String tableLocation = table.getSd().getLocation();
            Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". " + "Partition should have been within table location: " + tableLocation, partitionLocation.startsWith(tableLocation));
        }
        // Partitions bearing the blurb "hasNewColumn" must have 3 columns. Also, the partition locations must lie within the table directory.
        for (Partition partition : blurbToPartitionList.get("hasNewColumn")) {
            Assert.assertEquals("Unexpected number of columns.", 3, partition.getSd().getCols().size());
            Assert.assertEquals("Unexpected first column.", "foo", partition.getSd().getCols().get(0).getName());
            Assert.assertEquals("Unexpected second column.", "bar", partition.getSd().getCols().get(1).getName());
            Assert.assertEquals("Unexpected third column.", "goo", partition.getSd().getCols().get(2).getName());
            String partitionLocation = partition.getSd().getLocation();
            String tableLocation = table.getSd().getLocation();
            Assert.assertTrue("Unexpected partition location: " + partitionLocation + ". " + "Partition should have been within table location: " + tableLocation, partitionLocation.startsWith(tableLocation));
        }
    } catch (Throwable t) {
        LOG.error("Unexpected Exception!", t);
        Assert.fail("Unexpected Exception: " + t.getMessage());
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition), Table(org.apache.hadoop.hive.metastore.api.Table), HashMap(java.util.HashMap), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), PartitionSpec(org.apache.hadoop.hive.metastore.api.PartitionSpec), CompositePartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy), ArrayList(java.util.ArrayList), List(java.util.List), PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy), Test(org.junit.Test), MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)
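
The grouping that the test asserts can also be inspected directly on a live proxy. A minimal sketch, restricted to the accessors the test itself exercises; the println output is an illustrative addition, and getPartitionsSize() is assumed to be the Thrift-generated size accessor for the composing partition list.

import java.util.List;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.PartitionSpec;
import org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;

public static void describeGrouping(HiveMetaStoreClient hmsc, String dbName, String tableName) throws Exception {
    PartitionSpecProxy proxy = hmsc.listPartitionSpecs(dbName, tableName, -1);
    if (proxy instanceof CompositePartitionSpecProxy) {
        List<PartitionSpec> groups = ((CompositePartitionSpecProxy) proxy).toPartitionSpec();
        for (PartitionSpec group : groups) {
            if (group.isSetSharedSDPartitionSpec()) {
                // Partitions that share a StorageDescriptor under a common root directory.
                System.out.println("Shared-SD group rooted at " + group.getRootPath());
            } else if (group.isSetPartitionList()) {
                // Partitions whose locations or schemas diverge are kept as a plain list.
                System.out.println("List-composing group of " + group.getPartitionList().getPartitionsSize() + " partitions");
            }
        }
    }
}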

Example 3 with PartitionSpecProxy

Use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.

From the class TestHiveMetaStorePartitionSpecs, the method testAddPartitions:

/**
 * Test to confirm that partitions can be added using PartitionSpecs.
 */
@Test
public void testAddPartitions() {
    try {
        // Create source table.
        HiveMetaStoreClient hmsc = new HiveMetaStoreClient(conf);
        clearAndRecreateDB(hmsc);
        createTable(hmsc, true);
        Table table = hmsc.getTable(dbName, tableName);
        Assert.assertTrue(table.isSetId());
        table.unsetId();
        populatePartitions(hmsc, table, Arrays.asList("isLocatedInTablePath", "isLocatedOutsideTablePath"));
        // Clone the table.
        String targetTableName = "cloned_" + tableName;
        Table targetTable = new Table(table);
        targetTable.setTableName(targetTableName);
        StorageDescriptor targetTableSd = new StorageDescriptor(targetTable.getSd());
        targetTableSd.setLocation(targetTableSd.getLocation().replace(tableName, targetTableName));
        hmsc.createTable(targetTable);
        // Get partition-list from source.
        PartitionSpecProxy partitionsForAddition = hmsc.listPartitionSpecsByFilter(dbName, tableName, "blurb = \"isLocatedInTablePath\"", -1);
        partitionsForAddition.setTableName(targetTableName);
        partitionsForAddition.setRootLocation(targetTableSd.getLocation());
        Assert.assertEquals("Unexpected number of partitions added. ", partitionsForAddition.size(), hmsc.add_partitions_pspec(partitionsForAddition));
        // Check that the added partitions are as expected. Note that partitionsForAddition was
        // mutated in place above (table name and root location), so the source iterator below
        // already reflects the target table's locations.
        PartitionSpecProxy clonedPartitions = hmsc.listPartitionSpecs(dbName, targetTableName, -1);
        Assert.assertEquals("Unexpected number of partitions returned. ", partitionsForAddition.size(), clonedPartitions.size());
        PartitionSpecProxy.PartitionIterator sourceIterator = partitionsForAddition.getPartitionIterator(), targetIterator = clonedPartitions.getPartitionIterator();
        while (targetIterator.hasNext()) {
            Partition sourcePartition = sourceIterator.next(), targetPartition = targetIterator.next();
            Assert.assertEquals("Mismatched values.", sourcePartition.getValues(), targetPartition.getValues());
            Assert.assertEquals("Mismatched locations.", sourcePartition.getSd().getLocation(), targetPartition.getSd().getLocation());
        }
    } catch (Throwable t) {
        LOG.error("Unexpected Exception!", t);
        Assert.fail("Unexpected Exception: " + t.getMessage());
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition), Table(org.apache.hadoop.hive.metastore.api.Table), StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor), PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy), CompositePartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy), Test(org.junit.Test), MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)
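
Stripped of its assertions, the copy pattern this test exercises is compact enough to restate as a sketch. The helper below is hypothetical, not project code, and it relies on the same precondition the test arranges: the partitions being copied share a common root (here guaranteed by the blurb filter), since setRootLocation rewrites locations relative to one root.

import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;

public static int clonePartitions(HiveMetaStoreClient hmsc, String dbName, String sourceTable,
        String targetTable, String targetLocation) throws Exception {
    // Only partitions under the source table path share a root, so filter to those.
    PartitionSpecProxy specs = hmsc.listPartitionSpecsByFilter(dbName, sourceTable, "blurb = \"isLocatedInTablePath\"", -1);
    // Both setters mutate the proxy in place: the grouped partitions are rewritten
    // to the target table name and re-rooted under the target location.
    specs.setTableName(targetTable);
    specs.setRootLocation(targetLocation);
    return hmsc.add_partitions_pspec(specs);
}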

Example 4 with PartitionSpecProxy

Use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.

From the class NonCatCallsWithCatalog, the method listPartitions:

@Test
public void listPartitions() throws TException {
    String dbName = "list_partition_database_in_other_catalog";
    Database db = new DatabaseBuilder().setName(dbName).build(conf);
    db.unsetCatalogName();
    client.createDatabase(db);
    String tableName = "table_in_other_catalog";
    Table table = new TableBuilder().inDb(db).setTableName(tableName).addCol("id", "int").addCol("name", "string").addPartCol("partcol", "string").build(conf);
    table.unsetCatName();
    client.createTable(table);
    Partition[] parts = new Partition[5];
    for (int i = 0; i < parts.length; i++) {
        parts[i] = new PartitionBuilder().inTable(table).addValue("a" + i).build(conf);
        parts[i].unsetCatName();
    }
    client.add_partitions(Arrays.asList(parts));
    List<Partition> fetched = client.listPartitions(dbName, tableName, (short) -1);
    Assert.assertEquals(parts.length, fetched.size());
    Assert.assertEquals(expectedCatalog(), fetched.get(0).getCatName());
    fetched = client.listPartitions(dbName, tableName, Collections.singletonList("a0"), (short) -1);
    Assert.assertEquals(1, fetched.size());
    Assert.assertEquals(expectedCatalog(), fetched.get(0).getCatName());
    PartitionSpecProxy proxy = client.listPartitionSpecs(dbName, tableName, -1);
    Assert.assertEquals(parts.length, proxy.size());
    Assert.assertEquals(expectedCatalog(), proxy.getCatName());
    fetched = client.listPartitionsByFilter(dbName, tableName, "partcol=\"a0\"", (short) -1);
    Assert.assertEquals(1, fetched.size());
    Assert.assertEquals(expectedCatalog(), fetched.get(0).getCatName());
    proxy = client.listPartitionSpecsByFilter(dbName, tableName, "partcol=\"a0\"", -1);
    Assert.assertEquals(1, proxy.size());
    Assert.assertEquals(expectedCatalog(), proxy.getCatName());
    Assert.assertEquals(1, client.getNumPartitionsByFilter(dbName, tableName, "partcol=\"a0\""));
    List<String> names = client.listPartitionNames(dbName, tableName, (short) 57);
    Assert.assertEquals(parts.length, names.size());
    names = client.listPartitionNames(dbName, tableName, Collections.singletonList("a0"), Short.MAX_VALUE);
    Assert.assertEquals(1, names.size());
    PartitionValuesRequest rqst = new PartitionValuesRequest(dbName, tableName, Lists.newArrayList(new FieldSchema("partcol", "string", "")));
    PartitionValuesResponse rsp = client.listPartitionValues(rqst);
    Assert.assertEquals(5, rsp.getPartitionValuesSize());
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition), Table(org.apache.hadoop.hive.metastore.api.Table), PartitionValuesRequest(org.apache.hadoop.hive.metastore.api.PartitionValuesRequest), FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema), TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder), SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DatabaseBuilder(org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder), PartitionBuilder(org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder), Database(org.apache.hadoop.hive.metastore.api.Database), PartitionValuesResponse(org.apache.hadoop.hive.metastore.api.PartitionValuesResponse), PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy), Test(org.junit.Test)

Example 5 with PartitionSpecProxy

Use of org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy in project hive by apache.

From the class TestListPartitions, the method otherCatalog:

@Test
@ConditionalIgnoreOnSessionHiveMetastoreClient
public void otherCatalog() throws TException {
    String catName = "list_partition_catalog";
    Catalog cat = new CatalogBuilder().setName(catName).setLocation(MetaStoreTestUtils.getTestWarehouseDir(catName)).build();
    client.createCatalog(cat);
    String dbName = "list_partition_database_in_other_catalog";
    Database db = new DatabaseBuilder().setName(dbName).setCatalogName(catName).create(client, metaStore.getConf());
    String tableName = "table_in_other_catalog";
    Table table = new TableBuilder().inDb(db).setTableName(tableName).addCol("id", "int").addCol("name", "string").addPartCol("partcol", "string").create(client, metaStore.getConf());
    Partition[] parts = new Partition[5];
    for (int i = 0; i < parts.length; i++) {
        parts[i] = new PartitionBuilder().inTable(table).addValue("a" + i).build(metaStore.getConf());
    }
    client.add_partitions(Arrays.asList(parts));
    List<Partition> fetched = client.listPartitions(catName, dbName, tableName, -1);
    Assert.assertEquals(parts.length, fetched.size());
    Assert.assertEquals(catName, fetched.get(0).getCatName());
    fetched = client.listPartitions(catName, dbName, tableName, Collections.singletonList("a0"), -1);
    Assert.assertEquals(1, fetched.size());
    Assert.assertEquals(catName, fetched.get(0).getCatName());
    PartitionSpecProxy proxy = client.listPartitionSpecs(catName, dbName, tableName, -1);
    Assert.assertEquals(parts.length, proxy.size());
    Assert.assertEquals(catName, proxy.getCatName());
    fetched = client.listPartitionsByFilter(catName, dbName, tableName, "partcol=\"a0\"", -1);
    Assert.assertEquals(1, fetched.size());
    Assert.assertEquals(catName, fetched.get(0).getCatName());
    proxy = client.listPartitionSpecsByFilter(catName, dbName, tableName, "partcol=\"a0\"", -1);
    Assert.assertEquals(1, proxy.size());
    Assert.assertEquals(catName, proxy.getCatName());
    Assert.assertEquals(1, client.getNumPartitionsByFilter(catName, dbName, tableName, "partcol=\"a0\""));
    List<String> names = client.listPartitionNames(catName, dbName, tableName, 57);
    Assert.assertEquals(parts.length, names.size());
    names = client.listPartitionNames(catName, dbName, tableName, Collections.singletonList("a0"), Short.MAX_VALUE + 1);
    Assert.assertEquals(1, names.size());
    PartitionValuesRequest rqst = new PartitionValuesRequest(dbName, tableName, Lists.newArrayList(new FieldSchema("partcol", "string", "")));
    rqst.setCatName(catName);
    PartitionValuesResponse rsp = client.listPartitionValues(rqst);
    Assert.assertEquals(5, rsp.getPartitionValuesSize());
}
Also used : TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder), DatabaseBuilder(org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder), PartitionBuilder(org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder), CatalogBuilder(org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder), PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy), Test(org.junit.Test), MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)
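
All five examples obtain their PartitionSpecProxy from the client, but HiveMetaStoreClient itself builds those return values by wrapping raw Thrift PartitionSpec objects. A sketch of doing the same client-side; PartitionSpecProxy.Factory.get is assumed to accept a list of specs, mirroring its use inside the client, so verify the signature against your Hive version.

import java.util.Collections;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.PartitionSpec;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;

public static PartitionSpecProxy wrap(PartitionSpec spec) throws MetaException {
    // The proxy hides whether the underlying spec is shared-SD or list-composing,
    // and exposes the same PartitionIterator either way.
    return PartitionSpecProxy.Factory.get(Collections.singletonList(spec));
}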

Aggregations

PartitionSpecProxy (org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy): 69 usages
Test (org.junit.Test): 60 usages
MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest): 59 usages
Partition (org.apache.hadoop.hive.metastore.api.Partition): 53 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 24 usages
PartitionWithoutSD (org.apache.hadoop.hive.metastore.api.PartitionWithoutSD): 15 usages
ArrayList (java.util.ArrayList): 13 usages
Path (org.apache.hadoop.fs.Path): 11 usages
List (java.util.List): 7 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 7 usages
HashMap (java.util.HashMap): 4 usages
PartitionSpec (org.apache.hadoop.hive.metastore.api.PartitionSpec): 4 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 3 usages
PartitionBuilder (org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder): 3 usages
CompositePartitionSpecProxy (org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy): 3 usages
PartitionSpecWithSharedSD (org.apache.hadoop.hive.metastore.api.PartitionSpecWithSharedSD): 2 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 2 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 2 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 2 usages