Search in sources :

Example 1 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

From the class HiveConverterUtils, the method getDestinationTableMeta:

/**
 * Fetches the destination Hive table and, if the table is partitioned, its partitions.
 * @param dbName db name
 * @param tableName table name
 * @param props properties used to locate and configure the metastore client pool
 * @return a pair of the Hive table (absent if the table does not exist) and its
 *         partitions (absent if the table is missing or unpartitioned)
 * @throws RuntimeException if the metastore lookup fails for any reason other than
 *         the table not existing
 */
public static Pair<Optional<Table>, Optional<List<Partition>>> getDestinationTableMeta(String dbName, String tableName, Properties props) {
    Optional<Table> table = Optional.<Table>absent();
    Optional<List<Partition>> partitions = Optional.<List<Partition>>absent();
    try {
        HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(props, Optional.fromNullable(props.getProperty(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
        try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
            // getTable signals a missing table via NoSuchObjectException (handled below)
            // rather than returning null, so Optional.of is safe here and a subsequent
            // isPresent() re-check would be redundant.
            table = Optional.of(client.get().getTable(dbName, tableName));
            org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
            if (HiveUtils.isPartitioned(qlTable)) {
                partitions = Optional.of(HiveUtils.getPartitions(client.get(), qlTable, Optional.<String>absent()));
            }
        }
    } catch (NoSuchObjectException e) {
        // Table does not exist: return absent values instead of failing.
        // NOTE: this catch must precede the TException handler — NoSuchObjectException
        // is a TException and would otherwise be rethrown as a RuntimeException.
        return ImmutablePair.of(table, partitions);
    } catch (IOException | TException e) {
        throw new RuntimeException("Could not fetch destination table metadata", e);
    }
    return ImmutablePair.of(table, partitions);
}
Also used : TException(org.apache.thrift.TException) Table(org.apache.hadoop.hive.metastore.api.Table) IOException(java.io.IOException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) List(java.util.List) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool)

Example 2 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

From the class AbstractAvroToOrcConverter, the method getDestinationPartitionLocation:

/**
 * Resolves the data location of the named partition of the destination table.
 *
 * @param table the destination table, possibly absent
 * @param state work unit state providing the job properties for the metastore connection
 * @param partitionName name of the partition to look up
 * @return the partition's data location, or absent if the table or partition does not exist
 * @throws DataConversionException if the metastore lookup fails
 */
private Optional<Path> getDestinationPartitionLocation(Optional<Table> table, WorkUnitState state, String partitionName) throws DataConversionException {
    // Without a destination table there is nothing to resolve.
    if (!table.isPresent()) {
        return Optional.<Path>absent();
    }
    Optional<org.apache.hadoop.hive.metastore.api.Partition> partition =
        Optional.<org.apache.hadoop.hive.metastore.api.Partition>absent();
    try {
        HiveMetastoreClientPool clientPool = HiveMetastoreClientPool.get(
            state.getJobState().getProperties(),
            Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
        try (AutoReturnableObject<IMetaStoreClient> client = clientPool.getClient()) {
            partition = Optional.of(
                client.get().getPartition(table.get().getDbName(), table.get().getTableName(), partitionName));
        } catch (NoSuchObjectException e) {
            // The partition does not exist; report absence rather than failing.
            return Optional.<Path>absent();
        }
        if (partition.isPresent()) {
            org.apache.hadoop.hive.ql.metadata.Table qlTable =
                new org.apache.hadoop.hive.ql.metadata.Table(table.get());
            org.apache.hadoop.hive.ql.metadata.Partition qlPartition =
                new org.apache.hadoop.hive.ql.metadata.Partition(qlTable, partition.get());
            return Optional.of(qlPartition.getDataLocation());
        }
    } catch (IOException | TException | HiveException e) {
        throw new DataConversionException(String.format("Could not fetch destination table %s.%s metadata", table.get().getDbName(), table.get().getTableName()), e);
    }
    return Optional.<Path>absent();
}
Also used : Path(org.apache.hadoop.fs.Path) TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) Partition(org.apache.hadoop.hive.ql.metadata.Partition) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) DataConversionException(org.apache.gobblin.converter.DataConversionException) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool)

Example 3 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

From the class HiveDatasetFinderTest, the method testWhitelist:

@Test
public void testWhitelist() throws Exception {
    // Tables spread across two databases; the whitelist should keep only db1's tables.
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(
        new HiveDatasetFinder.DbAndTable("db1", "table1"),
        new HiveDatasetFinder.DbAndTable("db1", "table2"),
        new HiveDatasetFinder.DbAndTable("db2", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "db1");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    // Exactly the two db1 tables survive the whitelist.
    Assert.assertEquals(datasets.size(), 2);
    for (HiveDataset dataset : datasets) {
        Assert.assertEquals(dataset.getTable().getDbName(), "db1");
    }
    Assert.assertEquals(
        Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()),
        Sets.newHashSet("table1", "table2"));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool) Test(org.testng.annotations.Test)

Example 4 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

From the class HiveDatasetFinderTest, the method testTableList:

@Test
public void testTableList() throws Exception {
    // Three db1 tables plus one db2 table; the table pattern should match only table1 and table2 of db1.
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(
        new HiveDatasetFinder.DbAndTable("db1", "table1"),
        new HiveDatasetFinder.DbAndTable("db1", "table2"),
        new HiveDatasetFinder.DbAndTable("db1", "table3"),
        new HiveDatasetFinder.DbAndTable("db2", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.DB_KEY, "db1");
    properties.put(HiveDatasetFinder.TABLE_PATTERN_KEY, "table1|table2");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    // Exactly table1 and table2 of db1 are selected; table3 and db2 are filtered out.
    Assert.assertEquals(datasets.size(), 2);
    for (HiveDataset dataset : datasets) {
        Assert.assertEquals(dataset.getTable().getDbName(), "db1");
    }
    Assert.assertEquals(
        Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()),
        Sets.newHashSet("table1", "table2"));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool) Test(org.testng.annotations.Test)

Example 5 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

From the class HiveDatasetFinderTest, the method testBlacklist:

@Test
public void testBlacklist() throws Exception {
    // An empty whitelist plus a db2 blacklist should leave only the db1 tables.
    List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(
        new HiveDatasetFinder.DbAndTable("db1", "table1"),
        new HiveDatasetFinder.DbAndTable("db1", "table2"),
        new HiveDatasetFinder.DbAndTable("db2", "table1"));
    HiveMetastoreClientPool pool = getTestPool(dbAndTables);

    Properties properties = new Properties();
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");
    properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.BLACKLIST, "db2");

    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool);
    List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator());

    // db2 is blacklisted, so only the two db1 tables remain.
    Assert.assertEquals(datasets.size(), 2);
    for (HiveDataset dataset : datasets) {
        Assert.assertEquals(dataset.getTable().getDbName(), "db1");
    }
    Assert.assertEquals(
        Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()),
        Sets.newHashSet("table1", "table2"));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool) Test(org.testng.annotations.Test)

Aggregations

HiveMetastoreClientPool (org.apache.gobblin.hive.HiveMetastoreClientPool)11 Properties (java.util.Properties)6 Configuration (org.apache.hadoop.conf.Configuration)6 Test (org.testng.annotations.Test)6 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)5 Table (org.apache.hadoop.hive.metastore.api.Table)4 TException (org.apache.thrift.TException)4 IOException (java.io.IOException)3 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)3 DataConversionException (org.apache.gobblin.converter.DataConversionException)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 Partition (org.apache.hadoop.hive.ql.metadata.Partition)2 List (java.util.List)1 HiveDataset (org.apache.gobblin.data.management.copy.hive.HiveDataset)1 Path (org.apache.hadoop.fs.Path)1 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)1 Table (org.apache.hadoop.hive.ql.metadata.Table)1