Search in sources :

Example 6 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

In the class HiveDatasetFinderTest, the method testDatasetConfig.

@Test
public void testDatasetConfig() throws Exception {
    // Register a single db/table pair so the finder discovers exactly one dataset.
    List<HiveDatasetFinder.DbAndTable> sources = Lists.newArrayList();
    sources.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    HiveMetastoreClientPool clientPool = getTestPool(sources);

    Properties props = new Properties();
    props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");
    props.put("hive.dataset.test.conf1", "conf1-val1");
    props.put("hive.dataset.test.conf2", "conf2-val2");

    // Without a config prefix, dataset config keys keep their fully qualified names.
    HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
    List<HiveDataset> found = Lists.newArrayList(finder.getDatasetsIterator());
    Assert.assertEquals(found.size(), 1);
    HiveDataset dataset = found.get(0);
    Assert.assertEquals(dataset.getDatasetConfig().getString("hive.dataset.test.conf1"), "conf1-val1");
    Assert.assertEquals(dataset.getDatasetConfig().getString("hive.dataset.test.conf2"), "conf2-val2");

    // With a config prefix set, the same keys are resolved relative to that prefix.
    props.put(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.dataset.test");
    finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
    found = Lists.newArrayList(finder.getDatasetsIterator());
    Assert.assertEquals(found.size(), 1);
    dataset = found.get(0);
    Assert.assertEquals(dataset.getDatasetConfig().getString("conf1"), "conf1-val1");
    Assert.assertEquals(dataset.getDatasetConfig().getString("conf2"), "conf2-val2");
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool) Test(org.testng.annotations.Test)

Example 7 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

In the class HiveDatasetFinderTest, the method testException.

@Test
public void testException() throws Exception {
    // Three tables, the middle one configured to throw when fetched.
    List<HiveDatasetFinder.DbAndTable> sources = Lists.newArrayList();
    sources.add(new HiveDatasetFinder.DbAndTable("db1", "table1"));
    sources.add(new HiveDatasetFinder.DbAndTable("db1", TestHiveDatasetFinder.THROW_EXCEPTION));
    sources.add(new HiveDatasetFinder.DbAndTable("db1", "table3"));

    Properties props = new Properties();
    props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

    // The failing table is skipped; the two healthy tables still come back as datasets.
    HiveDatasetFinder finder =
        new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, getTestPool(sources));
    List<HiveDataset> found = Lists.newArrayList(finder.getDatasetsIterator());
    Assert.assertEquals(found.size(), 2);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool) Test(org.testng.annotations.Test)

Example 8 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

In the class HiveDatasetFinderTest, the helper method getTestPool.

/**
 * Builds a mocked {@link HiveMetastoreClientPool} whose metastore client reports exactly
 * the given db/table pairs, each table carrying a dummy storage descriptor at /tmp/test.
 */
private HiveMetastoreClientPool getTestPool(List<HiveDatasetFinder.DbAndTable> dbAndTables) throws Exception {
    // Group table names under their database so per-db stubs are easy to build.
    SetMultimap<String, String> tablesByDb = HashMultimap.create();
    for (HiveDatasetFinder.DbAndTable entry : dbAndTables) {
        tablesByDb.put(entry.getDb(), entry.getTable());
    }

    IMetaStoreClient mockClient = Mockito.mock(IMetaStoreClient.class);
    Mockito.when(mockClient.getAllDatabases()).thenReturn(Lists.newArrayList(tablesByDb.keySet()));
    for (String dbName : tablesByDb.keySet()) {
        Mockito.doReturn(Lists.newArrayList(tablesByDb.get(dbName))).when(mockClient).getAllTables(dbName);
    }

    // Each getTable(db, table) call returns a minimal Table with a fixed location.
    for (HiveDatasetFinder.DbAndTable entry : dbAndTables) {
        StorageDescriptor sd = new StorageDescriptor();
        sd.setLocation("/tmp/test");
        Table metastoreTable = new Table();
        metastoreTable.setDbName(entry.getDb());
        metastoreTable.setTableName(entry.getTable());
        metastoreTable.setSd(sd);
        Mockito.doReturn(metastoreTable).when(mockClient).getTable(entry.getDb(), entry.getTable());
    }

    // The pool hands out the mocked client through an AutoReturnableObject wrapper.
    @SuppressWarnings("unchecked")
    AutoReturnableObject<IMetaStoreClient> borrowedClient = Mockito.mock(AutoReturnableObject.class);
    Mockito.when(borrowedClient.get()).thenReturn(mockClient);

    HiveMetastoreClientPool mockPool = Mockito.mock(HiveMetastoreClientPool.class);
    Mockito.when(mockPool.getHiveRegProps()).thenReturn(null);
    Mockito.when(mockPool.getClient()).thenReturn(borrowedClient);
    return mockPool;
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool)

Example 9 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

In the class HiveDatasetFinderTest, the method testDatasetFinder.

@Test
public void testDatasetFinder() throws Exception {
    // Three distinct tables in one database should surface as three datasets.
    List<HiveDatasetFinder.DbAndTable> sources = Lists.newArrayList();
    for (String tableName : new String[] { "table1", "table2", "table3" }) {
        sources.add(new HiveDatasetFinder.DbAndTable("db1", tableName));
    }

    Properties props = new Properties();
    props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

    HiveDatasetFinder finder =
        new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, getTestPool(sources));
    List<HiveDataset> found = Lists.newArrayList(finder.getDatasetsIterator());
    Assert.assertEquals(found.size(), 3);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool) Test(org.testng.annotations.Test)

Example 10 with HiveMetastoreClientPool

use of org.apache.gobblin.hive.HiveMetastoreClientPool in project incubator-gobblin by apache.

In the class HiveConverterUtils, the method getDestinationPartitionLocation.

/**
 * Looks up the data location of a named partition of the given destination table.
 *
 * @param table destination Hive table; when absent, no lookup is attempted
 * @param state workunit state supplying job properties and the metastore URI
 * @param partitionName name of the partition to resolve
 * @return the partition's data location, or absent when the table or partition does not exist
 * @throws DataConversionException on any metastore/IO failure other than a missing partition
 */
public static Optional<Path> getDestinationPartitionLocation(Optional<Table> table, WorkUnitState state, String partitionName) throws DataConversionException {
    if (!table.isPresent()) {
        return Optional.absent();
    }
    try {
        // Pools are cached by HiveMetastoreClientPool.get, so this does not leak connections.
        HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(state.getJobState().getProperties(),
            Optional.fromNullable(state.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
        Optional<org.apache.hadoop.hive.metastore.api.Partition> found;
        try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
            found = Optional.of(client.get().getPartition(table.get().getDbName(), table.get().getTableName(), partitionName));
        } catch (NoSuchObjectException e) {
            // A missing partition is an expected outcome, not an error.
            return Optional.absent();
        }
        if (!found.isPresent()) {
            return Optional.absent();
        }
        org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table.get());
        return Optional.of(new Partition(qlTable, found.get()).getDataLocation());
    } catch (IOException | TException | HiveException e) {
        throw new DataConversionException("Could not fetch destination table metadata", e);
    }
}
Also used : TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) IMetaStoreClient(org.apache.hadoop.hive.metastore.IMetaStoreClient) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) DataConversionException(org.apache.gobblin.converter.DataConversionException) HiveMetastoreClientPool(org.apache.gobblin.hive.HiveMetastoreClientPool)

Aggregations

HiveMetastoreClientPool (org.apache.gobblin.hive.HiveMetastoreClientPool)11 Properties (java.util.Properties)6 Configuration (org.apache.hadoop.conf.Configuration)6 Test (org.testng.annotations.Test)6 IMetaStoreClient (org.apache.hadoop.hive.metastore.IMetaStoreClient)5 Table (org.apache.hadoop.hive.metastore.api.Table)4 TException (org.apache.thrift.TException)4 IOException (java.io.IOException)3 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)3 DataConversionException (org.apache.gobblin.converter.DataConversionException)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 Partition (org.apache.hadoop.hive.ql.metadata.Partition)2 List (java.util.List)1 HiveDataset (org.apache.gobblin.data.management.copy.hive.HiveDataset)1 Path (org.apache.hadoop.fs.Path)1 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)1 Table (org.apache.hadoop.hive.ql.metadata.Table)1