Usage example of org.apache.gobblin.data.management.copy.hive.HiveDataset from the Apache incubator-gobblin project.
Shown below: the findDatasets method of the HivePartitionFinder class.
/**
 * Will find all datasets according to whitelist, except the backup, trash and staging tables.
 */
@Override
public List<HivePartitionDataset> findDatasets() throws IOException {
  // Flatten every partition of every whitelisted Hive dataset into one list.
  List<HivePartitionDataset> partitionDatasets = new ArrayList<>();
  for (HiveDataset dataset : this.hiveDatasets) {
    for (Partition hivePartition : dataset.getPartitionsFromDataset()) {
      partitionDatasets.add(new HivePartitionDataset(hivePartition));
    }
  }
  // The selection policy class is configurable; fall back to the default when unset.
  String policyClassName = this.state.getProp(ComplianceConfigurationKeys.DATASET_SELECTION_POLICY_CLASS,
      ComplianceConfigurationKeys.DEFAULT_DATASET_SELECTION_POLICY_CLASS);
  Policy<HivePartitionDataset> policy = GobblinConstructorUtils.invokeConstructor(Policy.class, policyClassName);
  return policy.selectedList(partitionDatasets);
}
Usage example of org.apache.gobblin.data.management.copy.hive.HiveDataset from the Apache incubator-gobblin project.
Shown below: the getPartitions method of the HivePartitionVersionFinder class.
/**
 * Resolves the partitions of a Hive table identified by a splitter-delimited
 * {@code db@table}-style name.
 *
 * @param completeTableName table name that must split into exactly two parts
 *                          (database and table) via {@code At_SPLITTER}
 * @return the table's partitions, or an immutable empty list when the name is
 *         invalid or the metastore/file-system lookup fails
 */
private static List<Partition> getPartitions(String completeTableName) {
  List<String> tableList = At_SPLITTER.splitToList(completeTableName);
  if (tableList.size() != 2) {
    log.warn("Invalid table name " + completeTableName);
    // Collections.emptyList() is the type-safe replacement for the raw Collections.EMPTY_LIST.
    return Collections.emptyList();
  }
  try (AutoReturnableObject<IMetaStoreClient> client = ComplianceRetentionJob.pool.getClient()) {
    Table table = client.get().getTable(tableList.get(0), tableList.get(1));
    // NOTE(review): FileSystem.newInstance creates a fresh FS per call and is never
    // closed here — presumably short-lived; confirm against HiveDataset's lifecycle.
    HiveDataset dataset = new HiveDataset(FileSystem.newInstance(new Configuration()),
        ComplianceRetentionJob.pool, new org.apache.hadoop.hive.ql.metadata.Table(table), new Properties());
    return dataset.getPartitionsFromDataset();
  } catch (IOException | TException e) {
    // Pass the exception to the logger so the full stack trace is kept;
    // e.getMessage() alone may be null and drops the cause entirely.
    log.warn("Unable to get Partitions for table " + completeTableName, e);
  }
  return Collections.emptyList();
}
Usage example of org.apache.gobblin.data.management.copy.hive.HiveDataset from the Apache incubator-gobblin project.
Shown below: the getConversionEntity method of the HiveMaterializerFromEntityQueryGenerator class.
/**
 * Builds a {@link HiveProcessingEntity} for the given work unit by resolving the
 * table — and the partition, when the work unit names one — from the metastore.
 */
private HiveProcessingEntity getConversionEntity(HiveWorkUnit hiveWorkUnit) throws IOException, TException, HiveException {
  try (AutoReturnableObject<IMetaStoreClient> client = this.pool.getClient()) {
    HiveDataset dataset = hiveWorkUnit.getHiveDataset();
    HiveDatasetFinder.DbAndTable dbAndTable = dataset.getDbAndTable();
    String db = dbAndTable.getDb();
    String tableName = dbAndTable.getTable();
    Table table = new Table(client.get().getTable(db, tableName));
    // Partition is optional: only resolved when the work unit carries a partition name.
    Optional<Partition> partition = Optional.absent();
    if (hiveWorkUnit.getPartitionName().isPresent()) {
      partition = Optional.of(
          new Partition(table, client.get().getPartition(db, tableName, hiveWorkUnit.getPartitionName().get())));
    }
    return new HiveProcessingEntity(dataset, table, partition);
  }
}
Usage example of org.apache.gobblin.data.management.copy.hive.HiveDataset from the Apache incubator-gobblin project.
Shown below: the setup method of the HiveMaterializerTest class.
// Builds the shared test fixture: an embedded Hive metastore/JDBC connector, a source
// database with one partitioned external table (partitions 'part1' and 'part2' backed
// by files under hiveMaterializerTest/source/), and the HiveDataset under test.
@BeforeClass
public void setup() throws Exception {
this.jdbcConnector = HiveJdbcConnector.newEmbeddedConnector(2);
// Test data directory resolved from the classpath; contains part1/ and part2/ subdirs.
this.dataFile = new File(getClass().getClassLoader().getResource("hiveMaterializerTest/source/").toURI());
// Start from a clean database so reruns are deterministic.
this.localHiveMetastore.dropDatabaseIfExists(this.dbName);
this.localHiveMetastore.createTestDb(this.dbName);
// Create the partitioned source table and register both partitions at their file locations.
this.jdbcConnector.executeStatements(String.format("CREATE EXTERNAL TABLE %s.%s (id STRING, name String) PARTITIONED BY (%s String) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE", this.dbName, this.sourceTableName, this.partitionColumn), String.format("ALTER TABLE %s.%s ADD PARTITION (part = 'part1') LOCATION '%s'", this.dbName, this.sourceTableName, this.dataFile.getAbsolutePath() + "/part1"), String.format("ALTER TABLE %s.%s ADD PARTITION (part = 'part2') LOCATION '%s'", this.dbName, this.sourceTableName, this.dataFile.getAbsolutePath() + "/part2"));
// Sanity-check the fixture: 8 rows total, 4 of which fall in partition 'part1'.
List<List<String>> allTable = executeStatementAndGetResults(this.jdbcConnector, String.format("SELECT * FROM %s.%s", this.dbName, this.sourceTableName), 3);
Assert.assertEquals(allTable.size(), 8);
List<List<String>> part1 = executeStatementAndGetResults(this.jdbcConnector, String.format("SELECT * FROM %s.%s WHERE %s='part1'", this.dbName, this.sourceTableName, this.partitionColumn), 3);
Assert.assertEquals(part1.size(), 4);
this.pool = HiveMetastoreClientPool.get(new Properties(), Optional.absent());
Table table;
// Borrow a metastore client just long enough to fetch the table definition.
try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
table = new Table(client.get().getTable(this.dbName, this.sourceTableName));
}
this.dataset = new HiveDataset(FileSystem.getLocal(new Configuration()), pool, table, new Properties());
}
Usage example of org.apache.gobblin.data.management.copy.hive.HiveDataset from the Apache incubator-gobblin project.
Shown below: the testMaterializeView method of the HiveMaterializerTest class.
// End-to-end check that HiveMaterializer can materialize a Hive VIEW into a concrete
// (AVRO-stored) table and that the materialized rows match the view's filter.
@Test
public void testMaterializeView() throws Exception {
String destinationTable = "materializeView";
File tmpDir = Files.createTempDir();
tmpDir.deleteOnExit();
String viewName = "myView";
// View selects only rows with name = 'foo' from the 8-row source table.
this.jdbcConnector.executeStatements(String.format("CREATE VIEW %s.%s AS SELECT * FROM %s.%s WHERE name = 'foo'", this.dbName, viewName, this.dbName, this.sourceTableName));
Table view;
try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
view = new Table(client.get().getTable(this.dbName, viewName));
}
HiveDataset viewDataset = new HiveDataset(FileSystem.getLocal(new Configuration()), pool, view, new Properties());
// Build the materialization work unit targeting an AVRO table staged under tmpDir.
WorkUnit workUnit = HiveMaterializer.viewMaterializationWorkUnit(viewDataset, HiveConverterUtils.StorageFormat.AVRO, new TableLikeStageableTableMetadata(viewDataset.getTable(), this.dbName, destinationTable, tmpDir.getAbsolutePath()), null);
HiveMaterializer hiveMaterializer = new HiveMaterializer(getTaskContextForRun(workUnit));
hiveMaterializer.run();
Assert.assertEquals(hiveMaterializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
// Commit publishes the staged data; state must remain SUCCESSFUL afterwards.
hiveMaterializer.commit();
Assert.assertEquals(hiveMaterializer.getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
// The filter matches 4 of the 8 source rows; verify row count and the exact ids.
List<List<String>> allTable = executeStatementAndGetResults(this.jdbcConnector, String.format("SELECT * FROM %s.%s", this.dbName, destinationTable), 3);
Assert.assertEquals(allTable.size(), 4);
Assert.assertEquals(allTable.stream().map(l -> l.get(0)).collect(Collectors.toList()), Lists.newArrayList("101", "103", "201", "203"));
}
Aggregations