use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class PurgeableHivePartitionDataset method purge.
/**
 * Performs the actual purge of this partition.
 *
 * <p>Steps, in the order they are executed:
 * <ul>
 *   <li>Resolve the dataset owner, obtain a file system for that owner and open a query executor.</li>
 *   <li>If simulate is set, log and return without running any query.</li>
 *   <li>Create the staging table and staging partition (per the original author: same schema as the
 *       original table partition).</li>
 *   <li>Record the last modified time of the original partition location, run the purge queries that
 *       populate the staging partition (per the original author: original table left outer joined
 *       with the compliance id table), then record the last modified time again.</li>
 *   <li>Create a backup table and partition for the original partition location.</li>
 *   <li>Ask the configured {@link CommitPolicy} whether to commit; if it refuses, log both recorded
 *       times and fail.</li>
 *   <li>Alter the original partition to point at the staging partition location and drop the staging
 *       table. Per the original author, in-flight queries are not affected by the alter partition
 *       query.</li>
 * </ul>
 *
 * @throws IOException if a {@link SQLException} is raised while executing queries (it is wrapped), or
 *         if one of the helpers called here throws it
 * @throws RuntimeException if the commit policy decides the purge must not be committed
 */
public void purge() throws IOException {
  this.datasetOwner = getOwner();
  // Work on a copy of the state so the proxy utilities do not touch this dataset's own state object.
  State state = new State(this.state);
  this.datasetOwnerFs = ProxyUtils.getOwnerFs(state, this.datasetOwner);
  try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.datasetOwner)) {
    if (this.simulate) {
      log.info("Simulate is set to true. Won't run actual queries");
      return;
    }
    String originalPartitionLocation = getOriginalPartitionLocation();

    // Create the staging table and staging partition
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getCreateStagingTableQuery(this), this.datasetOwner);
    this.startTime = getLastModifiedTime(originalPartitionLocation);

    // Execute purge queries, that is insert filtered data into the staging partition
    queryExecutor.executeQueries(this.purgeQueries, this.datasetOwner);
    this.endTime = getLastModifiedTime(originalPartitionLocation);

    // Create a backup table and partition pointing to the original partition location
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getBackupQueries(this), this.datasetOwner);

    // The commit policy is instantiated reflectively from configuration, falling back to the default class.
    String commitPolicyString = this.state.getProp(ComplianceConfigurationKeys.PURGER_COMMIT_POLICY_CLASS,
        ComplianceConfigurationKeys.DEFAULT_PURGER_COMMIT_POLICY_CLASS);
    CommitPolicy<PurgeableHivePartitionDataset> commitPolicy =
        GobblinConstructorUtils.invokeConstructor(CommitPolicy.class, commitPolicyString);
    if (!commitPolicy.shouldCommit(this)) {
      log.error("Last modified time before start of execution : " + this.startTime);
      log.error("Last modified time after execution of purge queries : " + this.endTime);
      throw new RuntimeException("Failed to commit. File modified during job run.");
    }

    // Alter the original table partition to start pointing to the cleaned-partition-location/staging-partition-location
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getAlterOriginalPartitionLocationQueries(this),
        this.datasetOwner);

    // Drop the staging table
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getDropStagingTableQuery(this), this.datasetOwner);
  } catch (SQLException e) {
    throw new IOException(e);
  }
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class ComplianceRetentionJob method initDatasetFinder.
/**
 * Instantiates the configured dataset finder and classifies the Hive datasets found for the given
 * properties.
 *
 * <p>Datasets that have at least one partition get their complete table name added to
 * {@code tableNamesList}. Datasets without partitions are added to {@code tablesToDrop} only when
 * dropping empty tables is enabled in {@code properties} and the complete table name contains the
 * trash, backup or staging marker.
 *
 * @param properties job properties; must contain {@code GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS}
 * @throws IllegalArgumentException if the dataset finder class property is missing
 * @throws IOException if one of the calls made while finding the datasets throws it
 */
public void initDatasetFinder(Properties properties) throws IOException {
  Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS),
      "Missing required property " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
  String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS);
  this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties));

  Iterator<HiveDataset> datasetsIterator =
      new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator();

  // The flag does not depend on the dataset being inspected, so read it once instead of per iteration.
  boolean shouldDropEmptyTables = Boolean.parseBoolean(properties.getProperty(
      ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES,
      ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES));

  while (datasetsIterator.hasNext()) {
    HiveDataset hiveDataset = datasetsIterator.next();
    List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset();
    String completeTableName = hiveDataset.getTable().getCompleteName();

    // Tables that still have partitions are only recorded by name.
    if (!partitionsFromDataset.isEmpty()) {
      this.tableNamesList.add(completeTableName);
      continue;
    }

    // Empty table: skip it unless dropping empty tables is enabled.
    if (!shouldDropEmptyTables) {
      continue;
    }

    // Only empty tables whose name carries the trash, backup or staging marker are queued for dropping.
    if (completeTableName.contains(ComplianceConfigurationKeys.TRASH)
        || completeTableName.contains(ComplianceConfigurationKeys.BACKUP)
        || completeTableName.contains(ComplianceConfigurationKeys.STAGING)) {
      this.tablesToDrop.add(hiveDataset);
    }
  }
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class HivePartitionVersionRetentionCleaner method clean.
/**
 * Cleans the Hive partition version handled by this cleaner.
 *
 * <p>Behavior, as implemented:
 * <ul>
 *   <li>If the version location does not exist, only the drop-version queries are executed.</li>
 *   <li>If the version location equals the dataset location (case-insensitive string comparison),
 *       the data is kept and the drop-version queries are executed.</li>
 *   <li>If the version location is one of the non deletable version locations, the data is kept and
 *       the drop-version queries are executed.</li>
 *   <li>Otherwise, if the location has content: when simulate is set the method logs and returns
 *       without deleting data or executing the drop-version queries; when it is not set the data is
 *       deleted recursively and the drop-version queries are executed.</li>
 *   <li>If none of the above applies, the drop-version queries are still executed.</li>
 * </ul>
 * The query executor is closed when the method exits.
 *
 * @throws IOException if a file system call or one of the helpers called here throws it
 */
@Override
public void clean() throws IOException {
  HivePartitionRetentionVersion retentionVersion = (HivePartitionRetentionVersion) this.datasetVersion;
  Path versionPath = retentionVersion.getLocation();
  Path datasetPath = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
  String versionUrn = retentionVersion.datasetURN();

  State proxyState = new State(this.state);
  this.fs = ProxyUtils.getOwnerFs(proxyState, this.versionOwner);

  try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(proxyState, this.versionOwner)) {
    log.info("Trying to clean version " + versionUrn);

    if (this.fs.exists(versionPath)) {
      if (datasetPath.toString().equalsIgnoreCase(versionPath.toString())) {
        log.info("Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version "
            + versionUrn);
      } else if (this.nonDeletableVersionLocations.contains(versionPath.toString())) {
        log.info("This version corresponds to the non deletable version. Won't delete the data but metadata will be dropped for the version "
            + versionUrn);
      } else if (HadoopUtils.hasContent(this.fs, versionPath)) {
        // Simulate mode leaves both the data and the metadata untouched in this branch.
        if (this.simulate) {
          log.info("Simulate is set to true. Won't delete the partition " + versionUrn);
          return;
        }
        log.info("Deleting data from the version " + versionUrn);
        this.fs.delete(versionPath, true);
      }
    } else {
      log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version " + versionUrn);
    }

    executeDropVersionQueries(executor);
  }
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class GobblinMetrics method addCustomTagToProperties.
/**
 * Adds a {@link Tag} to a {@link Properties} object under the key {@link #METRICS_STATE_CUSTOM_TAGS},
 * by delegating to {@link #addCustomTagToState(State, Tag)} with a {@link State} built from the
 * given properties.
 *
 * <p>
 * The same {@link Properties} can be used to build a {@link State}, and the
 * {@link org.apache.gobblin.metrics.Tag}s stored under this key can later be read back with
 * {@link #getCustomTagsFromState}.
 * </p>
 *
 * @param properties {@link Properties} that should receive the tag.
 * @param tag {@link Tag} to add.
 */
public static void addCustomTagToProperties(Properties properties, Tag<?> tag) {
  // NOTE(review): relies on State(Properties) wrapping the given instance rather than copying it,
  // so that the tag written through the State ends up in `properties` - confirm against State.
  addCustomTagToState(new State(properties), tag);
}
use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.
the class RestorableHivePartitionDataset method init.
/**
 * Initializes this dataset from the given state.
 *
 * <p>Copies the state, checks that the restore policy class and trash owner properties are present,
 * resolves the dataset owner and trash owner, sets the timestamp, instantiates the restore policy
 * reflectively and uses it to find the dataset to restore along with that dataset's owner.
 *
 * @param state configuration to copy; must contain
 *        {@code ComplianceConfigurationKeys.RESTORE_POLICY_CLASS} and
 *        {@code ComplianceConfigurationKeys.TRASH_OWNER}
 * @throws IllegalArgumentException if one of the required properties is missing
 * @throws RuntimeException if the restore policy fails with an {@link IOException} while looking up
 *         the dataset to restore
 */
private void init(State state) {
  this.state = new State(state);
  Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RESTORE_POLICY_CLASS),
      "Missing required property " + ComplianceConfigurationKeys.RESTORE_POLICY_CLASS);
  Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.TRASH_OWNER),
      "Missing required property " + ComplianceConfigurationKeys.TRASH_OWNER);
  String restorePolicyClass = this.state.getProp(ComplianceConfigurationKeys.RESTORE_POLICY_CLASS);
  this.datasetOwner = getOwner();
  this.trashOwner = Optional.fromNullable(this.state.getProp(ComplianceConfigurationKeys.TRASH_OWNER));
  setTimeStamp();
  this.restorePolicy =
      GobblinConstructorUtils.invokeConstructor(HivePartitionRestorePolicy.class, restorePolicyClass, this.state);
  try {
    this.datasetToRestore = (HivePartitionDataset) this.restorePolicy.getDatasetToRestore(this);
    log.info("Found dataset to restore with " + this.datasetToRestore.datasetURN());
  } catch (IOException e) {
    // Rethrow explicitly so it is visible (to readers and the compiler) that execution never
    // continues past this point with datasetToRestore unset.
    throw Throwables.propagate(e);
  }
  this.datasetToRestoreOwner = this.datasetToRestore.getOwner();
}
Aggregations