Use of org.apache.gobblin.compliance.HivePartitionVersionPolicy in the project incubator-gobblin by Apache.
The method clean of the class CleanableHivePartitionDataset.
/**
 * Cleans the versions of this dataset that the configured policy selects.
 *
 * <p>This method uses {@link HivePartitionVersionFinder} to list the versions corresponding to
 * this dataset, searching the backup, staging and trash table-name patterns derived from the
 * complete table name. It then filters those versions using {@link HivePartitionVersionPolicy}.
 *
 * <p>For each selected version a {@link VersionCleaner} is instantiated reflectively and asked to
 * clean that version. Cleaning is best-effort per version: an exception raised while constructing
 * or running one cleaner is logged (with its stack trace) and the remaining versions are still
 * processed.
 *
 * <p>The finder, policy and cleaner implementations are read from the job state under
 * {@link ComplianceConfigurationKeys#RETENTION_VERSION_FINDER_CLASS_KEY},
 * {@link ComplianceConfigurationKeys#RETENTION_SELECTION_POLICY_CLASS_KEY} and
 * {@link ComplianceConfigurationKeys#RETENTION_VERSION_CLEANER_CLASS_KEY}.
 *
 * @throws IllegalArgumentException if any of the three required properties is missing from the state
 * @throws IOException declared by the overridden method; may be propagated by the version
 *     discovery/selection steps that run before the per-version loop
 */
@Override
public void clean() throws IOException {
  // Fail fast when the job is not configured with all three pluggable classes.
  Preconditions.checkArgument(
      this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY),
      "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY);
  Preconditions.checkArgument(
      this.state.contains(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY),
      "Missing required property " + ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY);
  Preconditions.checkArgument(
      this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY),
      "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY);

  // The table name is the same for all three patterns, so resolve it once.
  // NOTE(review): assumes getCompleteTableName is a side-effect-free lookup -- confirm.
  String completeTableName = getCompleteTableName(this);
  List<String> patterns = new ArrayList<>();
  patterns.add(completeTableName + ComplianceConfigurationKeys.BACKUP);
  patterns.add(completeTableName + ComplianceConfigurationKeys.STAGING);
  patterns.add(completeTableName + ComplianceConfigurationKeys.TRASH);

  HivePartitionVersionFinder versionFinder = GobblinConstructorUtils.invokeConstructor(
      HivePartitionVersionFinder.class,
      this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY),
      this.fs, this.state, patterns);
  List<HivePartitionVersion> versions = new ArrayList<>(versionFinder.findDatasetVersions(this));

  HivePartitionVersionPolicy versionPolicy = GobblinConstructorUtils.invokeConstructor(
      HivePartitionVersionPolicy.class,
      this.state.getProp(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY),
      this.state, this);
  List<HivePartitionVersion> deletableVersions = new ArrayList<>(versionPolicy.selectedList(versions));

  // Computed from all versions and the selected ones; handed to every cleaner.
  // Presumably the locations that must survive cleaning -- verify against the helper.
  List<String> nonDeletableVersionLocations = getNonDeletableVersionLocations(versions, deletableVersions);

  for (HivePartitionVersion hivePartitionDatasetVersion : deletableVersions) {
    try {
      VersionCleaner versionCleaner = GobblinConstructorUtils.invokeConstructor(
          HivePartitionVersionRetentionRunner.class,
          this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY),
          this, hivePartitionDatasetVersion, nonDeletableVersionLocations, this.state);
      versionCleaner.clean();
    } catch (Exception e) {
      // Deliberately best-effort: one failing version must not stop the others.
      // Pass the exception itself so the type and stack trace are not lost.
      log.warn("Caught exception trying to clean version " + hivePartitionDatasetVersion.datasetURN(), e);
    }
  }
}
Aggregations