Search in sources :

Example 1 with VersionCleaner

Use of org.apache.gobblin.data.management.retention.version.VersionCleaner in the incubator-gobblin project by Apache.

From the class CleanableHivePartitionDataset, method clean.

/**
 * Cleans the versions of this dataset that the configured retention policy selects.
 *
 * <p>The method first verifies that the version finder, selection policy and version cleaner
 * class names are all present in the job state. It then uses a {@link HivePartitionVersionFinder}
 * to list the versions matching this dataset's backup, staging and trash table-name patterns,
 * and a {@link HivePartitionVersionPolicy} to pick the versions to delete.
 *
 * <p>For each selected version a {@link VersionCleaner} is instantiated reflectively and run.
 * Cleaning is best-effort: a failure on one version is logged (with its stack trace) and the
 * remaining versions are still processed.
 *
 * @throws IllegalArgumentException if any of the three required class-name properties is missing
 * @throws IOException declared by the overridden method; presumably raised while listing
 *         versions (not visible in this method, confirm against the finder implementation)
 */
@Override
public void clean() throws IOException {
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY), "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY);
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY), "Missing required property " + ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY);
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY), "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY);

    // The table name is the same for all three patterns, so resolve it once.
    String completeTableName = getCompleteTableName(this);
    List<String> patterns = new ArrayList<>();
    patterns.add(completeTableName + ComplianceConfigurationKeys.BACKUP);
    patterns.add(completeTableName + ComplianceConfigurationKeys.STAGING);
    patterns.add(completeTableName + ComplianceConfigurationKeys.TRASH);

    HivePartitionVersionFinder versionFinder = GobblinConstructorUtils.invokeConstructor(HivePartitionVersionFinder.class, this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY), this.fs, this.state, patterns);
    List<HivePartitionVersion> versions = new ArrayList<>(versionFinder.findDatasetVersions(this));

    HivePartitionVersionPolicy versionPolicy = GobblinConstructorUtils.invokeConstructor(HivePartitionVersionPolicy.class, this.state.getProp(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY), this.state, this);
    List<HivePartitionVersion> deletableVersions = new ArrayList<>(versionPolicy.selectedList(versions));

    // Locations of the versions that are being kept are handed to every cleaner.
    List<String> nonDeletableVersionLocations = getNonDeletableVersionLocations(versions, deletableVersions);

    for (HivePartitionVersion hivePartitionDatasetVersion : deletableVersions) {
        try {
            VersionCleaner versionCleaner = GobblinConstructorUtils.invokeConstructor(HivePartitionVersionRetentionRunner.class, this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY), this, hivePartitionDatasetVersion, nonDeletableVersionLocations, this.state);
            versionCleaner.clean();
        } catch (Exception e) {
            // Best-effort per version: keep going, but pass the throwable so the exception
            // type, stack trace and cause chain are not lost (getMessage() alone may be null).
            log.warn("Caught exception trying to clean version " + hivePartitionDatasetVersion.datasetURN(), e);
        }
    }
}
Also used : HivePartitionVersion(org.apache.gobblin.compliance.HivePartitionVersion) HivePartitionVersionPolicy(org.apache.gobblin.compliance.HivePartitionVersionPolicy) ArrayList(java.util.ArrayList) HivePartitionVersionFinder(org.apache.gobblin.compliance.HivePartitionVersionFinder) IOException(java.io.IOException) VersionCleaner(org.apache.gobblin.data.management.retention.version.VersionCleaner)

Aggregations

IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HivePartitionVersion (org.apache.gobblin.compliance.HivePartitionVersion)1 HivePartitionVersionFinder (org.apache.gobblin.compliance.HivePartitionVersionFinder)1 HivePartitionVersionPolicy (org.apache.gobblin.compliance.HivePartitionVersionPolicy)1 VersionCleaner (org.apache.gobblin.data.management.retention.version.VersionCleaner)1