Search in sources :

Example 1 with HivePartitionVersionFinder

use of org.apache.gobblin.compliance.HivePartitionVersionFinder in project incubator-gobblin by apache.

The method clean of the class CleanableHivePartitionDataset.

/**
 * Cleans the backup, staging and trash versions belonging to this dataset.
 *
 * <p>A {@link HivePartitionVersionFinder} lists the versions whose names match this dataset's
 * complete table name followed by the backup, staging or trash suffix. A
 * {@link HivePartitionVersionPolicy} then selects the deletable ones, and each selected version is
 * handed to its own {@link VersionCleaner}. The finder, policy and cleaner implementations are
 * instantiated from the class names configured in the job state.
 *
 * <p>Cleaning is best effort: a failure on one version is logged (with its stack trace) and the
 * remaining versions are still processed.
 *
 * @throws IllegalArgumentException if the version finder, selection policy or version cleaner
 *         class property is missing from the state
 * @throws IOException declared by the signature; presumably raised while discovering versions
 */
@Override
public void clean() throws IOException {
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY), "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY);
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY), "Missing required property " + ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY);
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY), "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY);

    // The table name is the same for all three patterns, so resolve it once.
    String completeTableName = getCompleteTableName(this);
    List<String> patterns = new ArrayList<>();
    patterns.add(completeTableName + ComplianceConfigurationKeys.BACKUP);
    patterns.add(completeTableName + ComplianceConfigurationKeys.STAGING);
    patterns.add(completeTableName + ComplianceConfigurationKeys.TRASH);

    HivePartitionVersionFinder versionFinder = GobblinConstructorUtils.invokeConstructor(HivePartitionVersionFinder.class, this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY), this.fs, this.state, patterns);
    List<HivePartitionVersion> versions = new ArrayList<>(versionFinder.findDatasetVersions(this));

    HivePartitionVersionPolicy versionPolicy = GobblinConstructorUtils.invokeConstructor(HivePartitionVersionPolicy.class, this.state.getProp(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY), this.state, this);
    List<HivePartitionVersion> deletableVersions = new ArrayList<>(versionPolicy.selectedList(versions));

    // Locations of the versions that must survive are passed to every cleaner.
    List<String> nonDeletableVersionLocations = getNonDeletableVersionLocations(versions, deletableVersions);

    // The cleaner class name does not change between iterations.
    String versionCleanerClassName = this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY);
    for (HivePartitionVersion hivePartitionDatasetVersion : deletableVersions) {
        try {
            VersionCleaner versionCleaner = GobblinConstructorUtils.invokeConstructor(HivePartitionVersionRetentionRunner.class, versionCleanerClassName, this, hivePartitionDatasetVersion, nonDeletableVersionLocations, this.state);
            versionCleaner.clean();
        } catch (Exception e) {
            // Deliberately broad: one failing version must not stop the others from being cleaned.
            // Pass the throwable itself so the stack trace and cause chain reach the log.
            log.warn("Caught exception trying to clean version " + hivePartitionDatasetVersion.datasetURN(), e);
        }
    }
}
Also used : HivePartitionVersion(org.apache.gobblin.compliance.HivePartitionVersion) HivePartitionVersionPolicy(org.apache.gobblin.compliance.HivePartitionVersionPolicy) ArrayList(java.util.ArrayList) HivePartitionVersionFinder(org.apache.gobblin.compliance.HivePartitionVersionFinder) IOException(java.io.IOException) VersionCleaner(org.apache.gobblin.data.management.retention.version.VersionCleaner)

Example 2 with HivePartitionVersionFinder

use of org.apache.gobblin.compliance.HivePartitionVersionFinder in project incubator-gobblin by apache.

The method getDatasetToRestore of the class LKGRestorePolicy.

/**
 * Chooses the backup version from which the given dataset should be restored.
 *
 * <p>Backup versions are looked up by the dataset's complete table name followed by the backup
 * suffix. Versions that fail {@code isRestorable} are dropped, the remainder is sorted by its
 * natural ordering, and the first element is wrapped in a new {@link HivePartitionDataset}.
 * That first element is presumably the most recent version; this depends on the ordering defined
 * by {@link HivePartitionVersion}, which is not visible here.
 *
 * @param dataset the dataset to restore
 * @return a dataset built from the first restorable backup version after sorting
 * @throws IllegalArgumentException if no backup version exists, or none of them is restorable
 * @throws IOException declared by the signature; presumably raised while looking up versions
 */
public HivePartitionDataset getDatasetToRestore(HivePartitionDataset dataset) throws IOException {
    List<String> backupPatterns = new ArrayList<>();
    backupPatterns.add(getCompleteTableName(dataset) + ComplianceConfigurationKeys.BACKUP);

    HivePartitionVersionFinder backupFinder =
        new HivePartitionVersionFinder(WriterUtils.getWriterFs(new State(this.state)), this.state, backupPatterns);
    List<HivePartitionVersion> candidates = new ArrayList<>(backupFinder.findDatasetVersions(dataset));
    Preconditions.checkArgument(!candidates.isEmpty(), "No versions to restore dataset " + dataset.datasetURN());

    // Collect the rejects first, then remove them in one pass (equality-based removal).
    List<HivePartitionVersion> rejected = new ArrayList<>();
    for (HivePartitionVersion candidate : candidates) {
        if (isRestorable(dataset, candidate)) {
            continue;
        }
        rejected.add(candidate);
    }
    candidates.removeAll(rejected);
    Preconditions.checkArgument(!candidates.isEmpty(), "No versions to restore dataset " + dataset.datasetURN());

    // After sorting, the head of the list is the version to restore from.
    Collections.sort(candidates);
    HivePartitionVersion chosen = candidates.get(0);
    return new HivePartitionDataset(chosen);
}
Also used : HivePartitionVersion(org.apache.gobblin.compliance.HivePartitionVersion) HivePartitionDataset(org.apache.gobblin.compliance.HivePartitionDataset) State(org.apache.gobblin.configuration.State) ArrayList(java.util.ArrayList) HivePartitionVersionFinder(org.apache.gobblin.compliance.HivePartitionVersionFinder)

Aggregations

ArrayList (java.util.ArrayList)2 HivePartitionVersion (org.apache.gobblin.compliance.HivePartitionVersion)2 HivePartitionVersionFinder (org.apache.gobblin.compliance.HivePartitionVersionFinder)2 IOException (java.io.IOException)1 HivePartitionDataset (org.apache.gobblin.compliance.HivePartitionDataset)1 HivePartitionVersionPolicy (org.apache.gobblin.compliance.HivePartitionVersionPolicy)1 State (org.apache.gobblin.configuration.State)1 VersionCleaner (org.apache.gobblin.data.management.retention.version.VersionCleaner)1