Use of org.apache.gobblin.compliance.HivePartitionVersionFinder in project incubator-gobblin by Apache.
In the class CleanableHivePartitionDataset, method clean.
/**
 * Cleans the versions of this dataset that the configured selection policy marks as deletable.
 *
 * <p>The method instantiates the {@link HivePartitionVersionFinder} named by
 * {@link ComplianceConfigurationKeys#RETENTION_VERSION_FINDER_CLASS_KEY} and asks it for the versions of
 * this dataset, passing name patterns built from this dataset's complete table name followed by the
 * {@code BACKUP}, {@code STAGING} and {@code TRASH} constants. The versions are then filtered through the
 * {@link HivePartitionVersionPolicy} named by
 * {@link ComplianceConfigurationKeys#RETENTION_SELECTION_POLICY_CLASS_KEY}.
 *
 * <p>For each selected version a {@link VersionCleaner} (class named by
 * {@link ComplianceConfigurationKeys#RETENTION_VERSION_CLEANER_CLASS_KEY}) is created and run. A failure
 * while creating or running the cleaner for one version is logged together with its stack trace and does
 * not prevent the remaining versions from being processed.
 *
 * @throws IllegalArgumentException if any of the three required class-name properties is missing
 * @throws IOException declared by this method; NOTE(review): presumably raised while listing versions,
 *     confirm against the finder implementation
 */
@Override
public void clean() throws IOException {
  // Fail fast, with the same message as before, if a pluggable class name is not configured.
  String[] requiredKeys = {
      ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY,
      ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY,
      ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY
  };
  for (String requiredKey : requiredKeys) {
    Preconditions.checkArgument(this.state.contains(requiredKey), "Missing required property " + requiredKey);
  }

  // The table name is the common prefix of every pattern, so resolve it once.
  String completeTableName = getCompleteTableName(this);
  List<String> patterns = new ArrayList<>();
  patterns.add(completeTableName + ComplianceConfigurationKeys.BACKUP);
  patterns.add(completeTableName + ComplianceConfigurationKeys.STAGING);
  patterns.add(completeTableName + ComplianceConfigurationKeys.TRASH);

  HivePartitionVersionFinder versionFinder = GobblinConstructorUtils.invokeConstructor(
      HivePartitionVersionFinder.class,
      this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY),
      this.fs, this.state, patterns);
  List<HivePartitionVersion> versions = new ArrayList<>(versionFinder.findDatasetVersions(this));

  HivePartitionVersionPolicy versionPolicy = GobblinConstructorUtils.invokeConstructor(
      HivePartitionVersionPolicy.class,
      this.state.getProp(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY),
      this.state, this);
  List<HivePartitionVersion> deletableVersions = new ArrayList<>(versionPolicy.selectedList(versions));

  // Locations of the versions that are kept are handed to every cleaner.
  List<String> nonDeletableVersionLocations = getNonDeletableVersionLocations(versions, deletableVersions);

  for (HivePartitionVersion hivePartitionDatasetVersion : deletableVersions) {
    try {
      VersionCleaner versionCleaner = GobblinConstructorUtils.invokeConstructor(
          HivePartitionVersionRetentionRunner.class,
          this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY),
          this, hivePartitionDatasetVersion, nonDeletableVersionLocations, this.state);
      versionCleaner.clean();
    } catch (Exception e) {
      // Best effort: keep cleaning the other versions, but log the throwable itself so the
      // stack trace and cause chain are not lost (getMessage() alone may even be null).
      log.warn("Caught exception trying to clean version " + hivePartitionDatasetVersion.datasetURN(), e);
    }
  }
}
Use of org.apache.gobblin.compliance.HivePartitionVersionFinder in project incubator-gobblin by Apache.
In the class LKGRestorePolicy, method getDatasetToRestore.
/**
 * Picks the version that the given dataset should be restored from.
 *
 * <p>Versions are looked up with a {@link HivePartitionVersionFinder} using a single name pattern: the
 * dataset's complete table name followed by {@link ComplianceConfigurationKeys#BACKUP}. Versions for
 * which {@code isRestorable} returns {@code false} are discarded, the rest are sorted by their natural
 * ordering, and the first one is wrapped in a new {@link HivePartitionDataset}.
 *
 * @param dataset to restore
 * @return a dataset built from the first version after sorting; intended to be the most recent
 *     restorable version (NOTE(review): relies on the version's natural ordering, confirm)
 * @throws IllegalArgumentException if no versions are found, or if none of them is restorable
 * @throws IOException declared by this method; NOTE(review): presumably raised by the file-system or
 *     version lookup, confirm
 */
public HivePartitionDataset getDatasetToRestore(HivePartitionDataset dataset) throws IOException {
  List<String> backupPatterns = new ArrayList<>();
  backupPatterns.add(getCompleteTableName(dataset) + ComplianceConfigurationKeys.BACKUP);

  HivePartitionVersionFinder versionFinder = new HivePartitionVersionFinder(
      WriterUtils.getWriterFs(new State(this.state)), this.state, backupPatterns);
  List<HivePartitionVersion> candidates = new ArrayList<>(versionFinder.findDatasetVersions(dataset));
  Preconditions.checkArgument(!candidates.isEmpty(), "No versions to restore dataset " + dataset.datasetURN());

  // Collect the rejects first and drop them afterwards, so the list is never modified mid-iteration.
  List<HivePartitionVersion> rejected = new ArrayList<>();
  for (HivePartitionVersion candidate : candidates) {
    if (isRestorable(dataset, candidate)) {
      continue;
    }
    rejected.add(candidate);
  }
  candidates.removeAll(rejected);
  Preconditions.checkArgument(!candidates.isEmpty(), "No versions to restore dataset " + dataset.datasetURN());

  // After sorting, the head of the list is the version to restore from.
  Collections.sort(candidates);
  HivePartitionVersion chosen = candidates.get(0);
  return new HivePartitionDataset(chosen);
}
Aggregations