use of org.apache.gobblin.compliance.HivePartitionVersion in project incubator-gobblin by apache.
the class CleanableHivePartitionDataset method clean.
/**
 * Cleans the versions of this dataset that the configured retention policy selects.
 *
 * <p>This method uses {@link HivePartitionVersionFinder} to list out versions corresponding to this
 * dataset, searching with three patterns built from the complete table name followed by the
 * {@code BACKUP}, {@code STAGING} and {@code TRASH} constants. It will then filter out versions
 * using {@link HivePartitionVersionPolicy}.
 *
 * <p>For each selected version there will be a corresponding {@link VersionCleaner} which will clean
 * the version. A failure while cleaning one version is logged together with its stack trace and does
 * not prevent the remaining versions from being processed.
 *
 * @throws IOException declared by the signature; presumably raised while listing versions (confirm
 *     against the version finder)
 * @throws IllegalArgumentException if the version finder, selection policy or version cleaner class
 *     property is missing from the state
 */
@Override
public void clean() throws IOException {
  Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY),
      "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY);
  Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY),
      "Missing required property " + ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY);
  Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY),
      "Missing required property " + ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY);

  // The complete table name is the common stem of every pattern, so resolve it once.
  String completeTableName = getCompleteTableName(this);
  List<String> patterns = new ArrayList<>();
  patterns.add(completeTableName + ComplianceConfigurationKeys.BACKUP);
  patterns.add(completeTableName + ComplianceConfigurationKeys.STAGING);
  patterns.add(completeTableName + ComplianceConfigurationKeys.TRASH);

  HivePartitionVersionFinder versionFinder = GobblinConstructorUtils.invokeConstructor(
      HivePartitionVersionFinder.class,
      this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_FINDER_CLASS_KEY),
      this.fs, this.state, patterns);
  List<HivePartitionVersion> versions = new ArrayList<>(versionFinder.findDatasetVersions(this));

  HivePartitionVersionPolicy versionPolicy = GobblinConstructorUtils.invokeConstructor(
      HivePartitionVersionPolicy.class,
      this.state.getProp(ComplianceConfigurationKeys.RETENTION_SELECTION_POLICY_CLASS_KEY),
      this.state, this);
  List<HivePartitionVersion> deletableVersions = new ArrayList<>(versionPolicy.selectedList(versions));

  // Locations of versions that are not selected for deletion are handed to every cleaner.
  List<String> nonDeletableVersionLocations = getNonDeletableVersionLocations(versions, deletableVersions);

  for (HivePartitionVersion hivePartitionDatasetVersion : deletableVersions) {
    try {
      VersionCleaner versionCleaner = GobblinConstructorUtils.invokeConstructor(
          HivePartitionVersionRetentionRunner.class,
          this.state.getProp(ComplianceConfigurationKeys.RETENTION_VERSION_CLEANER_CLASS_KEY),
          this, hivePartitionDatasetVersion, nonDeletableVersionLocations, this.state);
      versionCleaner.clean();
    } catch (Exception e) {
      // Best effort: keep going with the other versions, but pass the exception itself to the
      // logger so the stack trace and cause are not lost.
      log.warn("Caught exception trying to clean version " + hivePartitionDatasetVersion.datasetURN() + "\n"
          + e.getMessage(), e);
    }
  }
}
use of org.apache.gobblin.compliance.HivePartitionVersion in project incubator-gobblin by apache.
the class LKGRestorePolicy method getDatasetToRestore.
/**
 * Picks the version that the given dataset should be restored from.
 *
 * <p>Versions are looked up with a single pattern: the complete table name of the dataset followed
 * by the {@code BACKUP} constant. Versions for which {@code isRestorable} returns false are
 * discarded, the remainder is sorted in natural order and the first element is wrapped in a new
 * {@link HivePartitionDataset}. The sort order comes from the version's own comparison logic,
 * which the original comment describes as putting the most recent version first.
 *
 * @param dataset to restore
 * @return most recent restorable dataset
 * @throws IOException declared by the signature; presumably raised by the file system or version lookup
 * @throws IllegalArgumentException if no version is found, or none of the found versions is restorable
 */
public HivePartitionDataset getDatasetToRestore(HivePartitionDataset dataset) throws IOException {
  List<String> backupPatterns = new ArrayList<>();
  backupPatterns.add(getCompleteTableName(dataset) + ComplianceConfigurationKeys.BACKUP);

  HivePartitionVersionFinder versionFinder =
      new HivePartitionVersionFinder(WriterUtils.getWriterFs(new State(this.state)), this.state, backupPatterns);
  List<HivePartitionVersion> candidates = new ArrayList<>(versionFinder.findDatasetVersions(dataset));
  Preconditions.checkArgument(!candidates.isEmpty(), "No versions to restore dataset " + dataset.datasetURN());

  // Collect the rejects first, then drop them in one go.
  List<HivePartitionVersion> rejected = new ArrayList<>();
  for (HivePartitionVersion candidate : candidates) {
    if (isRestorable(dataset, candidate)) {
      continue;
    }
    rejected.add(candidate);
  }
  candidates.removeAll(rejected);
  Preconditions.checkArgument(!candidates.isEmpty(), "No versions to restore dataset " + dataset.datasetURN());

  // After sorting, the head of the list is the version to restore from.
  Collections.sort(candidates);
  HivePartitionVersion chosen = candidates.get(0);
  return new HivePartitionDataset(chosen);
}
use of org.apache.gobblin.compliance.HivePartitionVersion in project incubator-gobblin by apache.
the class CleanableHivePartitionDataset method getNonDeletableVersionLocations.
/**
 * Builds the list of locations that belong to versions which are not going to be deleted.
 *
 * <p>The result holds the location of every entry of {@code versions} that is not contained in
 * {@code deletableVersions}, in the order of {@code versions}, followed by the location of this
 * dataset itself.
 *
 * @param versions all versions found for this dataset
 * @param deletableVersions the versions selected for deletion
 * @return string forms of the locations described above
 */
private List<String> getNonDeletableVersionLocations(List<HivePartitionVersion> versions,
    List<HivePartitionVersion> deletableVersions) {
  List<String> keptLocations = new ArrayList<>();
  for (HivePartitionVersion candidate : versions) {
    if (deletableVersions.contains(candidate)) {
      continue;
    }
    keptLocations.add(candidate.getLocation().toString());
  }
  // This dataset's own location always goes last.
  String ownLocation = this.getLocation().toString();
  keptLocations.add(ownLocation);
  return keptLocations;
}
use of org.apache.gobblin.compliance.HivePartitionVersion in project incubator-gobblin by apache.
the class HivePartitionVersionRetentionCleanerPolicy method selectedList.
/**
 * Selects the backup and trash versions that should be cleaned.
 *
 * <p>Only versions whose table name starts with this dataset's db name followed by
 * {@code DBNAME_SEPARATOR} are considered. Among those:
 * <ul>
 *   <li>a version whose table name contains {@code TRASH} is selected when its age in days is at
 *       least {@code trashRetentionDays};</li>
 *   <li>versions whose table name contains {@code BACKUP} are sorted in their natural order; the
 *       first {@code backupRetentionVersions} of them are selected only when their age in days is
 *       at least {@code backupRetentionDays}, and every backup version after those is always
 *       selected.</li>
 * </ul>
 *
 * <p>When fewer backup versions exist than {@code backupRetentionVersions}, all of them are
 * treated as being within the retained count.
 *
 * @param versions versions to choose from; considered entries must be
 *     {@link HivePartitionRetentionVersion} instances, otherwise the cast fails
 * @return the given list itself when it is empty, otherwise a new list with the selected versions
 */
@Override
public List<HivePartitionVersion> selectedList(List<HivePartitionVersion> versions) {
  if (versions.isEmpty()) {
    return versions;
  }

  List<HivePartitionRetentionVersion> backupVersions = new ArrayList<>();
  List<HivePartitionRetentionVersion> trashVersions = new ArrayList<>();
  List<HivePartitionVersion> selectedVersions = new ArrayList<>();

  // The prefix does not depend on the version, so build it once outside the loop.
  String prefix = this.dataset.getDbName() + ComplianceConfigurationKeys.DBNAME_SEPARATOR;
  for (HivePartitionVersion version : versions) {
    if (!version.getTableName().startsWith(prefix)) {
      continue;
    }
    if (version.getTableName().contains(ComplianceConfigurationKeys.BACKUP)) {
      backupVersions.add((HivePartitionRetentionVersion) version);
    }
    if (version.getTableName().contains(ComplianceConfigurationKeys.TRASH)) {
      trashVersions.add((HivePartitionRetentionVersion) version);
    }
  }

  for (HivePartitionRetentionVersion version : trashVersions) {
    long ageInDays = TimeUnit.MILLISECONDS.toDays(version.getAgeInMilliSeconds());
    if (ageInDays >= this.trashRetentionDays) {
      selectedVersions.add(version);
    }
  }

  if (backupVersions.isEmpty()) {
    return selectedVersions;
  }

  Collections.sort(backupVersions);

  // Bound both sub-lists by the number of BACKUP versions: the input list may also hold trash or
  // filtered-out versions, and there may be fewer backups than the configured retained count.
  int retainedCount = Math.min(this.backupRetentionVersions, backupVersions.size());

  // Everything past the retained count is selected regardless of age.
  selectedVersions.addAll(backupVersions.subList(retainedCount, backupVersions.size()));

  // Versions within the retained count are selected only once they are old enough.
  for (HivePartitionRetentionVersion version : backupVersions.subList(0, retainedCount)) {
    long ageInDays = TimeUnit.MILLISECONDS.toDays(version.getAgeInMilliSeconds());
    if (ageInDays >= this.backupRetentionDays) {
      selectedVersions.add(version);
    }
  }
  return selectedVersions;
}
use of org.apache.gobblin.compliance.HivePartitionVersion in project incubator-gobblin by apache.
the class HivePartitionVersionRetentionReaperPolicy method selectedList.
/**
 * Returns the versions for which {@code shouldSelect} is true.
 *
 * <p>A non-empty input is sorted in place in its natural order before being filtered, so the
 * caller's list is reordered and the result follows that order. The first element must be a
 * {@link HivePartitionRetentionVersion}.
 *
 * @param versions versions to choose from; sorted in place when not empty
 * @return the given list itself when it is empty, otherwise a new list with the selected versions
 * @throws IllegalArgumentException if the first version is not a {@link HivePartitionRetentionVersion}
 */
@Override
public List<HivePartitionVersion> selectedList(List<HivePartitionVersion> versions) {
  if (!versions.isEmpty()) {
    Preconditions.checkArgument(versions.get(0) instanceof HivePartitionRetentionVersion);

    List<HivePartitionVersion> picked = new ArrayList<>();
    Collections.sort(versions);
    for (HivePartitionVersion candidate : versions) {
      if (!shouldSelect(candidate)) {
        continue;
      }
      picked.add(candidate);
    }
    return picked;
  }
  // Nothing to choose from: hand the empty input back unchanged.
  return versions;
}
Aggregations