use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.
the class PurgeableHivePartitionDataset method purge.
/**
 * Purges this partition by rebuilding it in a staging partition and then repointing the original partition.
 *
 * <p>Steps, in the order they are executed:
 * <ul>
 *   <li>Create the staging table and staging partition.</li>
 *   <li>Run the purge queries, which insert the filtered data into the staging partition.</li>
 *   <li>Create a backup table and partition pointing to the original partition location.</li>
 *   <li>Ask the configured {@link CommitPolicy} whether the result may be committed.</li>
 *   <li>Alter the original table partition so that it points to the staging partition location.</li>
 *   <li>Drop the staging table.</li>
 * </ul>
 *
 * <p>The value returned by {@code getLastModifiedTime} for the original partition location is stored in
 * {@code startTime} before the purge queries run and in {@code endTime} after they finish; both values are
 * logged when the commit policy refuses to commit.
 *
 * <p>When {@code simulate} is set, the owner file system and the query executor are still obtained, but no
 * query is executed: the method only logs and returns.
 *
 * @throws IOException if any of the executed queries fails with a {@link SQLException} (the exception is wrapped),
 *         or if one of the helpers called here throws an {@link IOException}
 * @throws RuntimeException if the commit policy's {@code shouldCommit} returns {@code false}
 */
public void purge() throws IOException {
  this.datasetOwner = getOwner();
  State state = new State(this.state);
  this.datasetOwnerFs = ProxyUtils.getOwnerFs(state, this.datasetOwner);
  try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.datasetOwner)) {
    if (this.simulate) {
      log.info("Simulate is set to true. Won't run actual queries");
      return;
    }
    String originalPartitionLocation = getOriginalPartitionLocation();

    // Create the staging table and staging partition
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getCreateStagingTableQuery(this), this.datasetOwner);

    // Bracket the purge queries with the last modified time of the original location, so that the
    // commit policy can inspect both values afterwards.
    this.startTime = getLastModifiedTime(originalPartitionLocation);
    // Execute purge queries, that is insert filtered data into the staging partition
    queryExecutor.executeQueries(this.purgeQueries, this.datasetOwner);
    this.endTime = getLastModifiedTime(originalPartitionLocation);

    // Create a backup table and partition pointing to the original partition location
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getBackupQueries(this), this.datasetOwner);

    // The commit policy class is configurable and instantiated reflectively from its class name.
    String commitPolicyString = this.state.getProp(ComplianceConfigurationKeys.PURGER_COMMIT_POLICY_CLASS,
        ComplianceConfigurationKeys.DEFAULT_PURGER_COMMIT_POLICY_CLASS);
    CommitPolicy<PurgeableHivePartitionDataset> commitPolicy =
        GobblinConstructorUtils.invokeConstructor(CommitPolicy.class, commitPolicyString);
    if (!commitPolicy.shouldCommit(this)) {
      log.error("Last modified time before start of execution : " + this.startTime);
      log.error("Last modified time after execution of purge queries : " + this.endTime);
      throw new RuntimeException("Failed to commit. File modified during job run.");
    }

    // Alter the original table partition to start pointing to the cleaned-partition-location/staging-partition-location
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getAlterOriginalPartitionLocationQueries(this),
        this.datasetOwner);
    // Drop the staging table
    queryExecutor.executeQueries(HivePurgerQueryTemplate.getDropStagingTableQuery(this), this.datasetOwner);
  } catch (SQLException e) {
    throw new IOException(e);
  }
}
use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.
the class HivePartitionVersionRetentionCleaner method clean.
/**
 * Cleans one partition version: decides whether its data must be deleted, then drops its metadata.
 *
 * <ul>
 *   <li>If the version location does not exist, no data is touched.</li>
 *   <li>If the version location equals the dataset location (string comparison, ignoring case), the data is kept.</li>
 *   <li>If the version location is one of the non deletable version locations, the data is kept.</li>
 *   <li>Otherwise, if the location has content: when simulate is set, the method returns immediately without
 *       deleting data or dropping metadata; when it is not set, the location is deleted recursively.</li>
 * </ul>
 *
 * <p>Every path that does not return early finishes by executing the drop version queries. Note that simulate is
 * only consulted in the last case above. The query executor is closed when the method exits.
 *
 * @throws IOException if a file system call or one of the helpers called here fails
 */
@Override
public void clean() throws IOException {
  HivePartitionRetentionVersion retentionVersion = (HivePartitionRetentionVersion) this.datasetVersion;
  Path versionPath = retentionVersion.getLocation();
  Path datasetPath = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
  String versionName = retentionVersion.datasetURN();

  State jobState = new State(this.state);
  this.fs = ProxyUtils.getOwnerFs(jobState, this.versionOwner);

  try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(jobState, this.versionOwner)) {
    log.info("Trying to clean version " + versionName);

    // Pick the reason (if any) for which the data must be left alone; checks run in the same order as before.
    String keepDataMessage = null;
    if (!this.fs.exists(versionPath)) {
      keepDataMessage = "Data versionLocation doesn't exist. Metadata will be dropped for the version ";
    } else if (datasetPath.toString().equalsIgnoreCase(versionPath.toString())) {
      keepDataMessage =
          "Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version ";
    } else if (this.nonDeletableVersionLocations.contains(versionPath.toString())) {
      keepDataMessage =
          "This version corresponds to the non deletable version. Won't delete the data but metadata will be dropped for the version ";
    }

    if (keepDataMessage != null) {
      log.info(keepDataMessage + versionName);
    } else if (HadoopUtils.hasContent(this.fs, versionPath)) {
      if (this.simulate) {
        log.info("Simulate is set to true. Won't delete the partition " + versionName);
        return;
      }
      log.info("Deleting data from the version " + versionName);
      this.fs.delete(versionPath, true);
    }

    executeDropVersionQueries(executor);
  }
}
use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.
the class RestorableHivePartitionDataset method restore.
/**
 * Restores this partition from {@code datasetToRestore}.
 *
 * <p>Steps, in the order they are executed:
 * <ul>
 *   <li>Execute the trash table queries.</li>
 *   <li>Create the parent directory of the trash partition location and move the current data there.</li>
 *   <li>Set permissions on the parent of the trash partition location.</li>
 *   <li>Move the data of {@code datasetToRestore} to this dataset's location and set permissions on the
 *       parent of that location.</li>
 *   <li>Execute the drop partition queries.</li>
 * </ul>
 *
 * <p>When the simulate property is set in the job state, the owner file system and the query executor are
 * still obtained, but nothing else is done apart from logging.
 *
 * @throws IOException if the current data cannot be moved to the trash location, or if a file system call or
 *         one of the helpers called here fails
 */
public void restore() throws IOException {
  State state = new State(this.state);
  this.datasetOwnerFs = ProxyUtils.getOwnerFs(state, this.datasetOwner);
  try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.datasetOwner,
      this.datasetToRestoreOwner, this.trashOwner)) {
    if (this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
        ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE)) {
      log.info("Simulating restore of " + datasetURN() + " with " + this.datasetToRestore.datasetURN());
      return;
    }

    Path trashPartitionLocation = getTrashPartitionLocation();
    executeTrashTableQueries(queryExecutor);
    this.datasetOwnerFs.mkdirs(trashPartitionLocation.getParent());

    // rename reports failure through its return value. Continuing after a failed move would bring the backup
    // data into a location that still holds the current data, and then drop the partition metadata.
    if (!this.datasetOwnerFs.rename(getLocation(), trashPartitionLocation)) {
      throw new IOException("Failed to move dataset " + datasetURN() + " from " + getLocation()
          + " to trash location " + trashPartitionLocation);
    }
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
    HadoopUtils.setPermissions(trashPartitionLocation.getParent(), this.datasetOwner, this.trashOwner,
        this.datasetOwnerFs, permission);
    log.info("Moved dataset " + datasetURN() + " from " + getLocation() + " to trash location "
        + trashPartitionLocation);

    fsMove(this.datasetToRestore.getLocation(), getLocation());
    HadoopUtils.setPermissions(getLocation().getParent(), this.datasetOwner, this.trashOwner, this.datasetOwnerFs,
        permission);
    log.info("Moved data from backup " + this.datasetToRestore.getLocation() + " to location " + getLocation());

    executeDropPartitionQueries(queryExecutor);
  }
}
use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.
the class ComplianceRetentionJob method executeDropTableQuery.
/**
 * Drops the Hive table backing the given dataset.
 *
 * <p>The query is executed through a query executor obtained for the table owner; the owner may be absent when
 * the table reports no owner. The executor is closed when the method exits.
 *
 * @param hiveDataset dataset whose table (database name, table name and owner) identifies what to drop
 * @param properties properties used to build the {@link State} handed to the query executor
 * @throws IOException if the query fails with a {@link SQLException} (the exception is wrapped), or if the
 *         executor itself throws an {@link IOException}
 */
private static void executeDropTableQuery(HiveDataset hiveDataset, Properties properties) throws IOException {
  String database = hiveDataset.getTable().getDbName();
  String table = hiveDataset.getTable().getTableName();
  Optional<String> owner = Optional.fromNullable(hiveDataset.getTable().getOwner());

  State jobState = new State(properties);
  try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(jobState, owner)) {
    executor.executeQuery(HivePurgerQueryTemplate.getDropTableQuery(database, table), owner);
  } catch (SQLException sqlFailure) {
    throw new IOException(sqlFailure);
  }
}
use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.
the class HivePartitionVersionRetentionReaper method clean.
/**
 * Reaps one partition version: deletes or relocates its data as needed, then drops its metadata.
 *
 * <ul>
 *   <li>If the version location does not exist, no data is touched.</li>
 *   <li>If the version location equals the dataset location (string comparison, ignoring case), the data is kept.</li>
 *   <li>Otherwise, if simulate is set, the method returns immediately without changing anything.</li>
 *   <li>Otherwise, if the version name contains the staging marker, its data is deleted recursively.</li>
 *   <li>Otherwise, if the version name contains the backup marker, the alter queries are executed, the parent of
 *       the new version location is created, the data is moved to the new version location and permissions
 *       are set on that parent directory.</li>
 * </ul>
 *
 * <p>Every path that does not return early finishes by executing the drop version queries. Note that simulate is
 * not consulted in the first two cases. The query executor is closed when the method exits.
 *
 * @throws IOException if a file system call or one of the helpers called here fails
 */
@Override
public void clean() throws IOException {
  HivePartitionRetentionVersion retentionVersion = (HivePartitionRetentionVersion) this.datasetVersion;
  Path versionPath = retentionVersion.getLocation();
  Path datasetPath = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
  String versionName = retentionVersion.datasetURN();

  State jobState = new State(this.state);
  this.versionOwnerFs = ProxyUtils.getOwnerFs(jobState, this.versionOwner);

  try (HiveProxyQueryExecutor executor =
      ProxyUtils.getQueryExecutor(jobState, this.versionOwner, this.backUpOwner)) {
    if (!this.versionOwnerFs.exists(versionPath)) {
      log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version " + versionName);
    } else if (datasetPath.toString().equalsIgnoreCase(versionPath.toString())) {
      log.info(
          "Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version "
              + versionName);
    } else {
      // From here on data would be deleted or moved, so simulate mode stops before anything changes.
      if (this.simulate) {
        log.info("Simulate is set to true. Won't move the version " + versionName);
        return;
      }

      if (versionName.contains(ComplianceConfigurationKeys.STAGING)) {
        log.info("Deleting data from version " + versionName);
        this.versionOwnerFs.delete(versionPath, true);
      } else if (versionName.contains(ComplianceConfigurationKeys.BACKUP)) {
        executeAlterQueries(executor);

        Path backupParentDir = getNewVersionLocation().getParent();
        log.info("Creating new dir " + backupParentDir.toString());
        this.versionOwnerFs.mkdirs(backupParentDir);

        log.info("Moving data from " + versionPath + " to " + getNewVersionLocation());
        fsMove(versionPath, getNewVersionLocation());

        FsPermission ownerAndGroupOnly = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
        HadoopUtils.setPermissions(backupParentDir, this.versionOwner, this.backUpOwner, this.versionOwnerFs,
            ownerAndGroupOnly);
      }
    }

    executeDropVersionQueries(executor);
  }
}
Aggregations