Search in sources :

Example 1 with HiveProxyQueryExecutor

use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.

the class PurgeableHivePartitionDataset method purge.

/**
 * Performs the actual purge of this partition.
 *
 * <p>Sequence of operations:
 * <ol>
 *   <li>Resolve the dataset owner, then obtain a file system and a query executor for that owner.</li>
 *   <li>If simulate is set, log and return without running any query.</li>
 *   <li>Create the staging table partition (same schema as the original table partition).</li>
 *   <li>Run the purge queries, which populate the staging partition from the original table left outer
 *       joined with the compliance id table. The last modified time of the original partition location
 *       is captured immediately before and immediately after these queries.</li>
 *   <li>Run the backup queries so that a backup table/partition points at the original location.</li>
 *   <li>Instantiate the configured {@code CommitPolicy} and abort if it refuses to commit.</li>
 *   <li>Alter the original partition to point at the staging partition location, then drop the
 *       staging table. In flight queries are not affected by the alter partition query.</li>
 * </ol>
 *
 * <p>The query executor is closed when this method exits, on every path.
 *
 * @throws IOException wrapping any {@link SQLException} raised while executing queries
 * @throws RuntimeException if the commit policy rejects the commit
 */
public void purge() throws IOException {
    this.datasetOwner = getOwner();
    State jobState = new State(this.state);
    this.datasetOwnerFs = ProxyUtils.getOwnerFs(jobState, this.datasetOwner);

    try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(jobState, this.datasetOwner)) {
        if (this.simulate) {
            log.info("Simulate is set to true. Wont't run actual queries");
            return;
        }

        String partitionLocation = getOriginalPartitionLocation();

        // Step 1: staging table + staging partition.
        executor.executeQueries(HivePurgerQueryTemplate.getCreateStagingTableQuery(this), this.datasetOwner);

        // Step 2: fill the staging partition with the filtered data, bracketing the run with
        // modification timestamps of the original location so the commit policy can compare them.
        this.startTime = getLastModifiedTime(partitionLocation);
        executor.executeQueries(this.purgeQueries, this.datasetOwner);
        this.endTime = getLastModifiedTime(partitionLocation);

        // Step 3: backup table and partition pointing to the original partition location.
        executor.executeQueries(HivePurgerQueryTemplate.getBackupQueries(this), this.datasetOwner);

        // Step 4: consult the configured commit policy before touching the original partition.
        String policyClassName = this.state.getProp(
            ComplianceConfigurationKeys.PURGER_COMMIT_POLICY_CLASS,
            ComplianceConfigurationKeys.DEFAULT_PURGER_COMMIT_POLICY_CLASS);
        CommitPolicy<PurgeableHivePartitionDataset> policy =
            GobblinConstructorUtils.invokeConstructor(CommitPolicy.class, policyClassName);
        boolean safeToCommit = policy.shouldCommit(this);
        if (!safeToCommit) {
            log.error("Last modified time before start of execution : " + this.startTime);
            log.error("Last modified time after execution of purge queries : " + this.endTime);
            throw new RuntimeException("Failed to commit. File modified during job run.");
        }

        // Step 5: re-point the original partition at the cleaned (staging) location.
        executor.executeQueries(
            HivePurgerQueryTemplate.getAlterOriginalPartitionLocationQueries(this), this.datasetOwner);

        // Step 6: the staging table is no longer needed.
        executor.executeQueries(HivePurgerQueryTemplate.getDropStagingTableQuery(this), this.datasetOwner);
    } catch (SQLException sqlFailure) {
        throw new IOException(sqlFailure);
    }
}
Also used : SQLException(java.sql.SQLException) State(org.apache.gobblin.configuration.State) IOException(java.io.IOException) HiveProxyQueryExecutor(org.apache.gobblin.compliance.HiveProxyQueryExecutor)

Example 2 with HiveProxyQueryExecutor

use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.

the class HivePartitionVersionRetentionCleaner method clean.

/**
 * Cleans a single partition version: optionally deletes its data, then drops its metadata.
 *
 * <p>The version location is classified in this order:
 * <ul>
 *   <li>Location does not exist: no data is touched.</li>
 *   <li>Location equals the dataset location (case-insensitive string comparison): data is kept.</li>
 *   <li>Location is listed in {@code nonDeletableVersionLocations}: data is kept.</li>
 *   <li>Location has content: if simulate is set, log and return without deleting data or dropping
 *       metadata; otherwise recursively delete the location.</li>
 * </ul>
 * Unless the simulate early return was taken, the drop-version queries are executed afterwards, so
 * in the first three cases metadata is dropped regardless of the simulate flag.
 *
 * <p>The query executor is closed when this method exits, on every path.
 *
 * @throws IOException if a file system operation or a called helper fails
 */
@Override
public void clean() throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    Path versionPath = version.getLocation();
    Path datasetPath = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
    String versionName = version.datasetURN();

    State jobState = new State(this.state);
    this.fs = ProxyUtils.getOwnerFs(jobState, this.versionOwner);

    try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(jobState, this.versionOwner)) {
        log.info("Trying to clean version " + versionName);

        boolean locationExists = this.fs.exists(versionPath);
        if (!locationExists) {
            log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version  " + versionName);
        } else if (datasetPath.toString().equalsIgnoreCase(versionPath.toString())) {
            log.info("Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version " + versionName);
        } else if (this.nonDeletableVersionLocations.contains(versionPath.toString())) {
            log.info("This version corresponds to the non deletable version. Won't delete the data but metadata will be dropped for the version " + versionName);
        } else if (HadoopUtils.hasContent(this.fs, versionPath)) {
            // Only this branch honours simulate: nothing is deleted and no metadata is dropped.
            if (this.simulate) {
                log.info("Simulate is set to true. Won't delete the partition " + versionName);
                return;
            }
            log.info("Deleting data from the version " + versionName);
            this.fs.delete(versionPath, true);
        }

        // Reached on every path except the simulate early return above.
        executeDropVersionQueries(executor);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) State(org.apache.gobblin.configuration.State) HiveProxyQueryExecutor(org.apache.gobblin.compliance.HiveProxyQueryExecutor)

Example 3 with HiveProxyQueryExecutor

use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.

the class RestorableHivePartitionDataset method restore.

/**
 * Restores this partition from {@code datasetToRestore}, moving the current data to the trash
 * partition location first.
 *
 * <p>Sequence of operations:
 * <ol>
 *   <li>Obtain a file system for the dataset owner and a query executor for the dataset owner,
 *       the owner of the dataset being restored from, and the trash owner.</li>
 *   <li>If the compliance job simulate property is set, log and return without changing anything.</li>
 *   <li>Run the trash table queries and create the parent directory of the trash partition location.</li>
 *   <li>Rename the current location to the trash partition location. If the rename reports failure,
 *       abort before any backup data is moved or any partition is dropped.</li>
 *   <li>Set permissions on the trash parent directory, move the backup data into the current
 *       location, set permissions on its parent directory, and run the drop-partition queries.</li>
 * </ol>
 *
 * <p>The query executor is closed when this method exits, on every path.
 *
 * @throws IOException if the current data cannot be moved to the trash location, or if any other
 *         file system operation or called helper fails
 */
public void restore() throws IOException {
    State state = new State(this.state);
    this.datasetOwnerFs = ProxyUtils.getOwnerFs(state, this.datasetOwner);
    try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.datasetOwner, this.datasetToRestoreOwner, this.trashOwner)) {
        if (this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE, ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE)) {
            log.info("Simulating restore of " + datasetURN() + " with " + this.datasetToRestore.datasetURN());
            return;
        }
        Path trashPartitionLocation = getTrashPartitionLocation();
        executeTrashTableQueries(queryExecutor);
        this.datasetOwnerFs.mkdirs(trashPartitionLocation.getParent());
        // rename signals failure through its boolean result rather than an exception (assuming
        // datasetOwnerFs is a Hadoop FileSystem). Continuing after a failed rename would move the
        // backup data onto the still-present current data and then drop partitions, so stop here.
        if (!this.datasetOwnerFs.rename(getLocation(), trashPartitionLocation)) {
            throw new IOException("Failed to move dataset " + datasetURN() + " from " + getLocation() + " to trash location " + trashPartitionLocation);
        }
        // Same permission is applied to both parent directories below.
        FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
        HadoopUtils.setPermissions(trashPartitionLocation.getParent(), this.datasetOwner, this.trashOwner, this.datasetOwnerFs, permission);
        log.info("Moved dataset " + datasetURN() + " from " + getLocation() + " to trash location " + trashPartitionLocation);
        fsMove(this.datasetToRestore.getLocation(), getLocation());
        HadoopUtils.setPermissions(getLocation().getParent(), this.datasetOwner, this.trashOwner, this.datasetOwnerFs, permission);
        log.info("Moved data from backup " + this.datasetToRestore.getLocation() + " to location " + getLocation());
        executeDropPartitionQueries(queryExecutor);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) State(org.apache.gobblin.configuration.State) FsPermission(org.apache.hadoop.fs.permission.FsPermission) HiveProxyQueryExecutor(org.apache.gobblin.compliance.HiveProxyQueryExecutor)

Example 4 with HiveProxyQueryExecutor

use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.

the class ComplianceRetentionJob method executeDropTableQuery.

/**
 * Runs the drop-table query for the table behind {@code hiveDataset}, using a query executor
 * obtained for the table's owner.
 *
 * <p>The owner is wrapped with {@code Optional.fromNullable}, so a table without an owner yields an
 * absent owner rather than a null reference. The executor is closed when this method exits.
 *
 * @param hiveDataset dataset whose table is to be dropped
 * @param properties  job properties used to build the {@link State} handed to the executor factory
 * @throws IOException wrapping any {@link SQLException} raised while executing the query
 */
private static void executeDropTableQuery(HiveDataset hiveDataset, Properties properties) throws IOException {
    String database = hiveDataset.getTable().getDbName();
    String table = hiveDataset.getTable().getTableName();
    Optional<String> owner = Optional.fromNullable(hiveDataset.getTable().getOwner());

    State jobState = new State(properties);
    try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(jobState, owner)) {
        String dropQuery = HivePurgerQueryTemplate.getDropTableQuery(database, table);
        executor.executeQuery(dropQuery, owner);
    } catch (SQLException sqlFailure) {
        throw new IOException(sqlFailure);
    }
}
Also used : SQLException(java.sql.SQLException) State(org.apache.gobblin.configuration.State) IOException(java.io.IOException) HiveProxyQueryExecutor(org.apache.gobblin.compliance.HiveProxyQueryExecutor)

Example 5 with HiveProxyQueryExecutor

use of org.apache.gobblin.compliance.HiveProxyQueryExecutor in project incubator-gobblin by apache.

the class HivePartitionVersionRetentionReaper method clean.

/**
 * Reaps a single partition version: deletes or relocates its data where applicable, then drops
 * its metadata.
 *
 * <p>The version is classified in this order:
 * <ul>
 *   <li>Location does not exist: no data is touched.</li>
 *   <li>Location equals the dataset location (case-insensitive string comparison): data is kept.</li>
 *   <li>Simulate is set: log and return without moving data or dropping metadata.</li>
 *   <li>Version name contains {@code ComplianceConfigurationKeys.STAGING}: the location is deleted
 *       recursively.</li>
 *   <li>Version name contains {@code ComplianceConfigurationKeys.BACKUP}: the alter queries are run
 *       (per the original documentation, this registers the version in the backup db), the parent of
 *       the new version location is created, the data is moved there, and permissions are set on
 *       that parent directory.</li>
 * </ul>
 * Unless the simulate early return was taken, the drop-version queries are executed afterwards.
 * Because the simulate check comes third, the first two cases drop metadata even when simulate is set.
 *
 * <p>The query executor is closed when this method exits, on every path.
 *
 * @throws IOException if a file system operation or a called helper fails
 */
@Override
public void clean() throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    Path versionPath = version.getLocation();
    Path datasetPath = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
    String versionName = version.datasetURN();

    State jobState = new State(this.state);
    this.versionOwnerFs = ProxyUtils.getOwnerFs(jobState, this.versionOwner);

    try (HiveProxyQueryExecutor executor = ProxyUtils.getQueryExecutor(jobState, this.versionOwner, this.backUpOwner)) {
        boolean locationExists = this.versionOwnerFs.exists(versionPath);
        if (!locationExists) {
            log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version  " + versionName);
        } else if (datasetPath.toString().equalsIgnoreCase(versionPath.toString())) {
            log.info("Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version " + versionName);
        } else if (this.simulate) {
            // Simulate only short-circuits versions that would otherwise be deleted or moved.
            log.info("Simulate is set to true. Won't move the version " + versionName);
            return;
        } else if (versionName.contains(ComplianceConfigurationKeys.STAGING)) {
            log.info("Deleting data from version " + versionName);
            this.versionOwnerFs.delete(versionPath, true);
        } else if (versionName.contains(ComplianceConfigurationKeys.BACKUP)) {
            executeAlterQueries(executor);

            Path backupParentDir = getNewVersionLocation().getParent();
            log.info("Creating new dir " + backupParentDir.toString());
            this.versionOwnerFs.mkdirs(backupParentDir);

            log.info("Moving data from " + versionPath + " to " + getNewVersionLocation());
            fsMove(versionPath, getNewVersionLocation());

            FsPermission backupDirPermission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
            HadoopUtils.setPermissions(backupParentDir, this.versionOwner, this.backUpOwner, this.versionOwnerFs, backupDirPermission);
        }

        // Reached on every path except the simulate early return above.
        executeDropVersionQueries(executor);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) State(org.apache.gobblin.configuration.State) FsPermission(org.apache.hadoop.fs.permission.FsPermission) HiveProxyQueryExecutor(org.apache.gobblin.compliance.HiveProxyQueryExecutor)

Aggregations

HiveProxyQueryExecutor (org.apache.gobblin.compliance.HiveProxyQueryExecutor)5 State (org.apache.gobblin.configuration.State)5 Path (org.apache.hadoop.fs.Path)3 IOException (java.io.IOException)2 SQLException (java.sql.SQLException)2 FsPermission (org.apache.hadoop.fs.permission.FsPermission)2