Search in sources :

Example 1 with PartitionDeregisterStep

Use of org.apache.gobblin.hive.PartitionDeregisterStep in the incubator-gobblin project by Apache.

From the class HiveCopyEntityHelper, method addPartitionDeregisterSteps.

/**
 * Appends the post-publish steps needed to deregister {@code partition} to {@code copyEntities}.
 *
 * <p>Up to two {@link PostPublishStep}s are added, in this order:
 * <ol>
 *   <li>a step wrapping a {@link DeleteFileCommitStep} built from the partition's paths, only when
 *       the configured {@code deleteMethod} yields at least one path;</li>
 *   <li>a step wrapping a {@link PartitionDeregisterStep} for the partition, always.</li>
 * </ol>
 * Each added step consumes one priority value, starting at {@code initialPriority}.
 *
 * @param copyEntities    list the new steps are appended to
 * @param fileSet         file set name passed to each {@link PostPublishStep}
 * @param initialPriority priority assigned to the first step added
 * @param table           table owning the partition; its data location is handed to the delete step
 * @param partition       partition to deregister
 * @return the first priority value not used by the steps added here
 * @throws IOException propagated from the calls made while resolving paths or building the steps
 */
int addPartitionDeregisterSteps(List<CopyEntity> copyEntities, String fileSet, int initialPriority, Table table, Partition partition) throws IOException {
    // Work out which paths (if any) should be removed along with the partition.
    final Collection<Path> pathsToDelete;
    if (this.deleteMethod == DeregisterFileDeleteMethod.RECURSIVE) {
        // Only the partition's data location itself is listed.
        pathsToDelete = Lists.newArrayList(partition.getDataLocation());
    } else if (this.deleteMethod == DeregisterFileDeleteMethod.INPUT_FORMAT) {
        // Let the partition's input format enumerate the paths under its location.
        InputFormat<?, ?> partitionFormat = HiveUtils.getInputFormat(partition.getTPartition().getSd());
        HiveLocationDescriptor locationOnTarget =
            new HiveLocationDescriptor(partition.getDataLocation(), partitionFormat, this.targetFs, this.dataset.getProperties());
        pathsToDelete = locationOnTarget.getPaths().keySet();
    } else {
        // NO_DELETE, or any other value: nothing is scheduled for deletion.
        pathsToDelete = Lists.newArrayList();
    }

    int nextPriority = initialPriority;

    if (!pathsToDelete.isEmpty()) {
        DeleteFileCommitStep deleteStep =
            DeleteFileCommitStep.fromPaths(this.targetFs, pathsToDelete, this.dataset.getProperties(), table.getDataLocation());
        copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, nextPriority));
        nextPriority++;
    }

    // The deregister step is queued after the delete step (when present), so it gets the higher priority number.
    PartitionDeregisterStep deregisterStep =
        new PartitionDeregisterStep(table.getTTable(), partition.getTPartition(), this.targetURI, this.hiveRegProps);
    copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), deregisterStep, nextPriority));
    nextPriority++;

    return nextPriority;
}
Also used : Path(org.apache.hadoop.fs.Path) PartitionDeregisterStep(org.apache.gobblin.hive.PartitionDeregisterStep) InputFormat(org.apache.hadoop.mapred.InputFormat) PostPublishStep(org.apache.gobblin.data.management.copy.entities.PostPublishStep) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) ToString(lombok.ToString)

Aggregations

ToString (lombok.ToString): 1, PostPublishStep (org.apache.gobblin.data.management.copy.entities.PostPublishStep): 1, PartitionDeregisterStep (org.apache.gobblin.hive.PartitionDeregisterStep): 1, DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep): 1, Path (org.apache.hadoop.fs.Path): 1, InputFormat (org.apache.hadoop.mapred.InputFormat): 1