
Example 1 with PrePublishStep

Use of org.apache.gobblin.data.management.copy.entities.PrePublishStep in project incubator-gobblin by apache.

From the class UnpartitionedTableFileSet, the method generateCopyEntities:

// Suppress the FindBugs dead-store warning for the final "stepPriority++": the incremented value is never read, but the increment is kept so further steps can be added later without renumbering
@SuppressFBWarnings("DLS_DEAD_LOCAL_STORE")
@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
    MultiTimingEvent multiTimer = new MultiTimingEvent(this.helper.getEventSubmitter(), "TableCopy", true);
    int stepPriority = 0;
    String fileSet = getTable().getTableName();
    List<CopyEntity> copyEntities = Lists.newArrayList();
    Optional<Table> existingTargetTable = this.helper.getExistingTargetTable();
    if (existingTargetTable.isPresent()) {
        if (!this.helper.getTargetTable().getDataLocation().equals(existingTargetTable.get().getDataLocation())) {
            switch(this.helper.getExistingEntityPolicy()) {
                case UPDATE_TABLE:
                    // Update the file location while keeping the existing table entity.
                    log.warn("Source table will not be deregistered although its file location has changed; updating the source table's" + " file location to " + this.helper.getTargetTable().getDataLocation());
                    existingTargetTable = Optional.absent();
                    break;
                case REPLACE_TABLE:
                case REPLACE_TABLE_AND_PARTITIONS:
                    // Required to de-register the original table.
                    log.warn("Source and target table are not compatible. Will override target table " + existingTargetTable.get().getDataLocation());
                    stepPriority = this.helper.addTableDeregisterSteps(copyEntities, fileSet, stepPriority, this.helper.getTargetTable());
                    existingTargetTable = Optional.absent();
                    break;
                default:
                    log.error("Source and target table are not compatible. Aborting copy of table " + this.helper.getTargetTable(), new HiveTableLocationNotMatchException(this.helper.getTargetTable().getDataLocation(), existingTargetTable.get().getDataLocation()));
                    multiTimer.close();
                    return Lists.newArrayList();
            }
        }
    }
    stepPriority = this.helper.addSharedSteps(copyEntities, fileSet, stepPriority);
    HiveLocationDescriptor sourceLocation = HiveLocationDescriptor.forTable(getTable(), getHiveDataset().getFs(), getHiveDataset().getProperties());
    HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forTable(this.helper.getTargetTable(), this.helper.getTargetFs(), getHiveDataset().getProperties());
    Optional<HiveLocationDescriptor> existingTargetLocation = existingTargetTable.isPresent() ? Optional.of(HiveLocationDescriptor.forTable(existingTargetTable.get(), this.helper.getTargetFs(), getHiveDataset().getProperties())) : Optional.<HiveLocationDescriptor>absent();
    if (this.helper.getFastTableSkip().isPresent() && this.helper.getFastTableSkip().get().apply(this.helper)) {
        log.info(String.format("Skipping copy of table %s due to fast table skip predicate.", getTable().getDbName() + "." + getTable().getTableName()));
        multiTimer.close();
        return Lists.newArrayList();
    }
    HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation, desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, this.helper);
    multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
    // Step used to delete stale files left over from the existing snapshot
    DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(this.helper.getTargetFs(), diffPathSet.pathsToDelete, getHiveDataset().getProperties());
    copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
    for (CopyableFile.Builder builder : this.helper.getCopyableFilesFromPaths(diffPathSet.filesToCopy, this.helper.getConfiguration(), Optional.<Partition>absent())) {
        CopyableFile fileEntity = builder.fileSet(fileSet).datasetOutputPath(desiredTargetLocation.location.toString()).build();
        this.helper.setCopyableFileDatasets(fileEntity);
        copyEntities.add(fileEntity);
    }
    multiTimer.close();
    return copyEntities;
}
Also used: Table (org.apache.hadoop.hive.ql.metadata.Table), CopyEntity (org.apache.gobblin.data.management.copy.CopyEntity), MultiTimingEvent (org.apache.gobblin.metrics.event.MultiTimingEvent), DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep), CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile), PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep), SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)
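
The recurring pattern across these examples is to wrap a DeleteFileCommitStep in a PrePublishStep so that stale files on the target are deleted before the newly copied data is published. Below is a minimal sketch of that pattern; it uses only the constructors shown on this page, while the sketch class, its method, and its parameter names are hypothetical:

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.google.common.collect.Maps;

import org.apache.gobblin.data.management.copy.CopyEntity;
import org.apache.gobblin.data.management.copy.entities.PrePublishStep;
import org.apache.gobblin.util.commit.DeleteFileCommitStep;

public class PrePublishDeleteSketch {

    // Hypothetical helper distilled from the examples on this page: schedules
    // deletion of stale target paths before the copied files are published.
    public static int addDeleteStep(List<CopyEntity> copyEntities, FileSystem targetFs,
            Collection<Path> stalePaths, Properties props, String fileSet, int stepPriority)
            throws IOException {
        // Commit step that removes the stale paths on the target file system.
        DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(targetFs, stalePaths, props);
        // Wrap it in a PrePublishStep: an empty metadata map, as in the examples
        // above, and a priority that the callers increment once per step added.
        copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
        return stepPriority;
    }
}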

Example 2 with PrePublishStep

Use of org.apache.gobblin.data.management.copy.entities.PrePublishStep in project incubator-gobblin by apache.

From the class HivePartitionFileSet, the method generateCopyEntities:

@Override
protected Collection<CopyEntity> generateCopyEntities() throws IOException {
    try (Closer closer = Closer.create()) {
        MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));
        int stepPriority = 0;
        String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());
        List<CopyEntity> copyEntities = Lists.newArrayList();
        stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
        Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs, hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition));
        Partition targetPartition = getTargetPartition(this.partition, targetPath);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
        if (this.existingTargetPartition.isPresent()) {
            hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
            try {
                checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
            } catch (IOException ioe) {
                if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS && hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_TABLE_AND_PARTITIONS) {
                    log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition, ioe);
                    return Lists.newArrayList();
                }
                log.warn("Source and target partitions are not compatible. Will override target partition: " + ioe.getMessage());
                log.debug("Incompatibility details: ", ioe);
                stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority, hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
                this.existingTargetPartition = Optional.absent();
            }
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
        if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent() && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
            log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.", this.partition.getCompleteName()));
            return Lists.newArrayList();
        }
        HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath).withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable())).withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition()))).build();
        HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetURI(), partitionHiveSpec, hiveCopyEntityHelper.getHiveRegProps());
        copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(), register, stepPriority++));
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
        HiveLocationDescriptor sourceLocation = HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
        HiveLocationDescriptor desiredTargetLocation = HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
        Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent() ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(), hiveCopyEntityHelper.getTargetFs(), this.properties)) : Optional.<HiveLocationDescriptor>absent();
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
        HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation, desiredTargetLocation, existingTargetLocation, Optional.<Partition>absent(), multiTimer, hiveCopyEntityHelper);
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
        if (diffPathSet.pathsToDelete.size() > 0) {
            DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(), diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
            copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(), deleteStep, stepPriority++));
        }
        multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
        for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy, hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
            CopyableFile fileEntity = builder.fileSet(fileSet).checksum(new byte[0]).datasetOutputPath(desiredTargetLocation.location.toString()).build();
            this.hiveCopyEntityHelper.setCopyableFileDatasets(fileEntity);
            copyEntities.add(fileEntity);
        }
        log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());
        return copyEntities;
    }
}
Also used: Closer (com.google.common.io.Closer), Path (org.apache.hadoop.fs.Path), Partition (org.apache.hadoop.hive.ql.metadata.Partition), CopyEntity (org.apache.gobblin.data.management.copy.CopyEntity), PostPublishStep (org.apache.gobblin.data.management.copy.entities.PostPublishStep), MultiTimingEvent (org.apache.gobblin.metrics.event.MultiTimingEvent), IOException (java.io.IOException), DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep), HiveRegisterStep (org.apache.gobblin.hive.HiveRegisterStep), SimpleHiveSpec (org.apache.gobblin.hive.spec.SimpleHiveSpec), CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile), PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep), HiveSpec (org.apache.gobblin.hive.spec.HiveSpec)

Example 3 with PrePublishStep

Use of org.apache.gobblin.data.management.copy.entities.PrePublishStep in project incubator-gobblin by apache.

From the test class ConfigBasedDatasetTest, the method testGetCopyableFiles:

@Test
public void testGetCopyableFiles() throws Exception {
    String sourceDir = getClass().getClassLoader().getResource("configBasedDatasetTest/src").getFile();
    String destinationDir = getClass().getClassLoader().getResource("configBasedDatasetTest/dest").getFile();
    long sourceWatermark = 100L;
    Collection<? extends CopyEntity> copyableFiles = testGetCopyableFilesHelper(sourceDir, destinationDir, sourceWatermark, false);
    Assert.assertEquals(copyableFiles.size(), 8);
    copyableFiles = testGetCopyableFilesHelper(sourceDir, destinationDir, sourceWatermark, true);
    Assert.assertEquals(copyableFiles.size(), 6);
    Set<Path> paths = Sets.newHashSet(new Path("dir1/file2"), new Path("dir1/file1"), new Path("dir2/file1"), new Path("dir2/file3"));
    for (CopyEntity copyEntity : copyableFiles) {
        if (copyEntity instanceof CopyableFile) {
            CopyableFile file = (CopyableFile) copyEntity;
            Path originRelativePath = PathUtils.relativizePath(PathUtils.getPathWithoutSchemeAndAuthority(file.getOrigin().getPath()), PathUtils.getPathWithoutSchemeAndAuthority(new Path(sourceDir)));
            Path targetRelativePath = PathUtils.relativizePath(PathUtils.getPathWithoutSchemeAndAuthority(file.getDestination()), PathUtils.getPathWithoutSchemeAndAuthority(new Path(destinationDir)));
            Assert.assertTrue(paths.contains(originRelativePath));
            Assert.assertTrue(paths.contains(targetRelativePath));
            Assert.assertEquals(originRelativePath, targetRelativePath);
        } else if (copyEntity instanceof PrePublishStep) {
            PrePublishStep pre = (PrePublishStep) copyEntity;
            Assert.assertTrue(pre.getStep() instanceof DeleteFileCommitStep);
            // need to delete this file
            Assert.assertTrue(pre.explain().indexOf("configBasedDatasetTest/dest/dir1/file1") > 0);
        } else if (copyEntity instanceof PostPublishStep) {
            PostPublishStep post = (PostPublishStep) copyEntity;
            Assert.assertTrue(post.getStep() instanceof WatermarkMetadataGenerationCommitStep);
            Assert.assertTrue(post.explain().indexOf("dest/_metadata") > 0 && post.explain().indexOf("" + sourceWatermark) > 0);
        } else {
            throw new Exception("Wrong type");
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), CopyEntity (org.apache.gobblin.data.management.copy.CopyEntity), PostPublishStep (org.apache.gobblin.data.management.copy.entities.PostPublishStep), CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile), DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep), PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep), Test (org.testng.annotations.Test)

Example 4 with PrePublishStep

Use of org.apache.gobblin.data.management.copy.entities.PrePublishStep in project incubator-gobblin by apache.

From the class ConfigBasedDataset, the method getCopyableFiles:

@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs, CopyConfiguration copyConfiguration) throws IOException {
    List<CopyEntity> copyableFiles = Lists.newArrayList();
    EndPoint copyFromRaw = copyRoute.getCopyFrom();
    EndPoint copyToRaw = copyRoute.getCopyTo();
    if (!(copyFromRaw instanceof HadoopFsEndPoint && copyToRaw instanceof HadoopFsEndPoint)) {
        log.warn("Currently only handle the Hadoop Fs EndPoint replication");
        return copyableFiles;
    }
    // For {@link HadoopFsEndPoint}s, set pathfilter and applyFilterToDirectories
    HadoopFsEndPoint copyFrom = (HadoopFsEndPoint) copyFromRaw;
    HadoopFsEndPoint copyTo = (HadoopFsEndPoint) copyToRaw;
    copyFrom.setPathFilter(pathFilter);
    copyFrom.setApplyFilterToDirectories(applyFilterToDirectories);
    copyTo.setPathFilter(pathFilter);
    copyTo.setApplyFilterToDirectories(applyFilterToDirectories);
    if (this.watermarkEnabled) {
        if ((!copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent()) || (copyFromRaw.getWatermark().isPresent() && copyToRaw.getWatermark().isPresent() && copyFromRaw.getWatermark().get().compareTo(copyToRaw.getWatermark().get()) <= 0)) {
            log.info("No need to copy as destination watermark >= source watermark with source watermark {}, for dataset with metadata {}", copyFromRaw.getWatermark().isPresent() ? copyFromRaw.getWatermark().get().toJson() : "N/A", this.rc.getMetaData());
            return copyableFiles;
        }
    }
    Configuration conf = HadoopUtils.newConfiguration();
    FileSystem copyFromFs = FileSystem.get(copyFrom.getFsURI(), conf);
    FileSystem copyToFs = FileSystem.get(copyTo.getFsURI(), conf);
    Collection<FileStatus> allFilesInSource = copyFrom.getFiles();
    Collection<FileStatus> allFilesInTarget = copyTo.getFiles();
    Set<FileStatus> copyFromFileStatuses = Sets.newHashSet(allFilesInSource);
    Map<Path, FileStatus> copyToFileMap = Maps.newHashMap();
    for (FileStatus f : allFilesInTarget) {
        copyToFileMap.put(PathUtils.getPathWithoutSchemeAndAuthority(f.getPath()), f);
    }
    Collection<Path> deletedPaths = Lists.newArrayList();
    boolean watermarkMetadataCopied = false;
    boolean deleteTargetIfNotExistOnSource = rc.isDeleteTargetIfNotExistOnSource();
    for (FileStatus originFileStatus : copyFromFileStatuses) {
        Path relative = PathUtils.relativizePath(PathUtils.getPathWithoutSchemeAndAuthority(originFileStatus.getPath()), PathUtils.getPathWithoutSchemeAndAuthority(copyFrom.getDatasetPath()));
        // construct the new path in the target file system
        Path newPath = new Path(copyTo.getDatasetPath(), relative);
        if (relative.toString().equals(ReplicaHadoopFsEndPoint.WATERMARK_FILE)) {
            watermarkMetadataCopied = true;
        }
        // skip the copy when the target already has a same-length, newer file
        if (copyToFileMap.containsKey(newPath) && copyToFileMap.get(newPath).getLen() == originFileStatus.getLen() && copyToFileMap.get(newPath).getModificationTime() > originFileStatus.getModificationTime()) {
            log.debug("Source file is older than the target copy; skipped copy of {} for dataset with metadata {}", originFileStatus.getPath(), this.rc.getMetaData());
        } else {
            // the stale copy on the target file system must be deleted before republishing
            if (copyToFileMap.containsKey(newPath)) {
                deletedPaths.add(newPath);
            }
            CopyableFile copyableFile = CopyableFile.fromOriginAndDestination(copyFromFs, originFileStatus, copyToFs.makeQualified(newPath), copyConfiguration).fileSet(PathUtils.getPathWithoutSchemeAndAuthority(copyTo.getDatasetPath()).toString()).build();
            copyableFile.setFsDatasets(copyFromFs, copyToFs);
            copyableFiles.add(copyableFile);
        }
        // remove the path from the map; entries left afterwards exist only on the target
        copyToFileMap.remove(newPath);
    }
    // delete paths in the target directory that do not exist on the source
    if (deleteTargetIfNotExistOnSource) {
        deletedPaths.addAll(copyToFileMap.keySet());
    }
    // delete old files first
    if (!deletedPaths.isEmpty()) {
        DeleteFileCommitStep deleteCommitStep = DeleteFileCommitStep.fromPaths(copyToFs, deletedPaths, this.props);
        copyableFiles.add(new PrePublishStep(copyTo.getDatasetPath().toString(), Maps.<String, String>newHashMap(), deleteCommitStep, 0));
    }
    // Generate the watermark file even if watermark checking is disabled, so that the feature is functional once it is enabled.
    if ((!watermarkMetadataCopied) && copyFrom.getWatermark().isPresent()) {
        copyableFiles.add(new PostPublishStep(copyTo.getDatasetPath().toString(), Maps.<String, String>newHashMap(), new WatermarkMetadataGenerationCommitStep(copyTo.getFsURI().toString(), copyTo.getDatasetPath(), copyFrom.getWatermark().get()), 1));
    }
    return copyableFiles;
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), Configuration (org.apache.hadoop.conf.Configuration), CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration), CopyEntity (org.apache.gobblin.data.management.copy.CopyEntity), PostPublishStep (org.apache.gobblin.data.management.copy.entities.PostPublishStep), DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep), FileSystem (org.apache.hadoop.fs.FileSystem), CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile), PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep)
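
Example 4 above pairs the two step types: a PrePublishStep at priority 0 deletes stale target files before publication, while a PostPublishStep at priority 1 writes watermark metadata afterwards. The sketch below mirrors that pairing with a hypothetical logging CommitStep standing in for the real delete and watermark steps; it assumes the isCompleted()/execute() contract implied by the commit steps used on this page:

import java.io.IOException;
import java.util.List;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import org.apache.gobblin.commit.CommitStep;
import org.apache.gobblin.data.management.copy.CopyEntity;
import org.apache.gobblin.data.management.copy.entities.PostPublishStep;
import org.apache.gobblin.data.management.copy.entities.PrePublishStep;

public class PublishStepPairingSketch {

    // Hypothetical stand-in for DeleteFileCommitStep or WatermarkMetadataGenerationCommitStep.
    static class LoggingCommitStep implements CommitStep {
        private final String message;

        LoggingCommitStep(String message) {
            this.message = message;
        }

        @Override
        public boolean isCompleted() throws IOException {
            // Never considered done, so the step always executes.
            return false;
        }

        @Override
        public void execute() throws IOException {
            System.out.println(this.message);
        }
    }

    public static List<CopyEntity> buildPublishSteps(String fileSet) {
        List<CopyEntity> entities = Lists.newArrayList();
        // Runs before the copied files are published (e.g. deleting stale files).
        entities.add(new PrePublishStep(fileSet, Maps.<String, String>newHashMap(),
                new LoggingCommitStep("pre-publish: clean up target"), 0));
        // Runs after publication (e.g. generating watermark metadata).
        entities.add(new PostPublishStep(fileSet, Maps.<String, String>newHashMap(),
                new LoggingCommitStep("post-publish: write metadata"), 1));
        return entities;
    }
}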

Example 5 with PrePublishStep

Use of org.apache.gobblin.data.management.copy.entities.PrePublishStep in project incubator-gobblin by apache.

From the class RecursiveCopyableDataset, the method getCopyableFiles:

@Override
public Collection<? extends CopyEntity> getCopyableFiles(FileSystem targetFs, CopyConfiguration configuration) throws IOException {
    Path nonGlobSearchPath = PathUtils.deepestNonGlobPath(this.glob);
    Path targetPath = new Path(configuration.getPublishDir(), PathUtils.relativizePath(this.rootPath, nonGlobSearchPath));
    Map<Path, FileStatus> filesInSource = createPathMap(getFilesAtPath(this.fs, this.rootPath, this.pathFilter), this.rootPath);
    Map<Path, FileStatus> filesInTarget = createPathMap(getFilesAtPath(targetFs, targetPath, this.pathFilter), targetPath);
    List<Path> toCopy = Lists.newArrayList();
    Map<Path, FileStatus> toDelete = Maps.newHashMap();
    boolean requiresUpdate = false;
    for (Map.Entry<Path, FileStatus> entry : filesInSource.entrySet()) {
        FileStatus statusInTarget = filesInTarget.remove(entry.getKey());
        if (statusInTarget != null) {
            // present in both source and target
            if (!sameFile(filesInSource.get(entry.getKey()), statusInTarget)) {
                toCopy.add(entry.getKey());
                toDelete.put(entry.getKey(), statusInTarget);
                requiresUpdate = true;
            }
        } else {
            toCopy.add(entry.getKey());
        }
    }
    if (!this.update && requiresUpdate) {
        throw new IOException("Some files need to be copied but they already exist in the destination. " + "Aborting because not running in update mode.");
    }
    if (this.delete) {
        toDelete.putAll(filesInTarget);
    }
    List<CopyEntity> copyEntities = Lists.newArrayList();
    List<CopyableFile> copyableFiles = Lists.newArrayList();
    for (Path path : toCopy) {
        FileStatus file = filesInSource.get(path);
        Path filePathRelativeToSearchPath = PathUtils.relativizePath(file.getPath(), nonGlobSearchPath);
        Path thisTargetPath = new Path(configuration.getPublishDir(), filePathRelativeToSearchPath);
        CopyableFile copyableFile = CopyableFile.fromOriginAndDestination(this.fs, file, thisTargetPath, configuration).fileSet(datasetURN()).datasetOutputPath(thisTargetPath.toString()).ancestorsOwnerAndPermission(CopyableFile.resolveReplicatedOwnerAndPermissionsRecursively(this.fs, file.getPath().getParent(), nonGlobSearchPath, configuration)).build();
        copyableFile.setFsDatasets(this.fs, targetFs);
        copyableFiles.add(copyableFile);
    }
    copyEntities.addAll(this.copyableFileFilter.filter(this.fs, targetFs, copyableFiles));
    if (!toDelete.isEmpty()) {
        CommitStep step = new DeleteFileCommitStep(targetFs, toDelete.values(), this.properties, this.deleteEmptyDirectories ? Optional.of(targetPath) : Optional.<Path>absent());
        copyEntities.add(new PrePublishStep(datasetURN(), Maps.<String, String>newHashMap(), step, 1));
    }
    return copyEntities;
}
Also used: Path (org.apache.hadoop.fs.Path), DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep), CommitStep (org.apache.gobblin.commit.CommitStep), FileStatus (org.apache.hadoop.fs.FileStatus), IOException (java.io.IOException), PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep), Map (java.util.Map)

Aggregations

PrePublishStep (org.apache.gobblin.data.management.copy.entities.PrePublishStep): 5 uses
DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep): 5 uses
CopyEntity (org.apache.gobblin.data.management.copy.CopyEntity): 4 uses
CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile): 4 uses
Path (org.apache.hadoop.fs.Path): 4 uses
PostPublishStep (org.apache.gobblin.data.management.copy.entities.PostPublishStep): 3 uses
IOException (java.io.IOException): 2 uses
MultiTimingEvent (org.apache.gobblin.metrics.event.MultiTimingEvent): 2 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 2 uses
Closer (com.google.common.io.Closer): 1 use
SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings): 1 use
Map (java.util.Map): 1 use
CommitStep (org.apache.gobblin.commit.CommitStep): 1 use
CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration): 1 use
HiveRegisterStep (org.apache.gobblin.hive.HiveRegisterStep): 1 use
HiveSpec (org.apache.gobblin.hive.spec.HiveSpec): 1 use
SimpleHiveSpec (org.apache.gobblin.hive.spec.SimpleHiveSpec): 1 use
Configuration (org.apache.hadoop.conf.Configuration): 1 use
FileSystem (org.apache.hadoop.fs.FileSystem): 1 use
Partition (org.apache.hadoop.hive.ql.metadata.Partition): 1 use