Search in sources :

Example 1 with CommitStepCopyEntity

use of org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity in project incubator-gobblin by apache.

the class CopyDataPublisher method getCommitSequence.

/**
 * Builds the list of commit steps carried by the given work units, ordered by ascending priority.
 *
 * <p>Only work units whose copy entity class is assignable to {@code baseClass} are considered; each
 * of those is deserialized and cast to {@link CommitStepCopyEntity}.
 *
 * @param workUnits work unit states to inspect
 * @param baseClass entity type used to select which work units contribute a step
 * @return the selected entities' steps, lowest priority value first
 * @throws IOException declared by this method; presumably raised while reading the copy entity from
 *     a work unit (verify against {@code CopySource})
 */
private static List<CommitStep> getCommitSequence(Collection<WorkUnitState> workUnits, Class<?> baseClass) throws IOException {
    List<CommitStepCopyEntity> matchingEntities = Lists.newArrayList();
    for (WorkUnitState workUnit : workUnits) {
        if (!baseClass.isAssignableFrom(CopySource.getCopyEntityClass(workUnit))) {
            continue;
        }
        matchingEntities.add((CommitStepCopyEntity) CopySource.deserializeCopyEntity(workUnit));
    }

    // Collections.sort is stable, so entities with equal priority keep their encounter order.
    Collections.sort(matchingEntities, new Comparator<CommitStepCopyEntity>() {

        @Override
        public int compare(CommitStepCopyEntity left, CommitStepCopyEntity right) {
            return Integer.compare(left.getPriority(), right.getPriority());
        }
    });

    List<CommitStep> orderedSteps = Lists.newArrayList();
    for (CommitStepCopyEntity entity : matchingEntities) {
        orderedSteps.add(entity.getStep());
    }
    return orderedSteps;
}
Also used : CommitStep(org.apache.gobblin.commit.CommitStep) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) CommitStepCopyEntity(org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity) Comparator(java.util.Comparator)

Example 2 with CommitStepCopyEntity

use of org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity in project incubator-gobblin by apache.

the class RecursiveCopyableDatasetTest method classifyFiles.

/**
 * Splits the given copy entities into the paths that will be copied and the paths that will be deleted.
 *
 * <p>Every {@link CopyableFile} contributes an origin-path to destination mapping. Every
 * {@link CommitStepCopyEntity} whose step is a {@link DeleteFileCommitStep} contributes the paths
 * of all file statuses that step reports for deletion. Other entities and steps are ignored.
 *
 * @param copyEntities entities to classify
 * @return the collected copy mapping and deletion set
 */
private ClassifiedFiles classifyFiles(Collection<? extends CopyEntity> copyEntities) {
    Map<Path, Path> copyTargetsByOrigin = Maps.newHashMap();
    Set<Path> deletions = Sets.newHashSet();
    for (CopyEntity entity : copyEntities) {
        if (entity instanceof CopyableFile) {
            CopyableFile file = (CopyableFile) entity;
            copyTargetsByOrigin.put(file.getOrigin().getPath(), file.getDestination());
        }
        // The two type checks are independent, so the commit-step check runs for every entity.
        if (!(entity instanceof CommitStepCopyEntity)) {
            continue;
        }
        CommitStep commitStep = ((CommitStepCopyEntity) entity).getStep();
        if (!(commitStep instanceof DeleteFileCommitStep)) {
            continue;
        }
        DeleteFileCommitStep deleteStep = (DeleteFileCommitStep) commitStep;
        for (FileStatus doomed : deleteStep.getPathsToDelete()) {
            deletions.add(doomed.getPath());
        }
    }
    return new ClassifiedFiles(copyTargetsByOrigin, deletions);
}
Also used : Path(org.apache.hadoop.fs.Path) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) CommitStep(org.apache.gobblin.commit.CommitStep) FileStatus(org.apache.hadoop.fs.FileStatus) CommitStepCopyEntity(org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity)

Example 3 with CommitStepCopyEntity

use of org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity in project incubator-gobblin by apache.

the class RecursiveCopyableDatasetTest method testCopyWithDeleteTargetAndDeleteParentDirectories.

/**
 * With both the delete flag and the delete-empty-directories flag set, the dataset is expected to
 * produce two entities: a copy of {@code file1} to the target and a delete step for {@code file3}
 * whose parent deletion limit is present and equal to the target root.
 */
@Test
public void testCopyWithDeleteTargetAndDeleteParentDirectories() throws Exception {
    Path sourceRoot = new Path("/source");
    Path targetRoot = new Path("/target");
    List<FileStatus> filesAtSource = Lists.newArrayList(createFileStatus(sourceRoot, "file1"));
    List<FileStatus> filesAtTarget = Lists.newArrayList(createFileStatus(targetRoot, "file3"));

    Properties props = new Properties();
    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, targetRoot.toString());
    props.setProperty(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, "true");
    props.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true");

    RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(sourceRoot, targetRoot, filesAtSource, filesAtTarget, props);
    Collection<? extends CopyEntity> copyEntities = dataset.getCopyableFiles(
        FileSystem.getLocal(new Configuration()),
        CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), props).build());
    Assert.assertEquals(copyEntities.size(), 2);

    // One file to copy (file1) and one file to delete on the target side (file3).
    ClassifiedFiles classified = classifyFiles(copyEntities);
    Assert.assertTrue(classified.getPathsToCopy().containsKey(new Path(sourceRoot, "file1")));
    Assert.assertEquals(classified.getPathsToCopy().get(new Path(sourceRoot, "file1")), new Path(targetRoot, "file1"));
    Assert.assertEquals(classified.getPathsToDelete().size(), 1);
    Assert.assertTrue(classified.getPathsToDelete().contains(new Path(targetRoot, "file3")));

    // Pick the first commit-step entity and inspect its delete step.
    Predicate<CopyEntity> isCommitStepEntity = new Predicate<CopyEntity>() {

        @Override
        public boolean apply(@Nullable CopyEntity copyEntity) {
            return copyEntity instanceof CommitStepCopyEntity;
        }
    };
    CommitStepCopyEntity commitStepEntity = (CommitStepCopyEntity) Iterables.filter(copyEntities, isCommitStepEntity).iterator().next();
    DeleteFileCommitStep deleteStep = (DeleteFileCommitStep) commitStepEntity.getStep();
    Assert.assertTrue(deleteStep.getParentDeletionLimit().isPresent());
    Assert.assertEquals(deleteStep.getParentDeletionLimit().get(), targetRoot);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) CommitStepCopyEntity(org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) Properties(java.util.Properties) Predicate(avro.shaded.com.google.common.base.Predicate) Nullable(javax.annotation.Nullable) Test(org.testng.annotations.Test)

Example 4 with CommitStepCopyEntity

use of org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity in project incubator-gobblin by apache.

the class RecursiveCopyableDatasetTest method testCopyWithDeleteTarget.

/**
 * With only the delete flag set, the dataset is expected to produce two entities: a copy of
 * {@code file1} to the target and a delete step for {@code file3} that has no parent deletion limit.
 */
@Test
public void testCopyWithDeleteTarget() throws Exception {
    Path sourceRoot = new Path("/source");
    Path targetRoot = new Path("/target");
    List<FileStatus> filesAtSource = Lists.newArrayList(createFileStatus(sourceRoot, "file1"));
    List<FileStatus> filesAtTarget = Lists.newArrayList(createFileStatus(targetRoot, "file3"));

    Properties props = new Properties();
    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, targetRoot.toString());
    props.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true");

    RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(sourceRoot, targetRoot, filesAtSource, filesAtTarget, props);
    Collection<? extends CopyEntity> copyEntities = dataset.getCopyableFiles(
        FileSystem.getLocal(new Configuration()),
        CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), props).build());
    Assert.assertEquals(copyEntities.size(), 2);

    // One file to copy (file1) and one file to delete on the target side (file3).
    ClassifiedFiles classified = classifyFiles(copyEntities);
    Assert.assertTrue(classified.getPathsToCopy().containsKey(new Path(sourceRoot, "file1")));
    Assert.assertEquals(classified.getPathsToCopy().get(new Path(sourceRoot, "file1")), new Path(targetRoot, "file1"));
    Assert.assertEquals(classified.getPathsToDelete().size(), 1);
    Assert.assertTrue(classified.getPathsToDelete().contains(new Path(targetRoot, "file3")));

    // Pick the first commit-step entity and inspect its delete step.
    Predicate<CopyEntity> isCommitStepEntity = new Predicate<CopyEntity>() {

        @Override
        public boolean apply(@Nullable CopyEntity copyEntity) {
            return copyEntity instanceof CommitStepCopyEntity;
        }
    };
    CommitStepCopyEntity commitStepEntity = (CommitStepCopyEntity) Iterables.filter(copyEntities, isCommitStepEntity).iterator().next();
    DeleteFileCommitStep deleteStep = (DeleteFileCommitStep) commitStepEntity.getStep();
    Assert.assertFalse(deleteStep.getParentDeletionLimit().isPresent());
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) CommitStepCopyEntity(org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity) DeleteFileCommitStep(org.apache.gobblin.util.commit.DeleteFileCommitStep) Properties(java.util.Properties) Predicate(avro.shaded.com.google.common.base.Predicate) Nullable(javax.annotation.Nullable) Test(org.testng.annotations.Test)

Aggregations

CommitStepCopyEntity (org.apache.gobblin.data.management.copy.entities.CommitStepCopyEntity)4 DeleteFileCommitStep (org.apache.gobblin.util.commit.DeleteFileCommitStep)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 Path (org.apache.hadoop.fs.Path)3 Predicate (avro.shaded.com.google.common.base.Predicate)2 Properties (java.util.Properties)2 Nullable (javax.annotation.Nullable)2 CommitStep (org.apache.gobblin.commit.CommitStep)2 Configuration (org.apache.hadoop.conf.Configuration)2 Test (org.testng.annotations.Test)2 Comparator (java.util.Comparator)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1