Search in sources :

Example 1 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class HiddenFilterSelectionPolicyTest method testListSelectedVersions.

/**
 * Feeds four versions to {@link HiddenFilterSelectionPolicy} and asserts that exactly two are selected,
 * and that every path of a selected version is one of the two {@code versions/} paths.
 * The policy is built twice: once with the prefixes given as a list, once as the string {@code "_,."}.
 */
@Test
public void testListSelectedVersions() throws Exception {
    Path visibleVersion1 = new Path("/data/dataset/versions/version1");
    Path visibleVersion2 = new Path("/data/dataset/versions/version2");
    Path dotPrefixed = new Path("/data/dataset/.temp/tmpPath");
    Path underscorePrefixed = new Path("/data/dataset/_temp/tmpPath");

    // Only these two paths may show up among the selected versions.
    Set<String> expectedPaths = new HashSet<>();
    expectedPaths.add(visibleVersion1.toString());
    expectedPaths.add(visibleVersion2.toString());

    // Candidate versions, in the order: version1, version2, ".temp", "_temp".
    List<FileSystemDatasetVersion> candidates = new ArrayList<>();
    for (Path candidatePath : Arrays.asList(visibleVersion1, visibleVersion2, dotPrefixed, underscorePrefixed)) {
        candidates.add(new TimestampedDatasetVersion(new DateTime(), candidatePath));
    }

    // The same prefixes expressed two ways: as a list value and as a comma-separated string value.
    List<String> prefixList = Arrays.asList("_", ".");
    Config listValuedConfig = ConfigFactory.parseMap(ImmutableMap.of(HiddenFilterSelectionPolicy.HIDDEN_FILTER_HIDDEN_FILE_PREFIX_KEY, prefixList));
    Config stringValuedConfig = ConfigFactory.parseMap(ImmutableMap.of(HiddenFilterSelectionPolicy.HIDDEN_FILTER_HIDDEN_FILE_PREFIX_KEY, "_,."));

    for (Config policyConfig : Arrays.asList(listValuedConfig, stringValuedConfig)) {
        Collection<FileSystemDatasetVersion> kept = new HiddenFilterSelectionPolicy(policyConfig).listSelectedVersions(candidates);
        Assert.assertEquals(kept.size(), 2);
        for (FileSystemDatasetVersion keptVersion : kept) {
            for (Path keptPath : keptVersion.getPaths()) {
                Assert.assertTrue(expectedPaths.contains(keptPath.toString()));
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) Config(com.typesafe.config.Config) FileSystemDatasetVersion(org.apache.gobblin.data.management.version.FileSystemDatasetVersion) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 2 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimeBasedSelectionPolicyTest method testListCopyableVersions.

/**
 * Builds {@link SelectAfterTimeBasedPolicy} from {@link Properties} with look-back values of
 * 7 days, 1 hour and 9 days, and checks how many of two versions (timestamped 8 and 6 days ago)
 * each policy selects: 1, 0 and 2 respectively.
 */
@Test
public void testListCopyableVersions() {
    Properties lookBackProps = new Properties();
    Path placeholderPath = new Path("dummy");
    TimestampedDatasetVersion eightDaysOld = new TimestampedDatasetVersion(new DateTime().minusDays(8), placeholderPath);
    TimestampedDatasetVersion sixDaysOld = new TimestampedDatasetVersion(new DateTime().minusDays(6), placeholderPath);

    // 7-day look-back: one of the two versions is expected.
    lookBackProps.put(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "7d");
    SelectAfterTimeBasedPolicy sevenDayPolicy = new SelectAfterTimeBasedPolicy(lookBackProps);
    int selectedWithSevenDays = sevenDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithSevenDays, 1);

    // 1-hour look-back: neither version is expected.
    lookBackProps.put(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "1h");
    SelectAfterTimeBasedPolicy oneHourPolicy = new SelectAfterTimeBasedPolicy(lookBackProps);
    int selectedWithOneHour = oneHourPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithOneHour, 0);

    // 9-day look-back: both versions are expected.
    lookBackProps.put(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "9d");
    SelectAfterTimeBasedPolicy nineDayPolicy = new SelectAfterTimeBasedPolicy(lookBackProps);
    int selectedWithNineDays = nineDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithNineDays, 2);
}
Also used : Path(org.apache.hadoop.fs.Path) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) Properties(java.util.Properties) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 3 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimeBasedSelectionPolicyTest method testSelectAfterTimebasedPolicy.

/**
 * Builds {@link SelectAfterTimeBasedPolicy} from a {@link Config} with look-back values of
 * 7 days, 1 hour and 9 days, and checks the selection over two versions timestamped 8 and 6 days ago.
 * With the 7-day look-back the single selected version must be the 6-day-old one.
 */
@Test
public void testSelectAfterTimebasedPolicy() {
    Path placeholderPath = new Path("dummy");
    TimestampedDatasetVersion eightDaysOld = new TimestampedDatasetVersion(new DateTime().minusDays(8), placeholderPath);
    TimestampedDatasetVersion sixDaysOld = new TimestampedDatasetVersion(new DateTime().minusDays(6), placeholderPath);

    // 7-day look-back: exactly one version, and it is the 6-day-old one.
    Config sevenDayConfig = ConfigFactory.parseMap(ImmutableMap.of(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "7d"));
    SelectAfterTimeBasedPolicy sevenDayPolicy = new SelectAfterTimeBasedPolicy(sevenDayConfig);
    int selectedWithSevenDays = sevenDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithSevenDays, 1);
    Object firstSelected = Lists.newArrayList(sevenDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld))).get(0);
    Assert.assertEquals(firstSelected, sixDaysOld);

    // 1-hour look-back: nothing is selected.
    Config oneHourConfig = ConfigFactory.parseMap(ImmutableMap.of(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "1h"));
    SelectAfterTimeBasedPolicy oneHourPolicy = new SelectAfterTimeBasedPolicy(oneHourConfig);
    int selectedWithOneHour = oneHourPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithOneHour, 0);

    // 9-day look-back: both versions are selected.
    Config nineDayConfig = ConfigFactory.parseMap(ImmutableMap.of(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "9d"));
    SelectAfterTimeBasedPolicy nineDayPolicy = new SelectAfterTimeBasedPolicy(nineDayConfig);
    int selectedWithNineDays = nineDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithNineDays, 2);
}
Also used : Path(org.apache.hadoop.fs.Path) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) Config(com.typesafe.config.Config) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 4 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimeBasedSelectionPolicyTest method testSelectBeforeTimebasedPolicy.

/**
 * Builds {@link SelectBeforeTimeBasedPolicy} from a {@link Config} with look-back values of
 * 7 days, 1 hour and 9 days, and checks the selection over two versions timestamped 8 and 6 days ago.
 * With the 7-day look-back the single selected version must be the 8-day-old one.
 */
@Test
public void testSelectBeforeTimebasedPolicy() {
    Path placeholderPath = new Path("dummy");
    TimestampedDatasetVersion eightDaysOld = new TimestampedDatasetVersion(new DateTime().minusDays(8), placeholderPath);
    TimestampedDatasetVersion sixDaysOld = new TimestampedDatasetVersion(new DateTime().minusDays(6), placeholderPath);

    // 7-day look-back: exactly one version, and it is the 8-day-old one.
    Config sevenDayConfig = ConfigFactory.parseMap(ImmutableMap.of(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "7d"));
    SelectBeforeTimeBasedPolicy sevenDayPolicy = new SelectBeforeTimeBasedPolicy(sevenDayConfig);
    int selectedWithSevenDays = sevenDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithSevenDays, 1);
    Object firstSelected = Lists.newArrayList(sevenDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld))).get(0);
    Assert.assertEquals(firstSelected, eightDaysOld);

    // 1-hour look-back: both versions are selected.
    Config oneHourConfig = ConfigFactory.parseMap(ImmutableMap.of(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "1h"));
    SelectBeforeTimeBasedPolicy oneHourPolicy = new SelectBeforeTimeBasedPolicy(oneHourConfig);
    int selectedWithOneHour = oneHourPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithOneHour, 2);

    // 9-day look-back: nothing is selected.
    Config nineDayConfig = ConfigFactory.parseMap(ImmutableMap.of(SelectAfterTimeBasedPolicy.TIME_BASED_SELECTION_LOOK_BACK_TIME_KEY, "9d"));
    SelectBeforeTimeBasedPolicy nineDayPolicy = new SelectBeforeTimeBasedPolicy(nineDayConfig);
    int selectedWithNineDays = nineDayPolicy.listSelectedVersions(Lists.newArrayList(eightDaysOld, sixDaysOld)).size();
    Assert.assertEquals(selectedWithNineDays, 0);
}
Also used : Path(org.apache.hadoop.fs.Path) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) Config(com.typesafe.config.Config) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 5 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimestampBasedCopyableDatasetTest method testIsCopyableFile.

/**
 * Test {@link TimestampBasedCopyableDataset.CopyableFileGenerator}'s logic to determine copyable files.
 *
 * <p>Three scenarios are exercised against the local file system:
 * <ol>
 *   <li>a file present on the source but missing on the target is reported as copyable;</li>
 *   <li>a source file whose target counterpart was created later is not reported;</li>
 *   <li>when the publish directory is the dataset root itself, nothing is reported.</li>
 * </ol>
 *
 * @throws IOException if a file system operation on the test directories fails
 * @throws InterruptedException if the pause between creating the source and target files is interrupted
 */
@Test
public void testIsCopyableFile() throws IOException, InterruptedException {
    Path testRoot = new Path("testCopyableFileGenerator");
    Path srcRoot = new Path(testRoot, "datasetRoot");
    String versionDir = "dummyversion";
    Path versionPath = new Path(srcRoot, versionDir);
    Path targetDir = new Path(testRoot, "target");
    // Start from a clean slate in case a previous run left files behind.
    if (this.localFs.exists(testRoot)) {
        this.localFs.delete(testRoot, true);
    }
    this.localFs.mkdirs(versionPath);
    Path srcfile = new Path(versionPath, "file1");
    // create() hands back an open output stream; close it right away so the handle is not leaked.
    this.localFs.create(srcfile).close();
    this.localFs.mkdirs(targetDir);
    Properties props = new Properties();
    props.put(TimestampBasedCopyableDataset.COPY_POLICY, TimeBasedCopyPolicyForTest.class.getName());
    props.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER, TimestampedDatasetVersionFinderForTest.class.getName());
    Path datasetRootPath = this.localFs.getFileStatus(srcRoot).getPath();
    TimestampBasedCopyableDataset copyabledataset = new TimestampBasedCopyableDataset(localFs, props, datasetRootPath);
    TimestampedDatasetVersion srcVersion = new TimestampedDatasetVersion(new DateTime(), versionPath);
    // Generator whose copyable files are mocks that only report the source file path as their file set.
    class SimpleCopyableFileGenerator extends TimestampBasedCopyableDataset.CopyableFileGenerator {

        public SimpleCopyableFileGenerator(TimestampBasedCopyableDataset copyableDataset, FileSystem srcFs, FileSystem targetFs, CopyConfiguration configuration, TimestampedDatasetVersion copyableVersion, ConcurrentLinkedQueue<CopyableFile> copyableFileList) {
            super(srcFs, targetFs, configuration, copyableDataset.datasetRoot(), configuration.getPublishDir(), copyableVersion.getDateTime(), copyableVersion.getPaths(), copyableFileList, copyableDataset.copyableFileFilter());
        }

        @Override
        protected CopyableFile generateCopyableFile(FileStatus singleFile, Path targetPath, long timestampFromPath, Path locationToCopy) throws IOException {
            CopyableFile mockCopyableFile = mock(CopyableFile.class);
            when(mockCopyableFile.getFileSet()).thenReturn(singleFile.getPath().toString());
            return mockCopyableFile;
        }
    }
    // When srcFile exists on src but not on target, srcFile should be included in the copyableFileList.
    CopyConfiguration configuration1 = mock(CopyConfiguration.class);
    when(configuration1.getPublishDir()).thenReturn(localFs.getFileStatus(targetDir).getPath());
    ConcurrentLinkedQueue<CopyableFile> copyableFileList1 = new ConcurrentLinkedQueue<>();
    TimestampBasedCopyableDataset.CopyableFileGenerator copyFileGenerator1 = new SimpleCopyableFileGenerator(copyabledataset, localFs, localFs, configuration1, srcVersion, copyableFileList1);
    copyFileGenerator1.run();
    Assert.assertEquals(copyableFileList1.size(), 1);
    // poll() removes the entry, leaving the queue empty for the next run.
    Assert.assertEquals(copyableFileList1.poll().getFileSet(), localFs.getFileStatus(srcfile).getPath().toString());
    // When files exist on both locations but with different timestamp, the result should only include newer src files.
    String noNeedToCopyFile = "file2";
    Path oldSrcFile = new Path(versionPath, noNeedToCopyFile);
    this.localFs.create(oldSrcFile).close();
    // Pause so that the target file created next is written after the source file.
    Thread.sleep(100);
    Path newTargetfile = new Path(targetDir, new Path(versionDir, noNeedToCopyFile));
    this.localFs.create(newTargetfile).close();
    copyFileGenerator1.run();
    Assert.assertEquals(copyableFileList1.size(), 1);
    Assert.assertEquals(copyableFileList1.poll().getFileSet(), localFs.getFileStatus(srcfile).getPath().toString());
    // When srcFile exists on both locations and have the same modified timestamp, it should not be included in copyableFileList.
    // The publish dir is the dataset root itself, so source and target resolve to the same location here.
    CopyConfiguration configuration2 = mock(CopyConfiguration.class);
    when(configuration2.getPublishDir()).thenReturn(localFs.getFileStatus(datasetRootPath).getPath());
    ConcurrentLinkedQueue<CopyableFile> copyableFileList2 = new ConcurrentLinkedQueue<>();
    TimestampBasedCopyableDataset.CopyableFileGenerator copyFileGenerator2 = new SimpleCopyableFileGenerator(copyabledataset, localFs, localFs, configuration2, srcVersion, copyableFileList2);
    copyFileGenerator2.run();
    Assert.assertEquals(copyableFileList2.size(), 0);
    this.localFs.delete(testRoot, true);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) Properties(java.util.Properties) DateTime(org.joda.time.DateTime) FileSystem(org.apache.hadoop.fs.FileSystem) CopyConfiguration(org.apache.gobblin.data.management.copy.CopyConfiguration) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Aggregations

TimestampedDatasetVersion (org.apache.gobblin.data.management.version.TimestampedDatasetVersion): 9, DateTime (org.joda.time.DateTime): 8, Path (org.apache.hadoop.fs.Path): 7, Test (org.testng.annotations.Test): 7, Config (com.typesafe.config.Config): 4, Properties (java.util.Properties): 3, ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 3, CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile): 3, CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration): 2, FileStatus (org.apache.hadoop.fs.FileStatus): 2, BeforeTest (org.testng.annotations.BeforeTest): 2, IOException (java.io.IOException): 1, ArrayList (java.util.ArrayList): 1, HashSet (java.util.HashSet): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, Future (java.util.concurrent.Future): 1, FileSystemDatasetVersion (org.apache.gobblin.data.management.version.FileSystemDatasetVersion): 1, FileSystemDataset (org.apache.gobblin.dataset.FileSystemDataset): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1