Search in sources :

Example 6 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimeBasedSelectionPolicyTest method testSelectBetweenTimebasedPolicy.

/**
 * Exercises {@link SelectBetweenTimeBasedPolicy} against two versions, one timestamped 8 days
 * back and one timestamped 6 days back, under four different look-back configurations.
 */
@Test
public void testSelectBetweenTimebasedPolicy() {
    final String maxLookBackKey = SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MAX_LOOK_BACK_TIME_KEY;
    final String minLookBackKey = SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MIN_LOOK_BACK_TIME_KEY;

    Path placeholderPath = new Path("dummy");
    DateTime eightDaysAgo = new DateTime().minusDays(8);
    DateTime sixDaysAgo = new DateTime().minusDays(6);

    // Max 7d / min 4d: exactly one version is selected, and it is the 6-day-old one.
    Config between7dAnd4d = ConfigFactory.parseMap(ImmutableMap.of(maxLookBackKey, "7d", minLookBackKey, "4d"));
    SelectBetweenTimeBasedPolicy policy7d4d = new SelectBetweenTimeBasedPolicy(between7dAnd4d);
    TimestampedDatasetVersion olderVersion = new TimestampedDatasetVersion(eightDaysAgo, placeholderPath);
    TimestampedDatasetVersion newerVersion = new TimestampedDatasetVersion(sixDaysAgo, placeholderPath);
    Assert.assertEquals(
        policy7d4d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size(), 1);
    Assert.assertEquals(
        Lists.newArrayList(policy7d4d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion))).get(0),
        newerVersion);

    // Max 9d / min 4d: both versions are selected.
    Config between9dAnd4d = ConfigFactory.parseMap(ImmutableMap.of(maxLookBackKey, "9d", minLookBackKey, "4d"));
    SelectBetweenTimeBasedPolicy policy9d4d = new SelectBetweenTimeBasedPolicy(between9dAnd4d);
    Assert.assertEquals(
        policy9d4d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size(), 2);

    // Max 4d / min 1d: neither version is selected.
    Config between4dAnd1d = ConfigFactory.parseMap(ImmutableMap.of(maxLookBackKey, "4d", minLookBackKey, "1d"));
    SelectBetweenTimeBasedPolicy policy4d1d = new SelectBetweenTimeBasedPolicy(between4dAnd1d);
    Assert.assertEquals(
        policy4d1d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size(), 0);

    // Only the max look-back (7d) is configured: again just the 6-day-old version is selected.
    Config within7d = ConfigFactory.parseMap(ImmutableMap.of(maxLookBackKey, "7d"));
    SelectBetweenTimeBasedPolicy policy7dOnly = new SelectBetweenTimeBasedPolicy(within7d);
    Assert.assertEquals(
        policy7dOnly.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size(), 1);
    Assert.assertEquals(
        Lists.newArrayList(policy7dOnly.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion))).get(0),
        newerVersion);
}
Also used : Path(org.apache.hadoop.fs.Path) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) Config(com.typesafe.config.Config) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 7 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimestampBasedCopyableDatasetTest method testCopyableFileGenerator.

/**
 * Test {@link TimestampBasedCopyableDataset.CopyableFileGenerator} when the src location is empty
 * and also when it is null.
 *
 * <p>The empty-location case must complete normally and leave the output queue empty. The
 * null-location case must raise a {@link RuntimeException}. The exception expectation is scoped
 * to the null-location section only, so a {@link RuntimeException} thrown by the setup or by the
 * empty-location case fails the test instead of silently satisfying it.
 */
@Test
public void testCopyableFileGenerator() {
    Properties props = new Properties();
    props.put(TimestampBasedCopyableDataset.COPY_POLICY, TimeBasedCopyPolicyForTest.class.getName());
    props.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER, TimestampedDatasetVersionFinderForTest.class.getName());
    TimestampBasedCopyableDataset copyabledataset = new TimestampBasedCopyableDataset(localFs, props, new Path("dummy"));
    CopyConfiguration configuration = mock(CopyConfiguration.class);
    when(configuration.getPublishDir()).thenReturn(new Path("publishDir"));
    ConcurrentLinkedQueue<CopyableFile> copyableFileList = new ConcurrentLinkedQueue<>();

    // The src path is empty: the generator must finish without adding any copyable file.
    TimestampedDatasetVersion emptyVersion = new TimestampedDatasetVersion(new DateTime(), new Path("dummy2"));
    TimestampBasedCopyableDataset.CopyableFileGenerator emptyGenerator = copyabledataset.getCopyableFileGenetator(localFs, configuration, emptyVersion, copyableFileList);
    emptyGenerator.run();
    Assert.assertEquals(copyableFileList.size(), 0);

    // The src path is null: building or running the generator must raise a RuntimeException.
    try {
        TimestampedDatasetVersion versionHasNullPath = new TimestampedDatasetVersion(new DateTime(), null);
        TimestampBasedCopyableDataset.CopyableFileGenerator exceptionGenerator = copyabledataset.getCopyableFileGenetator(localFs, configuration, versionHasNullPath, copyableFileList);
        exceptionGenerator.run();
        // Assert.fail raises an AssertionError, which the catch below does not intercept.
        Assert.fail("Expected a RuntimeException when the source path is null.");
    } catch (RuntimeException expected) {
        // Expected outcome for a null source path.
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) CopyConfiguration(org.apache.gobblin.data.management.copy.CopyConfiguration) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) Properties(java.util.Properties) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test) BeforeTest(org.testng.annotations.BeforeTest)

Example 8 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class ModDateTimeDatasetVersionFinder method findDatasetVersions.

/**
 * Produces a single {@link TimestampedDatasetVersion} for the given dataset, timestamped with the
 * modification time that {@code this.fs} reports for the dataset root.
 *
 * @param dataset the dataset to inspect; it is cast to {@link FileSystemDataset}
 * @return a one-element collection holding the version built from the dataset root
 * @throws IOException declared for the file status lookup on the dataset root
 */
@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
    FileSystemDataset fileSystemDataset = (FileSystemDataset) dataset;
    long modificationMillis = this.fs.getFileStatus(fileSystemDataset.datasetRoot()).getModificationTime();
    TimestampedDatasetVersion rootVersion =
        new TimestampedDatasetVersion(new DateTime(modificationMillis), fileSystemDataset.datasetRoot());
    return Lists.newArrayList(rootVersion);
}
Also used : FileSystemDataset(org.apache.gobblin.dataset.FileSystemDataset) FileStatus(org.apache.hadoop.fs.FileStatus) TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) DateTime(org.joda.time.DateTime)

Example 9 with TimestampedDatasetVersion

use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.

the class TimestampBasedCopyableDataset method getCopyableFiles.

/**
 * Collects the copyable files for this dataset.
 *
 * <p>Dataset versions are looked up through {@code datasetVersionFinder} and filtered by
 * {@code versionSelectionPolicy}. One generator task per selected version is submitted to
 * {@code executor}, and this method waits for every task before returning.
 *
 * <p>The executor is shut down once the wait ends, whether it succeeded or failed. The early
 * return taken when no version is found does not shut the executor down.
 *
 * @param targetFs the target file system handed to each generator task
 * @param configuration the copy configuration handed to each generator task
 * @return the queue that was handed to the generator tasks, or an empty list when no dataset
 *         version is found
 * @throws IOException if a generator task fails, or if the calling thread is interrupted while
 *         waiting; in the latter case the thread's interrupt status is restored first
 */
@Override
public Collection<CopyableFile> getCopyableFiles(FileSystem targetFs, CopyConfiguration configuration) throws IOException {
    log.info(String.format("Getting copyable files at root path: %s", this.datasetRoot));
    List<TimestampedDatasetVersion> versions = Lists.newArrayList(this.datasetVersionFinder.findDatasetVersions(this));
    if (versions.isEmpty()) {
        log.warn("No dataset version can be found. Ignoring.");
        return Lists.newArrayList();
    }
    Collection<TimestampedDatasetVersion> copyableVersions = this.versionSelectionPolicy.listSelectedVersions(versions);
    ConcurrentLinkedQueue<CopyableFile> copyableFileList = new ConcurrentLinkedQueue<>();
    List<Future<?>> futures = Lists.newArrayList();
    for (TimestampedDatasetVersion copyableVersion : copyableVersions) {
        futures.add(this.executor.submit(this.getCopyableFileGenetator(targetFs, configuration, copyableVersion, copyableFileList)));
    }
    try {
        // Block until every generator task has finished; get() surfaces any task failure.
        for (Future<?> future : futures) {
            future.get();
        }
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt flag; restore it so callers
        // further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new IOException("Failed to generate copyable files.", e);
    } catch (ExecutionException e) {
        throw new IOException("Failed to generate copyable files.", e);
    } finally {
        ExecutorsUtils.shutdownExecutorService(executor, Optional.of(log));
    }
    return copyableFileList;
}
Also used : TimestampedDatasetVersion(org.apache.gobblin.data.management.version.TimestampedDatasetVersion) IOException(java.io.IOException) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) Future(java.util.concurrent.Future) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

TimestampedDatasetVersion (org.apache.gobblin.data.management.version.TimestampedDatasetVersion): 9, DateTime (org.joda.time.DateTime): 8, Path (org.apache.hadoop.fs.Path): 7, Test (org.testng.annotations.Test): 7, Config (com.typesafe.config.Config): 4, Properties (java.util.Properties): 3, ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue): 3, CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile): 3, CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration): 2, FileStatus (org.apache.hadoop.fs.FileStatus): 2, BeforeTest (org.testng.annotations.BeforeTest): 2, IOException (java.io.IOException): 1, ArrayList (java.util.ArrayList): 1, HashSet (java.util.HashSet): 1, ExecutionException (java.util.concurrent.ExecutionException): 1, Future (java.util.concurrent.Future): 1, FileSystemDatasetVersion (org.apache.gobblin.data.management.version.FileSystemDatasetVersion): 1, FileSystemDataset (org.apache.gobblin.dataset.FileSystemDataset): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1