use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.
the class TimeBasedSelectionPolicyTest method testSelectBetweenTimebasedPolicy.
/**
 * Exercises {@link SelectBetweenTimeBasedPolicy} with two versions, one timestamped 8 days ago and one
 * timestamped 6 days ago, under several max/min look-back configurations.
 */
@Test
public void testSelectBetweenTimebasedPolicy() {
  Path path = new Path("dummy");
  DateTime eightDaysAgo = new DateTime().minusDays(8);
  DateTime sixDaysAgo = new DateTime().minusDays(6);

  // Window [7d, 4d]: only the 6-day-old version is expected to be selected.
  ImmutableMap<String, String> max7dMin4d = ImmutableMap.of(
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MAX_LOOK_BACK_TIME_KEY, "7d",
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MIN_LOOK_BACK_TIME_KEY, "4d");
  Config config = ConfigFactory.parseMap(max7dMin4d);
  SelectBetweenTimeBasedPolicy between7dAnd4d = new SelectBetweenTimeBasedPolicy(config);

  TimestampedDatasetVersion olderVersion = new TimestampedDatasetVersion(eightDaysAgo, path);
  TimestampedDatasetVersion newerVersion = new TimestampedDatasetVersion(sixDaysAgo, path);

  int selectedCount = between7dAnd4d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size();
  Assert.assertEquals(selectedCount, 1);
  Assert.assertEquals(
      Lists.newArrayList(between7dAnd4d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion))).get(0),
      newerVersion);

  // Window [9d, 4d]: both versions are expected to be selected.
  ImmutableMap<String, String> max9dMin4d = ImmutableMap.of(
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MAX_LOOK_BACK_TIME_KEY, "9d",
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MIN_LOOK_BACK_TIME_KEY, "4d");
  config = ConfigFactory.parseMap(max9dMin4d);
  SelectBetweenTimeBasedPolicy between9dAnd4d = new SelectBetweenTimeBasedPolicy(config);
  selectedCount = between9dAnd4d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size();
  Assert.assertEquals(selectedCount, 2);

  // Window [4d, 1d]: neither version is expected to be selected.
  ImmutableMap<String, String> max4dMin1d = ImmutableMap.of(
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MAX_LOOK_BACK_TIME_KEY, "4d",
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MIN_LOOK_BACK_TIME_KEY, "1d");
  config = ConfigFactory.parseMap(max4dMin1d);
  SelectBetweenTimeBasedPolicy between4dAnd1d = new SelectBetweenTimeBasedPolicy(config);
  selectedCount = between4dAnd1d.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size();
  Assert.assertEquals(selectedCount, 0);

  // Only the max look-back ("7d") is configured; the min look-back key is omitted.
  ImmutableMap<String, String> max7dOnly = ImmutableMap.of(
      SelectBetweenTimeBasedPolicy.TIME_BASED_SELECTION_MAX_LOOK_BACK_TIME_KEY, "7d");
  config = ConfigFactory.parseMap(max7dOnly);
  SelectBetweenTimeBasedPolicy max7dNoMin = new SelectBetweenTimeBasedPolicy(config);
  selectedCount = max7dNoMin.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion)).size();
  Assert.assertEquals(selectedCount, 1);
  Assert.assertEquals(
      Lists.newArrayList(max7dNoMin.listSelectedVersions(Lists.newArrayList(olderVersion, newerVersion))).get(0),
      newerVersion);
}
use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.
the class TimestampBasedCopyableDatasetTest method testCopyableFileGenerator.
/**
 * Runs a {@link TimestampBasedCopyableDataset.CopyableFileGenerator} first for a dataset version whose source
 * location is empty, then for a version whose source location is {@code null}.
 *
 * <p>The empty-location run must leave the shared result queue empty. The test as a whole is expected to end with
 * a {@link RuntimeException}.
 *
 * <p>NOTE(review): {@code expectedExceptions} covers the entire method, so a {@link RuntimeException} raised
 * before the null-location run would also make this test pass — confirm that is acceptable.
 */
@Test(expectedExceptions = RuntimeException.class)
public void testCopyableFileGenerator() {
  Properties settings = new Properties();
  settings.put(TimestampBasedCopyableDataset.COPY_POLICY, TimeBasedCopyPolicyForTest.class.getName());
  settings.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER,
      TimestampedDatasetVersionFinderForTest.class.getName());
  TimestampBasedCopyableDataset dataset = new TimestampBasedCopyableDataset(localFs, settings, new Path("dummy"));

  CopyConfiguration copyConfig = mock(CopyConfiguration.class);
  when(copyConfig.getPublishDir()).thenReturn(new Path("publishDir"));

  // Queue the generators append their results to.
  ConcurrentLinkedQueue<CopyableFile> generatedFiles = new ConcurrentLinkedQueue<>();

  // Case 1: the source path is empty, so nothing should be generated.
  TimestampedDatasetVersion versionWithEmptyLocation =
      new TimestampedDatasetVersion(new DateTime(), new Path("dummy2"));
  dataset.getCopyableFileGenetator(localFs, copyConfig, versionWithEmptyLocation, generatedFiles).run();
  Assert.assertEquals(generatedFiles.size(), 0);

  // Case 2: the source path is null.
  TimestampedDatasetVersion versionWithNullLocation = new TimestampedDatasetVersion(new DateTime(), null);
  dataset.getCopyableFileGenetator(localFs, copyConfig, versionWithNullLocation, generatedFiles).run();
}
use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.
the class ModDateTimeDatasetVersionFinder method findDatasetVersions.
/**
 * Produces a single {@link TimestampedDatasetVersion} for the given dataset, timestamped with the modification
 * time of the dataset root as reported by this finder's file system.
 *
 * @param dataset the dataset to inspect; it is cast to {@link FileSystemDataset}
 * @return a one-element collection holding the version built from the dataset root and its modification time
 * @throws IOException if the file status of the dataset root cannot be read
 */
@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  FileSystemDataset fileSystemDataset = (FileSystemDataset) dataset;
  FileStatus rootStatus = this.fs.getFileStatus(fileSystemDataset.datasetRoot());

  // The root's modification time (epoch millis) becomes the version timestamp.
  long modifiedAtMillis = rootStatus.getModificationTime();
  DateTime modifiedAt = new DateTime(modifiedAtMillis);

  Collection<TimestampedDatasetVersion> versions = Lists.newArrayList();
  versions.add(new TimestampedDatasetVersion(modifiedAt, fileSystemDataset.datasetRoot()));
  return versions;
}
use of org.apache.gobblin.data.management.version.TimestampedDatasetVersion in project incubator-gobblin by apache.
the class TimestampBasedCopyableDataset method getCopyableFiles.
/**
 * Collects the {@link CopyableFile}s for every dataset version chosen by the version selection policy.
 *
 * <p>Versions are discovered through {@code datasetVersionFinder} and filtered by {@code versionSelectionPolicy}.
 * One generator task per selected version is submitted to {@code executor}; the tasks append their results to a
 * shared concurrent queue. After the submitted tasks have been awaited, the executor is shut down. The executor is
 * not shut down on the early return taken when no version is found.
 *
 * @param targetFs file system handed to each generator task
 * @param configuration copy configuration handed to each generator task
 * @return the files gathered by the generator tasks, or an empty list when no dataset version is found
 * @throws IOException if a generator task fails, or if the calling thread is interrupted while waiting; in the
 *         latter case the thread's interrupt status is restored before the exception propagates
 */
@Override
public Collection<CopyableFile> getCopyableFiles(FileSystem targetFs, CopyConfiguration configuration) throws IOException {
  log.info(String.format("Getting copyable files at root path: %s", this.datasetRoot));
  List<TimestampedDatasetVersion> versions = Lists.newArrayList(this.datasetVersionFinder.findDatasetVersions(this));
  if (versions.isEmpty()) {
    log.warn("No dataset version can be found. Ignoring.");
    return Lists.newArrayList();
  }

  Collection<TimestampedDatasetVersion> copyableVersions = this.versionSelectionPolicy.listSelectedVersions(versions);
  ConcurrentLinkedQueue<CopyableFile> copyableFileList = new ConcurrentLinkedQueue<>();
  List<Future<?>> futures = Lists.newArrayList();
  for (TimestampedDatasetVersion copyableVersion : copyableVersions) {
    futures.add(this.executor.submit(this.getCopyableFileGenetator(targetFs, configuration, copyableVersion, copyableFileList)));
  }

  boolean interrupted = false;
  try {
    for (Future<?> future : futures) {
      future.get();
    }
  } catch (InterruptedException e) {
    // Catching InterruptedException clears the interrupt flag; remember to restore it below.
    interrupted = true;
    throw new IOException("Failed to generate copyable files.", e);
  } catch (ExecutionException e) {
    throw new IOException("Failed to generate copyable files.", e);
  } finally {
    ExecutorsUtils.shutdownExecutorService(executor, Optional.of(log));
    if (interrupted) {
      // Restored only after the shutdown call so the shutdown helper runs exactly as it did before this fix.
      Thread.currentThread().interrupt();
    }
  }
  return copyableFileList;
}
Aggregations