use of org.apache.gobblin.dataset.FileSystemDataset in project incubator-gobblin by apache.
the class PinotAuditCountVerifierTest method testTier.
/**
 * Runs {@link CompactionAuditCountVerifier} against a stubbed audit-count client and checks the
 * verification outcome for three combinations of producer / origin / gobblin tier counts.
 */
@Test
public void testTier() throws Exception {
  final String topic = "randomTopic";
  final String input = "/base/input";
  final String output = "/base/output";
  final String inputSub = "hourly";
  final String outputSub = "hourly";
  // Single location string shared by the dataset's root path and its URN.
  final String datasetLocation = input + topic + inputSub + "/2017/04/03/10";

  FileSystemDataset hourlyDataset = new FileSystemDataset() {
    @Override
    public Path datasetRoot() {
      return new Path(datasetLocation);
    }

    @Override
    public String datasetURN() {
      return datasetLocation;
    }
  };

  State state = new State();
  // Tier names used when comparing audit counts.
  state.setProp(CompactionAuditCountVerifier.PRODUCER_TIER, PRODUCER_TIER);
  state.setProp(CompactionAuditCountVerifier.ORIGIN_TIER, ORIGIN_TIER);
  state.setProp(CompactionAuditCountVerifier.GOBBLIN_TIER, GOBBLIN_TIER);
  // Compaction directory layout.
  state.setProp(MRCompactor.COMPACTION_INPUT_DIR, input);
  state.setProp(MRCompactor.COMPACTION_INPUT_SUBDIR, inputSub);
  state.setProp(MRCompactor.COMPACTION_DEST_DIR, output);
  state.setProp(MRCompactor.COMPACTION_DEST_SUBDIR, outputSub);
  state.setProp(MRCompactor.COMPACTION_TMP_DEST_DIR, "/tmp/compaction/verifier");
  // Time window settings for the time-based dataset finder.
  state.setProp(TimeBasedSubDirDatasetsFinder.COMPACTION_TIMEBASED_MAX_TIME_AGO, "3000d");
  state.setProp(TimeBasedSubDirDatasetsFinder.COMPACTION_TIMEBASED_MIN_TIME_AGO, "1d");

  TestAuditCountClient auditClient = new TestAuditCountClient();
  CompactionAuditCountVerifier countVerifier = new CompactionAuditCountVerifier(state, auditClient);

  // Scenario 1: every tier reports the same count, so the dataset is complete.
  auditClient.setCounts(ImmutableMap.of(PRODUCER_TIER, 1000L, ORIGIN_TIER, 1000L, GOBBLIN_TIER, 1000L));
  Assert.assertTrue(countVerifier.verify(hourlyDataset).isSuccessful);

  // Scenario 2: passes because GOBBLIN_TIER / PRODUCER_TIER is above the threshold.
  auditClient.setCounts(ImmutableMap.of(PRODUCER_TIER, 1000L, ORIGIN_TIER, 1100L, GOBBLIN_TIER, 1000L));
  Assert.assertTrue(countVerifier.verify(hourlyDataset).isSuccessful);

  // Scenario 3: fails because GOBBLIN_TIER / (PRODUCER_TIER || ORIGIN_TIER) is below the threshold.
  auditClient.setCounts(ImmutableMap.of(PRODUCER_TIER, 1100L, ORIGIN_TIER, 1100L, GOBBLIN_TIER, 1000L));
  Assert.assertFalse(countVerifier.verify(hourlyDataset).isSuccessful);
}
use of org.apache.gobblin.dataset.FileSystemDataset in project incubator-gobblin by apache.
the class ReplicationDataValidPathPicker method getValidPaths.
/**
 * Resolves the valid data paths of a Hadoop file system endpoint.
 *
 * <p>The endpoint's selection config names a version finder class (under {@code FINDER_CLASS}) and a
 * version selection policy class (under {@code POLICY_CLASS}). Both are instantiated reflectively. The
 * finder lists the versions of the endpoint's dataset, the versions are sorted in reverse natural
 * order, the policy picks a subset, and the first path of each picked version is returned.
 *
 * @param hadoopFsEndPoint endpoint supplying the selection config and the file system URI
 * @return the first path of every version chosen by the selection policy, in the policy's iteration order
 * @throws IOException propagated from file system access or version discovery
 * @throws IllegalArgumentException if the configured finder or policy class cannot be loaded or constructed
 */
@SuppressWarnings("unchecked")
public static Collection<Path> getValidPaths(HadoopFsEndPoint hadoopFsEndPoint) throws IOException {
  Config selectionConfig = hadoopFsEndPoint.getSelectionConfig();
  FileSystemDataset tmpDataset = new HadoopFsEndPointDataset(hadoopFsEndPoint);
  FileSystem theFs = FileSystem.get(hadoopFsEndPoint.getFsURI(), new Configuration());

  /*
   * Use FileSystemDatasetVersion as the common version type:
   * DateTimeDatasetVersionFinder / GlobModTimeDatasetVersionFinder use TimestampedDatasetVersion,
   * SingleVersionFinder uses FileStatusDatasetVersion.
   */
  VersionFinder<FileSystemDatasetVersion> finder = (VersionFinder<FileSystemDatasetVersion>)
      instantiateConfiguredClass(selectionConfig, FINDER_CLASS, theFs, selectionConfig);
  List<FileSystemDatasetVersion> versions = Ordering.natural().reverse().sortedCopy(finder.findDatasetVersions(tmpDataset));

  VersionSelectionPolicy<FileSystemDatasetVersion> selector = (VersionSelectionPolicy<FileSystemDatasetVersion>)
      instantiateConfiguredClass(selectionConfig, POLICY_CLASS, selectionConfig);
  Collection<FileSystemDatasetVersion> versionsSelected = selector.listSelectedVersions(versions);

  List<Path> result = new ArrayList<>();
  for (FileSystemDatasetVersion t : versionsSelected) {
    // Only the first path of each version is used.
    // NOTE(review): assumes every selected version exposes at least one path; iterator().next()
    // throws NoSuchElementException otherwise -- confirm against the finder implementations.
    result.add(t.getPaths().iterator().next());
  }
  return result;
}

/**
 * Reflectively constructs the class whose fully qualified name is stored in {@code config} under {@code classKey}.
 *
 * @param config config holding the class name
 * @param classKey config key whose value is the class name
 * @param constructorArgs arguments passed to the matching constructor
 * @return the new instance; the caller casts it to the expected type
 * @throws IllegalArgumentException if the class cannot be loaded or no matching constructor can be invoked
 */
private static Object instantiateConfiguredClass(Config config, String classKey, Object... constructorArgs) {
  String className = config.getString(classKey);
  try {
    return ConstructorUtils.invokeConstructor(Class.forName(className), constructorArgs);
  } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException | ClassNotFoundException e) {
    // Keep the original exception type and cause, but say which setting was at fault.
    throw new IllegalArgumentException("Cannot instantiate class '" + className + "' configured under '" + classKey + "'", e);
  }
}
use of org.apache.gobblin.dataset.FileSystemDataset in project incubator-gobblin by apache.
the class FsCleanableHelperTest method testDeleteEmptyDirs.
/**
 * Creates four version directories under a dataset root, cleans three of them with
 * {@link FsCleanableHelper} (trash skipped), and checks which directories and parent
 * directories remain afterwards.
 */
@Test
public void testDeleteEmptyDirs() throws Exception {
  Properties cleanerProps = new Properties();
  cleanerProps.setProperty(FsCleanableHelper.SKIP_TRASH_KEY, Boolean.toString(true));
  FsCleanableHelper cleaner = new FsCleanableHelper(this.fs, cleanerProps, ConfigFactory.empty(), log);

  Path datasetRoot = new Path(testTempPath, "dataset1");
  FileSystemDataset fsDataset = mock(FileSystemDataset.class);
  when(fsDataset.datasetRoot()).thenReturn(datasetRoot);

  // Versions handed to the cleaner.
  Path deleted1 = new Path(datasetRoot, "2016/01/01/13");
  Path deleted2 = new Path(datasetRoot, "2016/01/01/14");
  Path deleted3 = new Path(datasetRoot, "2016/01/02/15");
  List<Path> doomed = ImmutableList.of(deleted1, deleted2, deleted3);
  // Version that is never handed to the cleaner.
  Path notDeleted1 = new Path(datasetRoot, "2016/01/02/16");

  for (Path dir : doomed) {
    this.fs.mkdirs(dir);
  }
  this.fs.mkdirs(notDeleted1);

  // Precondition: every directory exists before cleaning.
  for (Path dir : doomed) {
    Assert.assertTrue(this.fs.exists(dir));
  }
  Assert.assertTrue(this.fs.exists(notDeleted1));

  List<FileSystemDatasetVersion> deletableVersions = ImmutableList.<FileSystemDatasetVersion>of(
      new MockFileSystemDatasetVersion(deleted1),
      new MockFileSystemDatasetVersion(deleted2),
      new MockFileSystemDatasetVersion(deleted3));
  cleaner.clean(deletableVersions, fsDataset);

  // The cleaned versions are gone.
  for (Path dir : doomed) {
    Assert.assertFalse(this.fs.exists(dir));
  }
  // The untouched version is still there.
  Assert.assertTrue(this.fs.exists(notDeleted1));
  // Parent "2016/01/01" became empty and was removed as well.
  Assert.assertFalse(this.fs.exists(deleted1.getParent()));
  // Parent "2016/01/02" still holds a version and therefore survives.
  Assert.assertTrue(this.fs.exists(notDeleted1.getParent()));
}
use of org.apache.gobblin.dataset.FileSystemDataset in project incubator-gobblin by apache.
the class ModDateTimeDatasetVersionFinder method findDatasetVersions.
/**
 * Returns exactly one version for the dataset: its root path, timestamped with the
 * modification time that the file system reports for that root.
 *
 * @param dataset dataset to inspect; cast to {@link FileSystemDataset}
 * @return a list holding the single {@link TimestampedDatasetVersion}
 * @throws IOException if the file status of the dataset root cannot be read
 */
@Override
public Collection<TimestampedDatasetVersion> findDatasetVersions(Dataset dataset) throws IOException {
  FileSystemDataset fsDataset = (FileSystemDataset) dataset;
  long rootModifiedMillis = this.fs.getFileStatus(fsDataset.datasetRoot()).getModificationTime();
  TimestampedDatasetVersion onlyVersion =
      new TimestampedDatasetVersion(new DateTime(rootModifiedMillis), fsDataset.datasetRoot());
  return Lists.newArrayList(onlyVersion);
}
use of org.apache.gobblin.dataset.FileSystemDataset in project incubator-gobblin by apache.
the class AbstractDatasetVersionFinder method findDatasetVersions.
/**
 * Find the versions of a dataset. Candidate versions are the paths under the dataset root that match
 * {@link #globVersionPattern()}; each match is turned into a version by {@code getDatasetVersion}, and
 * matches for which that method returns {@code null} are skipped.
 * See {@link org.apache.gobblin.data.management.retention.DatasetCleaner} for more information.
 *
 * @param dataset the dataset to search; must be a {@link FileSystemDataset}, whose root is the directory
 *        containing all versions of the dataset.
 * @return Collection of the {@link org.apache.gobblin.data.management.version.DatasetVersion}s found; empty
 *         if nothing matches the version pattern.
 * @throws IOException if the file system cannot be globbed.
 */
@Override
public Collection<T> findDatasetVersions(Dataset dataset) throws IOException {
  FileSystemDataset fsDataset = (FileSystemDataset) dataset;
  Path root = fsDataset.datasetRoot();
  Path versionGlob = new Path(root, globVersionPattern());
  FileStatus[] versionStatuses = this.fs.globStatus(versionGlob);
  List<T> dataSetVersions = Lists.newArrayList();
  // globStatus may return null (rather than an empty array) when the pattern contains no glob
  // characters and the path does not exist; treat that the same as "no versions found".
  if (versionStatuses == null) {
    return dataSetVersions;
  }
  for (FileStatus versionStatus : versionStatuses) {
    // Versions are identified by their path relative to the dataset root.
    T datasetVersion = getDatasetVersion(PathUtils.relativizePath(versionStatus.getPath(), root), versionStatus);
    if (datasetVersion != null) {
      dataSetVersions.add(datasetVersion);
    }
  }
  return dataSetVersions;
}
Aggregations