Example use of org.apache.hadoop.fs.PathFilter in the Apache Oozie project: class ShareLibService, method getLatestLibPath.
/**
 * Gets the latest library path under {@code rootDir}.
 * <p>
 * Child entries whose names start with {@code prefix} are expected to carry a
 * timestamp suffix parseable by {@code dateFormat}; the entry with the most
 * recent timestamp wins. Entries with an unparseable suffix are skipped.
 *
 * @param rootDir the root dir to scan
 * @param prefix the prefix that timestamped entries must start with
 * @return the path of the entry with the latest timestamp, or {@code rootDir}
 *         itself if no timestamped entry is found
 * @throws IOException Signals that an I/O exception has occurred.
 */
public Path getLatestLibPath(Path rootDir, final String prefix) throws IOException {
    Date max = new Date(0L);
    Path path = null;
    // Only consider children whose name starts with the requested prefix.
    PathFilter directoryFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(prefix);
        }
    };
    FileStatus[] files = fs.listStatus(rootDir, directoryFilter);
    for (FileStatus file : files) {
        // getName() already returns a String; the redundant toString() was dropped.
        String name = file.getPath().getName();
        // The timestamp is whatever follows the prefix in the entry name.
        String time = name.substring(prefix.length());
        Date d = null;
        try {
            d = dateFormat.parse(time);
        } catch (ParseException e) {
            // Not a timestamped entry; ignore it and keep scanning.
            continue;
        }
        if (d.compareTo(max) > 0) {
            path = file.getPath();
            max = d;
        }
    }
    // If there are no timestamped directories, fall back to root directory
    if (path == null) {
        path = rootDir;
    }
    return path;
}
Example use of org.apache.hadoop.fs.PathFilter in the osmlab atlas-checks project: class AtlasDataSource, method load.
/**
 * Loads an {@link Atlas} from the input location. Intermediate {@link Atlas}es created are
 * submitted to the provided {@link Consumer} to allow for any additional handling.
 *
 * @param input
 *            location of the {@link Atlas} source
 * @param country
 *            country of the {@link Atlas}
 * @param intermediateAtlasHandler
 *            handler given intermediate {@link Atlas} files when created
 * @return {@link Atlas} representation of the data source, or {@code null} if no
 *         atlas or PBF source could be found at the input location
 */
public Atlas load(final String input, final String country, final Consumer<Atlas> intermediateAtlasHandler) {
    // Path filters for supported file types
    final PathFilter pbfFilter = new OsmPbfFilePathFilter();
    final PathFilter atlasFilter = new CountrySpecificAtlasFilePathFilter(country);
    final Optional<Resource> resource = this.loadHelper.collectSourceFile(input, pbfFilter, atlasFilter);
    if (resource.isPresent()) {
        // The input resolved to a single file: load it directly as atlas or PBF.
        final Resource dataSource = resource.get();
        if (Atlas.isAtlas(dataSource)) {
            return new AtlasResourceLoader().load(dataSource);
        } else if (FileSuffix.resourceFilter(FileSuffix.PBF).test(dataSource)) {
            this.logger.info("Loading Atlas from OSM protobuf {}", input);
            final Atlas atlas = this.loadPbf(dataSource, country);
            // Hand the freshly built intermediate atlas to the caller's hook.
            intermediateAtlasHandler.accept(atlas);
            return atlas;
        }
    } else {
        // The input is a directory (possibly country-specific); prefer atlas
        // files and fall back to PBFs only when no atlas files are present.
        final String directory = this.pathResolver.resolvePath(input, country);
        final List<Resource> atlasResources = this.loadHelper.collectSourceFiles(directory, true, atlasFilter);
        if (!atlasResources.isEmpty()) {
            return new AtlasResourceLoader().load(atlasResources);
        } else {
            final List<Resource> pbfResources = this.loadHelper.collectSourceFiles(directory, true, pbfFilter);
            final int pbfCount = pbfResources.size();
            if (pbfCount > 0) {
                this.logger.info("Loading Atlas from {} OSM protobuf(s) found in {}", pbfCount, input);
                // Build each PBF into an atlas in parallel, exposing every
                // intermediate atlas to the handler before combining them.
                final List<Atlas> atlases = pbfResources.parallelStream()
                        .map(dataSource -> this.loadPbf(dataSource, country))
                        .peek(intermediateAtlasHandler)
                        .collect(Collectors.toList());
                return new MultiAtlas(atlases);
            }
        }
    }
    // Nothing matched either filter anywhere under the input location.
    return null;
}
Example use of org.apache.hadoop.fs.PathFilter in the Apache incubator-gobblin project: class ConfigBasedDatasetTest, method testGetCopyableFilesHelper.
/**
 * Builds a {@code ConfigBasedDataset} backed by mocked replication endpoints and
 * returns the copy entities it produces for the given source/destination pair.
 *
 * @param sourceDir directory the mocked "copy from" endpoint lists files from
 * @param destinationDir directory the mocked "copy to" endpoint lists files from
 * @param sourceWatermark watermark value reported by the source endpoint
 * @param isFilterEnabled when true, configures the HiddenFilter and applies it to directories
 * @return the copyable files computed by the dataset under test
 * @throws Exception on any setup or listing failure
 */
public Collection<? extends CopyEntity> testGetCopyableFilesHelper(String sourceDir, String destinationDir, long sourceWatermark, boolean isFilterEnabled) throws Exception {
    FileSystem fileSystem = FileSystem.getLocal(new Configuration());
    URI localUri = fileSystem.getUri();

    // Base copy properties; optionally switch on the hidden-path filter.
    Properties props = new Properties();
    props.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");
    PathFilter filter = DatasetUtils.instantiatePathFilter(props);
    boolean filterDirectories = false;
    if (isFilterEnabled) {
        props.setProperty(DatasetUtils.CONFIGURATION_KEY_PREFIX + "path.filter.class", "org.apache.gobblin.util.filters.HiddenFilter");
        props.setProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "true");
        filter = DatasetUtils.instantiatePathFilter(props);
        filterDirectories = Boolean.parseBoolean(props.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false"));
    }

    CopyConfiguration copyConfig = CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), props)
            .publishDir(new Path(destinationDir))
            .preserve(PreserveAttributes.fromMnemonicString("ugp"))
            .build();

    // Replication configuration mock running in PULL mode.
    ReplicationMetaData metaData = Mockito.mock(ReplicationMetaData.class);
    Mockito.when(metaData.toString()).thenReturn("Mock Meta Data");
    ReplicationConfiguration replicationConfig = Mockito.mock(ReplicationConfiguration.class);
    Mockito.when(replicationConfig.getCopyMode()).thenReturn(ReplicationCopyMode.PULL);
    Mockito.when(replicationConfig.getMetaData()).thenReturn(metaData);

    // Source endpoint: real file listing over sourceDir, with a watermark.
    HadoopFsEndPoint sourceEndPoint = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(sourceEndPoint.getDatasetPath()).thenReturn(new Path(sourceDir));
    Mockito.when(sourceEndPoint.getFsURI()).thenReturn(localUri);
    ComparableWatermark sourceWm = new LongWatermark(sourceWatermark);
    Mockito.when(sourceEndPoint.getWatermark()).thenReturn(Optional.of(sourceWm));
    Mockito.when(sourceEndPoint.getFiles()).thenReturn(FileListUtils.listFilesRecursively(fileSystem, new Path(sourceDir), filter, filterDirectories));

    // Target endpoint: real file listing over destinationDir, no watermark.
    HadoopFsEndPoint targetEndPoint = Mockito.mock(HadoopFsEndPoint.class);
    Mockito.when(targetEndPoint.getDatasetPath()).thenReturn(new Path(destinationDir));
    Mockito.when(targetEndPoint.getFsURI()).thenReturn(localUri);
    Optional<ComparableWatermark> absentWm = Optional.absent();
    Mockito.when(targetEndPoint.getWatermark()).thenReturn(absentWm);
    Mockito.when(targetEndPoint.getFiles()).thenReturn(FileListUtils.listFilesRecursively(fileSystem, new Path(destinationDir), filter, filterDirectories));

    CopyRoute copyRoute = Mockito.mock(CopyRoute.class);
    Mockito.when(copyRoute.getCopyFrom()).thenReturn(sourceEndPoint);
    Mockito.when(copyRoute.getCopyTo()).thenReturn(targetEndPoint);

    ConfigBasedDataset dataset = new ConfigBasedDataset(replicationConfig, props, copyRoute);
    return dataset.getCopyableFiles(fileSystem, copyConfig);
}
Example use of org.apache.hadoop.fs.PathFilter in the Apache incubator-gobblin project: class RecursivePathFinder, method getPaths.
/**
 * Recursively collects the copyable file statuses under the configured root path.
 *
 * @param skipHiddenPaths when true, hidden paths are excluded in addition to the configured filter
 * @return the set of matching file statuses; empty if the root path does not exist
 * @throws IOException if listing the file system fails
 */
public Set<FileStatus> getPaths(boolean skipHiddenPaths) throws IOException {
    if (!this.fs.exists(this.rootPath)) {
        return Sets.newHashSet();
    }
    // Optionally AND a HiddenFilter onto the configured filter.
    PathFilter effectiveFilter = this.pathFilter;
    if (skipHiddenPaths) {
        effectiveFilter = new AndPathFilter(new HiddenFilter(), this.pathFilter);
    }
    return Sets.newHashSet(
            FileListUtils.listFilesToCopyAtPath(this.fs, this.rootPath, effectiveFilter, includeEmptyDirectories));
}
Example use of org.apache.hadoop.fs.PathFilter in the Apache incubator-gobblin project: class MetricsFileSystemInstrumentationTest, method testListStatusPathsWithFilter.
/**
 * Checks that a filtered multi-path listStatus bumps the listStatus timer once per
 * input path and that only entries matching the filter are returned. (Disabled.)
 *
 * @throws IOException if the HDFS fixture cannot be created or listed
 * @throws URISyntaxException if the instrumented URI is malformed
 */
@Test(enabled = false)
public void testListStatusPathsWithFilter() throws IOException, URISyntaxException {
    HDFSRoot hdfsRoot = new HDFSRoot("/tmp/ListStatusPathsWithFilter");
    MetricsFileSystemInstrumentation fs =
            (MetricsFileSystemInstrumentation) FileSystem.get(new URI(instrumentedURI), new Configuration());
    Path[] targets = { hdfsRoot.filePath2, hdfsRoot.dirPath2, hdfsRoot.dirPath3 };
    // Keep only entries whose path ends with ".ext".
    FileStatus[] matches = fs.listStatus(targets, path -> path.toString().endsWith(".ext"));
    Assert.assertEquals(fs.listStatusTimer.getCount(), 3);
    Assert.assertEquals(matches.length, 2);
    hdfsRoot.cleanupRoot();
}
Aggregations