Use of org.apache.hadoop.fs.PathFilter in project carbondata by apache.
The class CarbonInputFormat, method getFileStatus:
private void getFileStatus(JobContext job, String[] segmentsToConsider, String[] filesToConsider,
    List<FileStatus> result) throws IOException {
  String[] partitionsToConsider = getValidPartitions(job);
  if (partitionsToConsider.length == 0) {
    throw new IOException("No partitions/data found");
  }
  PathFilter inputFilter = getDataFileFilter();
  AbsoluteTableIdentifier absIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());
  CarbonTablePath tablePath = getTablePath(absIdentifier);
  // get tokens for all the required FileSystems for the table path
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath },
      job.getConfiguration());
  // get all data files of valid partitions and segments
  for (int i = 0; i < partitionsToConsider.length; ++i) {
    String partition = partitionsToConsider[i];
    for (int j = 0; j < segmentsToConsider.length; ++j) {
      String segmentId = segmentsToConsider[j];
      String dataDirectoryPath = absIdentifier.appendWithLocalPrefix(
          tablePath.getCarbonDataDirectoryPath(partition, segmentId));
      if (filesToConsider.length == 0) {
        Path segmentPath = new Path(dataDirectoryPath);
        FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());
        getFileStatusInternal(inputFilter, fs, segmentPath, result);
      } else {
        for (int k = 0; k < filesToConsider.length; ++k) {
          String dataPath = absIdentifier.appendWithLocalPrefix(
              tablePath.getCarbonDataDirectoryPath(partition, segmentId)
                  + File.separator + filesToConsider[k]);
          Path filePath = new Path(dataPath);
          FileSystem fs = filePath.getFileSystem(job.getConfiguration());
          getFileStatusInternal(inputFilter, fs, filePath, result);
        }
      }
    }
  }
}
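The helpers getDataFileFilter() and getFileStatusInternal(...) are not shown above. A minimal sketch of what such helpers might look like, assuming the filter simply accepts files with the .carbondata extension; the names and the extension check here are illustrative, not the actual CarbonData implementation:

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class DataFileListingSketch {

  // Illustrative filter: accept only CarbonData fact files, skip everything else.
  static PathFilter getDataFileFilter() {
    return new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return path.getName().endsWith(".carbondata");
      }
    };
  }

  // Illustrative listing helper: add every matching non-directory entry under 'path' to 'result'.
  static void getFileStatusInternal(PathFilter inputFilter, FileSystem fs, Path path,
      List<FileStatus> result) throws IOException {
    if (fs.exists(path)) {
      for (FileStatus status : fs.listStatus(path, inputFilter)) {
        if (!status.isDirectory()) {
          result.add(status);
        }
      }
    }
  }
}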
Use of org.apache.hadoop.fs.PathFilter in project incubator-systemml by apache.
The class ApplyTfCSVMR, method deletePartFiles:
private static void deletePartFiles(FileSystem fs, Path path) throws FileNotFoundException, IOException {
  PathFilter filter = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      return file.getName().startsWith("part-");
    }
  };
  FileStatus[] list = fs.listStatus(path, filter);
  for (FileStatus stat : list) {
    fs.delete(stat.getPath(), false);
  }
}
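Because PathFilter has a single abstract method, the same filter can be written as a lambda on Java 8+. A minimal, self-contained sketch; the HDFS URI and output directory below are placeholders:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeletePartFilesExample {
  public static void main(String[] args) throws Exception {
    // Placeholder URI and path; adjust to your cluster and job output directory.
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:8020"), new Configuration());
    Path outputDir = new Path("/tmp/job-output");

    // Same filter as above, expressed as a lambda: keep only "part-*" files.
    FileStatus[] partFiles = fs.listStatus(outputDir, p -> p.getName().startsWith("part-"));
    for (FileStatus stat : partFiles) {
      fs.delete(stat.getPath(), false); // non-recursive delete of each part file
    }
  }
}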
Use of org.apache.hadoop.fs.PathFilter in project lucene-solr by apache.
The class HdfsDirectoryFactory, method cleanupOldIndexDirectories:
@Override
public void cleanupOldIndexDirectories(final String dataDir, final String currentIndexDir, boolean afterReload) {
  // Get the FileSystem object
  final Path dataDirPath = new Path(dataDir);
  final Configuration conf = getConf();
  FileSystem fileSystem = null;
  try {
    fileSystem = tmpFsCache.get(dataDir, () -> FileSystem.get(dataDirPath.toUri(), conf));
  } catch (ExecutionException e) {
    throw new RuntimeException(e);
  }
  boolean pathExists = false;
  try {
    pathExists = fileSystem.exists(dataDirPath);
  } catch (IOException e) {
    LOG.error("Error checking if hdfs path " + dataDir + " exists", e);
  }
  if (!pathExists) {
    LOG.warn("{} does not point to a valid data directory; skipping clean-up of old index directories.", dataDir);
    return;
  }
  // make sure we don't delete the current index directory
  final Path currentIndexDirPath = new Path(currentIndexDir);
  final FileSystem fs = fileSystem;
  FileStatus[] oldIndexDirs = null;
  try {
    oldIndexDirs = fileSystem.listStatus(dataDirPath, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        boolean accept = false;
        String pathName = path.getName();
        try {
          accept = fs.isDirectory(path) && !path.equals(currentIndexDirPath)
              && (pathName.equals("index") || pathName.matches(INDEX_W_TIMESTAMP_REGEX));
        } catch (IOException e) {
          LOG.error("Error checking if path {} is an old index directory, caused by: {}", path, e);
        }
        return accept;
      }
    });
  } catch (IOException ioExc) {
    LOG.error("Error checking for old index directories to clean-up.", ioExc);
  }
  if (oldIndexDirs == null || oldIndexDirs.length == 0) {
    // nothing to clean-up
    return;
  }
  List<Path> oldIndexPaths = new ArrayList<>(oldIndexDirs.length);
  for (FileStatus ofs : oldIndexDirs) {
    oldIndexPaths.add(ofs.getPath());
  }
  Collections.sort(oldIndexPaths, Collections.reverseOrder());
  Set<String> livePaths = getLivePaths();
  int i = 0;
  if (afterReload) {
    LOG.info("Will not remove most recent old directory on reload {}", oldIndexDirs[0]);
    i = 1;
  }
  LOG.info("Found {} old index directories to clean-up under {} afterReload={}",
      oldIndexDirs.length - i, dataDirPath, afterReload);
  for (; i < oldIndexPaths.size(); i++) {
    Path oldDirPath = oldIndexPaths.get(i);
    if (livePaths.contains(oldDirPath.toString())) {
      LOG.warn("Cannot delete directory {} because it is still being referenced in the cache.", oldDirPath);
    } else {
      try {
        if (fileSystem.delete(oldDirPath, true)) {
          LOG.info("Deleted old index directory {}", oldDirPath);
        } else {
          LOG.warn("Failed to delete old index directory {}", oldDirPath);
        }
      } catch (IOException e) {
        LOG.error("Failed to delete old index directory {} due to: {}", oldDirPath, e);
      }
    }
  }
}
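The PathFilter in this example combines a directory check with a name match against INDEX_W_TIMESTAMP_REGEX. A minimal, self-contained sketch of that filtering step on its own, assuming the regex matches names like index.<timestamp>; the regex value, HDFS URI, and directory paths here are illustrative placeholders, not the Solr constants:

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OldIndexDirListingSketch {

  // Illustrative pattern for timestamped index directories, e.g. "index.20180101000000000".
  private static final String INDEX_W_TIMESTAMP_REGEX = "index\\.[0-9]+";

  public static void main(String[] args) throws Exception {
    // Placeholder URI and paths; adjust to your HDFS cluster and core data directory.
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:8020"), new Configuration());
    Path dataDirPath = new Path("/solr/collection1/data");
    Path currentIndexDirPath = new Path(dataDirPath, "index.20180101000000000");

    // List candidate old index directories: every directory named "index" or
    // "index.<timestamp>" except the one currently in use.
    FileStatus[] oldIndexDirs = fs.listStatus(dataDirPath, path -> {
      String name = path.getName();
      try {
        return fs.isDirectory(path) && !path.equals(currentIndexDirPath)
            && (name.equals("index") || name.matches(INDEX_W_TIMESTAMP_REGEX));
      } catch (IOException e) {
        return false;
      }
    });

    List<Path> oldIndexPaths = new ArrayList<>();
    for (FileStatus status : oldIndexDirs) {
      oldIndexPaths.add(status.getPath());
    }
    // Newest first, matching the reverse-order sort in the Solr method above.
    Collections.sort(oldIndexPaths, Collections.reverseOrder());
    for (Path p : oldIndexPaths) {
      System.out.println("old index dir: " + p);
    }
  }
}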