Use of org.apache.hadoop.fs.PathFilter in project presto by prestodb: class TestHiveFileIterator, method testDefaultPathFilterWithRecursion.
@Test
public void testDefaultPathFilterWithRecursion() throws IOException {
    // set up
    File rootDir = createTempDir();
    String basePath = rootDir.getAbsolutePath();
    // create 8 files in root directory - 3 pathFilter matched and 5 non matched files
    createFiles(basePath, 3, true);
    createFiles(basePath, 5, false);
    // create two directories
    List<File> subDirs = createDirs(basePath, 2);
    // create 5 files in dir1 - 3 pathFilter matched and 2 non matched files
    String dir1 = subDirs.get(0).getAbsolutePath();
    createFiles(dir1, 3, true);
    createFiles(dir1, 2, false);
    // create 7 files in dir2 - 3 pathFilter matched and 4 non matched files
    String dir2 = subDirs.get(1).getAbsolutePath();
    createFiles(dir2, 3, true);
    createFiles(dir2, 4, false);
    Path rootPath = new Path("file://" + basePath + File.separator);
    PathFilter pathFilter = path -> true;
    HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), RECURSE, pathFilter);
    int actualCount = Iterators.size(hiveFileIterator);
    assertEquals(actualCount, 20);
    // cleanup
    deleteTestDir(rootDir);
}
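The test above passes an accept-all filter (path -> true), so all 20 files are counted regardless of the matched/non-matched split. For contrast, below is a minimal sketch of a selective PathFilter; the class name and the ".matched" suffix are assumptions for illustration only, since the actual names produced by createFiles are defined elsewhere in the test class.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;

    // Hypothetical selective filter: accept only paths whose names end with a given suffix.
    public final class SuffixPathFilter implements PathFilter {
        private final String suffix;

        public SuffixPathFilter(String suffix) {
            this.suffix = suffix;
        }

        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(suffix);
        }
    }

Assuming HiveFileIterator applies the filter to the files it lists, passing new SuffixPathFilter(".matched") in place of path -> true would restrict the iteration to the pathFilter-matched files only.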
Use of org.apache.hadoop.fs.PathFilter in project presto by prestodb: class TestHiveFileIterator, method testDefaultPathFilterNoRecursion.
@Test
public void testDefaultPathFilterNoRecursion() throws IOException {
    // set up
    File rootDir = createTempDir();
    String basePath = rootDir.getAbsolutePath();
    // create 8 files in root directory - 3 pathFilter matched and 5 non matched files
    createFiles(basePath, 3, true);
    createFiles(basePath, 5, false);
    Path rootPath = new Path("file://" + basePath + File.separator);
    PathFilter pathFilter = path -> true;
    HiveFileIterator hiveFileIterator = new HiveFileIterator(rootPath, listDirectoryOperation, new NamenodeStats(), IGNORED, pathFilter);
    int actualCount = Iterators.size(hiveFileIterator);
    assertEquals(actualCount, 8);
    // cleanup
    deleteTestDir(rootDir);
}
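PathFilter declares a single abstract method, accept(Path), so filters can be written as lambdas (as in path -> true above) and composed. A small hypothetical helper, not part of Hadoop, that ANDs two filters could look like this:

    import org.apache.hadoop.fs.PathFilter;

    // Hypothetical helper (not part of Hadoop): combine two PathFilters with a logical AND.
    public final class PathFilters {
        private PathFilters() {
        }

        public static PathFilter and(PathFilter first, PathFilter second) {
            return path -> first.accept(path) && second.accept(path);
        }
    }

For example, PathFilters.and(path -> true, path -> !path.getName().startsWith(".")) yields a filter that skips hidden entries.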
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache: class HFileArchiver, method archiveRegion.
/**
 * Remove an entire region from the table directory by archiving the region's hfiles.
 * @param fs {@link FileSystem} from which to remove the region
 * @param rootdir {@link Path} to the root directory where hbase files are stored (for building
 *          the archive path)
 * @param tableDir {@link Path} to where the table is being stored (for building the archive path)
 * @param regionDir {@link Path} to where a region is being stored (for building the archive path)
 * @return <tt>true</tt> if the region was successfully deleted. <tt>false</tt> if the filesystem
 *         operations could not complete.
 * @throws IOException if the request cannot be completed
 */
public static boolean archiveRegion(FileSystem fs, Path rootdir, Path tableDir, Path regionDir) throws IOException {
    // make sure we can archive
    if (tableDir == null || regionDir == null) {
        LOG.error("No archive directory could be found because tabledir (" + tableDir + ") or regiondir (" + regionDir + ") was null. Deleting files instead.");
        if (regionDir != null) {
            deleteRegionWithoutArchiving(fs, regionDir);
        }
        // we could not build an archive path, so report failure regardless of whether the
        // delete above removed the files correctly or not.
        return false;
    }
    LOG.debug("ARCHIVING {}", regionDir);
    // make sure the regiondir lives under the tabledir
    Preconditions.checkArgument(regionDir.toString().startsWith(tableDir.toString()));
    Path regionArchiveDir = HFileArchiveUtil.getRegionArchiveDir(rootdir, CommonFSUtils.getTableName(tableDir), regionDir.getName());
    FileStatusConverter getAsFile = new FileStatusConverter(fs);
    // otherwise, we attempt to archive the store files
    // build a collection of just the store directories to archive
    Collection<File> toArchive = new ArrayList<>();
    final PathFilter dirFilter = new FSUtils.DirFilter(fs);
    PathFilter nonHidden = new PathFilter() {
        @Override
        public boolean accept(Path file) {
            return dirFilter.accept(file) && !file.getName().startsWith(".");
        }
    };
    FileStatus[] storeDirs = CommonFSUtils.listStatus(fs, regionDir, nonHidden);
    // if there are no files, we can just delete the directory and return
    if (storeDirs == null) {
        LOG.debug("Directory {} empty.", regionDir);
        return deleteRegionWithoutArchiving(fs, regionDir);
    }
    // convert the store directories in the region to Files
    Stream.of(storeDirs).map(getAsFile).forEachOrdered(toArchive::add);
    LOG.debug("Archiving " + toArchive);
    List<File> failedArchive = resolveAndArchive(fs, regionArchiveDir, toArchive, EnvironmentEdgeManager.currentTime());
    if (!failedArchive.isEmpty()) {
        throw new FailedArchiveException("Failed to archive/delete all the files for region:" + regionDir.getName() + " into " + regionArchiveDir + ". Something is probably awry on the filesystem.", failedArchive.stream().map(FUNC_FILE_TO_PATH).collect(Collectors.toList()));
    }
    // if that was successful, then we delete the region
    return deleteRegionWithoutArchiving(fs, regionDir);
}
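The nonHidden filter above combines HBase's FSUtils.DirFilter (which asks the filesystem whether the path is a directory) with a leading-dot check. A self-contained sketch of the same idea using only the Hadoop FileSystem API, with a hypothetical class name and no HBase helpers, might look like this:

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;

    // Hypothetical helper: list the non-hidden child directories of a path.
    public final class NonHiddenDirs {
        private NonHiddenDirs() {
        }

        public static FileStatus[] list(FileSystem fs, Path dir) throws IOException {
            // listStatus applies the filter to each child path; here it only screens names.
            PathFilter nonHidden = p -> !p.getName().startsWith(".");
            FileStatus[] children = fs.listStatus(dir, nonHidden);
            // Directory-ness is checked on the returned statuses, mirroring what DirFilter
            // does via the filesystem in the HBase method above.
            return Arrays.stream(children).filter(FileStatus::isDirectory).toArray(FileStatus[]::new);
        }
    }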
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache: class WALSplitUtil, method getSplitEditFilesSorted.
/**
 * Returns a sorted set of edit files made by the splitter, excluding files with a '.temp' suffix.
 * @param walFS WAL FileSystem used to retrieve the split edits files.
 * @param regionDir WAL region dir to look for recovered edits files under.
 * @return Files in the passed <code>regionDir</code> as a sorted set.
 */
public static NavigableSet<Path> getSplitEditFilesSorted(final FileSystem walFS, final Path regionDir) throws IOException {
    NavigableSet<Path> filesSorted = new TreeSet<>();
    Path editsdir = getRegionDirRecoveredEditsDir(regionDir);
    if (!walFS.exists(editsdir)) {
        return filesSorted;
    }
    FileStatus[] files = CommonFSUtils.listStatus(walFS, editsdir, new PathFilter() {
        @Override
        public boolean accept(Path p) {
            boolean result = false;
            try {
                // Return files, and only files, that match the edit-file name pattern.
                // There can be other files in this directory besides edit files.
                // In particular, on error we move aside a bad edit file, giving
                // it a timestamp suffix. See moveAsideBadEditsFile.
                Matcher m = EDITFILES_NAME_PATTERN.matcher(p.getName());
                result = walFS.isFile(p) && m.matches();
                // Skip files ending with RECOVERED_LOG_TMPFILE_SUFFIX, because that
                // suffix means a WAL-splitting thread is still writing the file.
                if (p.getName().endsWith(RECOVERED_LOG_TMPFILE_SUFFIX)) {
                    result = false;
                }
                // Skip SeqId files
                if (isSequenceIdFile(p)) {
                    result = false;
                }
            } catch (IOException e) {
                LOG.warn("Failed isFile check on {}", p, e);
            }
            return result;
        }
    });
    if (ArrayUtils.isNotEmpty(files)) {
        Arrays.asList(files).forEach(status -> filesSorted.add(status.getPath()));
    }
    return filesSorted;
}
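The anonymous filter above accepts only finished edit files: the name must match EDITFILES_NAME_PATTERN, must not end with the '.temp' suffix of an in-progress split, and must not be a sequence-id marker. A standalone sketch of the name-based part of that logic follows; the regex and the suffix literal are illustrative stand-ins (the real constants live in WALSplitUtil), and the walFS.isFile and isSequenceIdFile checks are omitted because they need a FileSystem.

    import java.util.regex.Pattern;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;

    // Sketch of the name-based part of the recovered-edits filter. The pattern and the
    // ".temp" literal are assumptions for illustration only.
    public final class RecoveredEditsNameFilter implements PathFilter {
        private static final Pattern EDIT_FILE_NAME = Pattern.compile("-?[0-9]+");
        private static final String TMP_SUFFIX = ".temp";

        @Override
        public boolean accept(Path p) {
            String name = p.getName();
            // Reject in-progress split output and anything not shaped like an edit file.
            return !name.endsWith(TMP_SUFFIX) && EDIT_FILE_NAME.matcher(name).matches();
        }
    }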
Use of org.apache.hadoop.fs.PathFilter in project hbase by apache: class FSUtils, method getTableFragmentation.
/**
 * Runs through the HBase rootdir and checks how many stores for each table
 * have more than one file in them. Checks -ROOT- and hbase:meta too. The total
 * percentage across all tables is stored under the special key "-TOTAL-".
 *
 * @param fs The file system to use
 * @param hbaseRootDir The root directory to scan
 * @return A map for each table and its percentage (never null)
 * @throws IOException When scanning the directory fails
 */
public static Map<String, Integer> getTableFragmentation(final FileSystem fs, final Path hbaseRootDir) throws IOException {
    Map<String, Integer> frags = new HashMap<>();
    int cfCountTotal = 0;
    int cfFragTotal = 0;
    PathFilter regionFilter = new RegionDirFilter(fs);
    PathFilter familyFilter = new FamilyDirFilter(fs);
    List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
    for (Path d : tableDirs) {
        int cfCount = 0;
        int cfFrag = 0;
        FileStatus[] regionDirs = fs.listStatus(d, regionFilter);
        for (FileStatus regionDir : regionDirs) {
            Path dd = regionDir.getPath();
            // else it's a region name, now look in the region for families
            FileStatus[] familyDirs = fs.listStatus(dd, familyFilter);
            for (FileStatus familyDir : familyDirs) {
                cfCount++;
                cfCountTotal++;
                Path family = familyDir.getPath();
                // now in family make sure only one file
                FileStatus[] familyStatus = fs.listStatus(family);
                if (familyStatus.length > 1) {
                    cfFrag++;
                    cfFragTotal++;
                }
            }
        }
        // compute percentage per table and store in result list
        frags.put(CommonFSUtils.getTableName(d).getNameAsString(), cfCount == 0 ? 0 : Math.round((float) cfFrag / cfCount * 100));
    }
    // set overall percentage for all tables
    frags.put("-TOTAL-", cfCountTotal == 0 ? 0 : Math.round((float) cfFragTotal / cfCountTotal * 100));
    return frags;
}
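As a worked example of the percentage computation above (the numbers are made up): if a table has 12 column-family directories and 3 of them contain more than one store file, its fragmentation comes out to 25.

    int cfCount = 12; // column-family directories inspected for one table
    int cfFrag = 3;   // those containing more than one store file
    int pct = cfCount == 0 ? 0 : Math.round((float) cfFrag / cfCount * 100);
    // pct == 25; getTableFragmentation stores this under the table's name in the result map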