use of org.apache.hadoop.fs.PathFilter in project gatk by broadinstitute.
the class ReadsSparkSource method getHeader.
/**
 * Loads the header using Hadoop-BAM.
 * @param filePath path to the bam.
 * @param referencePath Reference path or null if not available. Reference is required for CRAM files.
 * @return the header for the bam.
 */
public SAMFileHeader getHeader(final String filePath, final String referencePath) {
    // GCS case
    if (BucketUtils.isCloudStorageUrl(filePath)) {
        try (ReadsDataSource readsDataSource = new ReadsDataSource(IOUtils.getPath(filePath))) {
            return readsDataSource.getHeader();
        }
    }
    // local file or HDFS case
    try {
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(ctx.hadoopConfiguration());
        if (fs.isDirectory(path)) {
            FileStatus[] bamFiles = fs.listStatus(path, new PathFilter() {
                private static final long serialVersionUID = 1L;

                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(HADOOP_PART_PREFIX);
                }
            });
            if (bamFiles.length == 0) {
                throw new UserException("No BAM files to load header from in: " + path);
            }
            // Hadoop-BAM writes the same header to each shard, so use the first one
            path = bamFiles[0].getPath();
        }
        setHadoopBAMConfigurationProperties(filePath, referencePath);
        return SAMHeaderReader.readSAMHeaderFrom(path, ctx.hadoopConfiguration());
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePath + "\n Caused by:" + e.getMessage(), e);
    }
}
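Because PathFilter declares a single abstract method, accept(Path), the anonymous class above can be replaced by a lambda on Java 8+. The following is a minimal, self-contained sketch of the shard-selection step only; the "part-" literal and the /data/reads directory are assumptions made for illustration (the GATK code uses its own HADOOP_PART_PREFIX constant).

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ShardHeaderPicker {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/data/reads"); // hypothetical sharded-BAM directory
        FileSystem fs = dir.getFileSystem(conf);

        // PathFilter has a single abstract method, so a lambda can stand in for the anonymous class.
        PathFilter partFilter = p -> p.getName().startsWith("part-");

        FileStatus[] shards = fs.listStatus(dir, partFilter);
        if (shards.length == 0) {
            throw new IOException("No shard files found in: " + dir);
        }
        // All shards carry the same header, so any one of them (e.g. the first) is enough.
        System.out.println("Would read header from: " + shards[0].getPath());
    }
}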
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class BackupUtils method getWALFilesOlderThan.
/**
 * Get list of all old WAL files (WALs and archive)
 * @param c configuration
 * @param hostTimestampMap {host,timestamp} map
 * @return list of WAL files
 * @throws IOException exception
 */
public static List<String> getWALFilesOlderThan(final Configuration c, final HashMap<String, Long> hostTimestampMap) throws IOException {
    Path rootDir = FSUtils.getRootDir(c);
    Path logDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
    Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    List<String> logFiles = new ArrayList<String>();
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            try {
                if (AbstractFSWALProvider.isMetaFile(p)) {
                    return false;
                }
                String host = parseHostNameFromLogFile(p);
                if (host == null) {
                    return false;
                }
                Long oldTimestamp = hostTimestampMap.get(host);
                Long currentLogTS = BackupUtils.getCreationTime(p);
                return currentLogTS <= oldTimestamp;
            } catch (Exception e) {
                LOG.warn("Can not parse " + p, e);
                return false;
            }
        }
    };
    FileSystem fs = FileSystem.get(c);
    logFiles = BackupUtils.getFiles(fs, logDir, logFiles, filter);
    logFiles = BackupUtils.getFiles(fs, oldLogDir, logFiles, filter);
    return logFiles;
}
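The anonymous filter above folds three checks into one accept method: the file is not a meta WAL, its host name can be parsed, and its creation timestamp is old enough. When such checks need to be reused separately, PathFilter instances can be composed; the helper below is a generic sketch under that assumption, and PathFilters is a hypothetical class, not part of Hadoop or HBase.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PathFilters {
    private PathFilters() {}

    // Logical AND of several filters: a path is accepted only if every filter accepts it.
    public static PathFilter and(final PathFilter... filters) {
        return p -> {
            for (PathFilter f : filters) {
                if (!f.accept(p)) {
                    return false;
                }
            }
            return true;
        };
    }
}

With a helper like this, the WAL-age check could be written as PathFilters.and(notMetaFilter, olderThanCutoffFilter), where both operands are hypothetical filters implementing the individual checks from the method above.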
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class WALSplitter method writeRegionSequenceIdFile.
/**
 * Create a file whose name is the region open sequence id
 * @param fs the filesystem to write to
 * @param regiondir the region directory containing the recovered edits directory
 * @param newSeqId the candidate new sequence id
 * @param saftyBumper safety margin added to the chosen sequence id
 * @return long new sequence Id value
 * @throws IOException
 */
public static long writeRegionSequenceIdFile(final FileSystem fs, final Path regiondir, long newSeqId, long saftyBumper) throws IOException {
    Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
    long maxSeqId = 0;
    FileStatus[] files = null;
    if (fs.exists(editsdir)) {
        files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return isSequenceIdFile(p);
            }
        });
        if (files != null) {
            for (FileStatus status : files) {
                String fileName = status.getPath().getName();
                try {
                    Long tmpSeqId = Long.parseLong(fileName.substring(0, fileName.length() - SEQUENCE_ID_FILE_SUFFIX_LENGTH));
                    maxSeqId = Math.max(tmpSeqId, maxSeqId);
                } catch (NumberFormatException ex) {
                    LOG.warn("Invalid SeqId File Name=" + fileName);
                }
            }
        }
    }
    if (maxSeqId > newSeqId) {
        newSeqId = maxSeqId;
    }
    // bump up SeqId
    newSeqId += saftyBumper;
    // write a new seqId file
    Path newSeqIdFile = new Path(editsdir, newSeqId + SEQUENCE_ID_FILE_SUFFIX);
    if (newSeqId != maxSeqId) {
        try {
            if (!fs.createNewFile(newSeqIdFile) && !fs.exists(newSeqIdFile)) {
                throw new IOException("Failed to create SeqId file:" + newSeqIdFile);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Wrote region seqId=" + newSeqIdFile + " to file, newSeqId=" + newSeqId + ", maxSeqId=" + maxSeqId);
            }
        } catch (FileAlreadyExistsException ignored) {
            // latest hdfs throws this exception. it's all right if newSeqIdFile already exists
        }
    }
    // remove old ones
    if (files != null) {
        for (FileStatus status : files) {
            if (newSeqIdFile.equals(status.getPath())) {
                continue;
            }
            fs.delete(status.getPath(), false);
        }
    }
    return newSeqId;
}
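The core of this method is a PathFilter that selects only sequence-id marker files and then takes the maximum of their numeric name prefixes. A stripped-down sketch of that pattern is shown below; the .seqid suffix and the MaxSuffixedId class are assumptions made for illustration, not the constants or helpers used by WALSplitter.

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class MaxSuffixedId {
    // Suffix chosen for illustration only; HBase keeps the real value in SEQUENCE_ID_FILE_SUFFIX.
    private static final String SUFFIX = ".seqid";

    // Returns the largest numeric prefix among files named "<number><SUFFIX>" in dir, or 0 if none.
    public static long maxId(FileSystem fs, Path dir) throws IOException {
        PathFilter suffixFilter = p -> p.getName().endsWith(SUFFIX);
        long max = 0L;
        for (FileStatus status : fs.listStatus(dir, suffixFilter)) {
            String name = status.getPath().getName();
            try {
                max = Math.max(max, Long.parseLong(name.substring(0, name.length() - SUFFIX.length())));
            } catch (NumberFormatException ignored) {
                // skip files whose prefix is not a number
            }
        }
        return max;
    }
}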
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class TestDeleteColumnFamilyProcedureFromClient method deleteColumnFamilyTwice.
@Test
public void deleteColumnFamilyTwice() throws Exception {
    Admin admin = TEST_UTIL.getAdmin();
    HTableDescriptor beforehtd = admin.getTableDescriptor(TABLENAME);
    String cfToDelete = "cf1";
    FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
    // 1 - Check if table exists in descriptor
    assertTrue(admin.isTableAvailable(TABLENAME));
    // 2 - Check if the target column family exists in the descriptor
    HColumnDescriptor[] families = beforehtd.getColumnFamilies();
    boolean foundCF = false;
    for (int i = 0; i < families.length; i++) {
        if (families[i].getNameAsString().equals(cfToDelete)) {
            foundCF = true;
            break;
        }
    }
    assertTrue(foundCF);
    // 3 - Check if table exists in FS
    Path tableDir = FSUtils.getTableDir(TEST_UTIL.getDefaultRootDirPath(), TABLENAME);
    assertTrue(fs.exists(tableDir));
    // 4 - Check if the target column family exists in FS
    FileStatus[] fileStatus = fs.listStatus(tableDir);
    foundCF = false;
    for (int i = 0; i < fileStatus.length; i++) {
        if (fileStatus[i].isDirectory()) {
            FileStatus[] cf = fs.listStatus(fileStatus[i].getPath(), new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !p.getName().contains(HConstants.RECOVERED_EDITS_DIR);
                }
            });
            for (int j = 0; j < cf.length; j++) {
                if (cf[j].isDirectory() && cf[j].getPath().getName().equals(cfToDelete)) {
                    foundCF = true;
                    break;
                }
            }
        }
        if (foundCF) {
            break;
        }
    }
    assertTrue(foundCF);
    // TEST - Disable and delete the column family
    if (admin.isTableEnabled(TABLENAME)) {
        admin.disableTable(TABLENAME);
    }
    admin.deleteColumnFamily(TABLENAME, Bytes.toBytes(cfToDelete));
    // 5 - Check if the target column family is gone from the FS
    fileStatus = fs.listStatus(tableDir);
    for (int i = 0; i < fileStatus.length; i++) {
        if (fileStatus[i].isDirectory()) {
            FileStatus[] cf = fs.listStatus(fileStatus[i].getPath(), new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !WALSplitter.isSequenceIdFile(p);
                }
            });
            for (int j = 0; j < cf.length; j++) {
                if (cf[j].isDirectory()) {
                    assertFalse(cf[j].getPath().getName().equals(cfToDelete));
                }
            }
        }
    }
    try {
        // Test: delete again
        admin.deleteColumnFamily(TABLENAME, Bytes.toBytes(cfToDelete));
        Assert.fail("Deleting a non-existent column family should fail");
    } catch (InvalidFamilyOperationException e) {
        // Expected.
    }
}
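Both anonymous filters in this test exclude bookkeeping entries (the recovered-edits directory, sequence-id files) so that only real column-family directories are compared. The sketch below shows the same kind of check written against FileSystem.globStatus, which also accepts a PathFilter; FamilyDirFinder and findFamilyDirs are hypothetical names, and the recovered.edits literal stands in for HConstants.RECOVERED_EDITS_DIR.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class FamilyDirFinder {
    // Returns the directories named `family` one level below each region directory
    // under tableDir, skipping recovered-edits paths via the PathFilter.
    public static List<Path> findFamilyDirs(FileSystem fs, Path tableDir, String family) throws IOException {
        PathFilter notRecoveredEdits = p -> !p.getName().contains("recovered.edits");
        List<Path> result = new ArrayList<>();
        // "*/*" matches <region>/<child> under the table directory; the filter drops recovered-edits paths.
        FileStatus[] children = fs.globStatus(new Path(tableDir, "*/*"), notRecoveredEdits);
        if (children == null) {
            return result;
        }
        for (FileStatus child : children) {
            if (child.isDirectory() && child.getPath().getName().equals(family)) {
                result.add(child.getPath());
            }
        }
        return result;
    }
}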
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class FSVisitor method visitRegionStoreFiles.
/**
 * Iterate over the region store files
 *
 * @param fs {@link FileSystem}
 * @param regionDir {@link Path} to the region directory
 * @param visitor callback object to get the store files
 * @throws IOException if an error occurred while scanning the directory
 */
public static void visitRegionStoreFiles(final FileSystem fs, final Path regionDir, final StoreFileVisitor visitor) throws IOException {
    List<FileStatus> families = FSUtils.listStatusWithStatusFilter(fs, regionDir, new FSUtils.FamilyDirFilter(fs));
    if (families == null) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("No families under region directory:" + regionDir);
        }
        return;
    }
    PathFilter fileFilter = new FSUtils.FileFilter(fs);
    for (FileStatus family : families) {
        Path familyDir = family.getPath();
        String familyName = familyDir.getName();
        // get all the storeFiles in the family
        FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir, fileFilter);
        if (storeFiles == null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("No hfiles found for family: " + familyDir + ", skipping.");
            }
            continue;
        }
        for (FileStatus hfile : storeFiles) {
            Path hfilePath = hfile.getPath();
            visitor.storeFile(regionDir.getName(), familyName, hfilePath.getName());
        }
    }
}
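FSUtils.FamilyDirFilter and FSUtils.FileFilter are PathFilter implementations that take a FileSystem so they can check the status of each candidate path. A minimal sketch of that pattern is below; DirectoryOnlyFilter is a hypothetical name, not part of HBase, and the note about RPC cost reflects general FileSystem behaviour rather than anything specific to FSVisitor.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Accepts only paths that are directories. Each accept() call issues a
// getFileStatus lookup, which is why filters that already hold a FileStatus
// (as listStatusWithStatusFilter does above) can be cheaper.
public class DirectoryOnlyFilter implements PathFilter {
    private final FileSystem fs;

    public DirectoryOnlyFilter(FileSystem fs) {
        this.fs = fs;
    }

    @Override
    public boolean accept(Path p) {
        try {
            return fs.getFileStatus(p).isDirectory();
        } catch (IOException e) {
            // Treat unreadable or vanished paths as non-matching.
            return false;
        }
    }
}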