use of org.apache.hadoop.fs.PathFilter in project gatk by broadinstitute.
the class ReadsSparkSource method getHeader.
/**
 * Loads the header using Hadoop-BAM.
 * @param filePath path to the bam.
 * @param referencePath Reference path or null if not available. Reference is required for CRAM files.
 * @return the header for the bam.
 */
public SAMFileHeader getHeader(final String filePath, final String referencePath) {
    // GCS case
    if (BucketUtils.isCloudStorageUrl(filePath)) {
        try (ReadsDataSource readsDataSource = new ReadsDataSource(IOUtils.getPath(filePath))) {
            return readsDataSource.getHeader();
        }
    }
    // local file or HDFS case
    try {
        Path path = new Path(filePath);
        FileSystem fs = path.getFileSystem(ctx.hadoopConfiguration());
        if (fs.isDirectory(path)) {
            FileStatus[] bamFiles = fs.listStatus(path, new PathFilter() {
                private static final long serialVersionUID = 1L;

                @Override
                public boolean accept(Path path) {
                    return path.getName().startsWith(HADOOP_PART_PREFIX);
                }
            });
            if (bamFiles.length == 0) {
                throw new UserException("No BAM files to load header from in: " + path);
            }
            // Hadoop-BAM writes the same header to each shard, so use the first one
            path = bamFiles[0].getPath();
        }
        setHadoopBAMConfigurationProperties(filePath, referencePath);
        return SAMHeaderReader.readSAMHeaderFrom(path, ctx.hadoopConfiguration());
    } catch (IOException | IllegalArgumentException e) {
        throw new UserException("Failed to read bam header from " + filePath + "\n Caused by:" + e.getMessage(), e);
    }
}
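Because PathFilter declares a single abstract method, accept(Path), the anonymous class above can be replaced by a lambda on Java 8+. The following is a minimal, self-contained sketch of the shard-selection step only; the "part-" literal and the /data/reads directory are assumptions made for illustration (the GATK code uses its own HADOOP_PART_PREFIX constant).

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ShardHeaderPicker {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/data/reads"); // hypothetical sharded-BAM directory
        FileSystem fs = dir.getFileSystem(conf);

        // PathFilter has a single abstract method, so a lambda can stand in for the anonymous class.
        PathFilter partFilter = p -> p.getName().startsWith("part-");

        FileStatus[] shards = fs.listStatus(dir, partFilter);
        if (shards.length == 0) {
            throw new IOException("No shard files found in: " + dir);
        }
        // All shards carry the same header, so any one of them (e.g. the first) is enough.
        System.out.println("Would read header from: " + shards[0].getPath());
    }
}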
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class BackupUtils method getWALFilesOlderThan.
/**
 * Get list of all old WAL files (WALs and archive)
 * @param c configuration
 * @param hostTimestampMap {host,timestamp} map
 * @return list of WAL files
 * @throws IOException exception
 */
public static List<String> getWALFilesOlderThan(final Configuration c, final HashMap<String, Long> hostTimestampMap) throws IOException {
    Path rootDir = FSUtils.getRootDir(c);
    Path logDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
    Path oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    List<String> logFiles = new ArrayList<String>();
    PathFilter filter = new PathFilter() {
        @Override
        public boolean accept(Path p) {
            try {
                if (AbstractFSWALProvider.isMetaFile(p)) {
                    return false;
                }
                String host = parseHostNameFromLogFile(p);
                if (host == null) {
                    return false;
                }
                Long oldTimestamp = hostTimestampMap.get(host);
                Long currentLogTS = BackupUtils.getCreationTime(p);
                return currentLogTS <= oldTimestamp;
            } catch (Exception e) {
                LOG.warn("Can not parse " + p, e);
                return false;
            }
        }
    };
    FileSystem fs = FileSystem.get(c);
    logFiles = BackupUtils.getFiles(fs, logDir, logFiles, filter);
    logFiles = BackupUtils.getFiles(fs, oldLogDir, logFiles, filter);
    return logFiles;
}
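The anonymous filter above folds three checks into one accept method: the file is not a meta WAL, its host name can be parsed, and its creation timestamp is old enough. When such checks need to be reused separately, PathFilter instances can be composed; the helper below is a generic sketch under that assumption, and PathFilters is a hypothetical class, not part of Hadoop or HBase.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class PathFilters {
    private PathFilters() {}

    // Logical AND of several filters: a path is accepted only if every filter accepts it.
    public static PathFilter and(final PathFilter... filters) {
        return p -> {
            for (PathFilter f : filters) {
                if (!f.accept(p)) {
                    return false;
                }
            }
            return true;
        };
    }
}

With a helper like this, the WAL-age check could be written as PathFilters.and(notMetaFilter, olderThanCutoffFilter), where both operands are hypothetical filters implementing the individual checks from the method above.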
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class WALSplitter method writeRegionSequenceIdFile.
/**
 * Create a file whose name is the region open sequence id
 * @param fs the filesystem to write to
 * @param regiondir the region directory containing the recovered edits directory
 * @param newSeqId the candidate new sequence id
 * @param saftyBumper safety margin added to the chosen sequence id
 * @return long new sequence Id value
 * @throws IOException
 */
public static long writeRegionSequenceIdFile(final FileSystem fs, final Path regiondir, long newSeqId, long saftyBumper) throws IOException {
    Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
    long maxSeqId = 0;
    FileStatus[] files = null;
    if (fs.exists(editsdir)) {
        files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return isSequenceIdFile(p);
            }
        });
        if (files != null) {
            for (FileStatus status : files) {
                String fileName = status.getPath().getName();
                try {
                    Long tmpSeqId = Long.parseLong(fileName.substring(0, fileName.length() - SEQUENCE_ID_FILE_SUFFIX_LENGTH));
                    maxSeqId = Math.max(tmpSeqId, maxSeqId);
                } catch (NumberFormatException ex) {
                    LOG.warn("Invalid SeqId File Name=" + fileName);
                }
            }
        }
    }
    if (maxSeqId > newSeqId) {
        newSeqId = maxSeqId;
    }
    // bump up SeqId
    newSeqId += saftyBumper;
    // write a new seqId file
    Path newSeqIdFile = new Path(editsdir, newSeqId + SEQUENCE_ID_FILE_SUFFIX);
    if (newSeqId != maxSeqId) {
        try {
            if (!fs.createNewFile(newSeqIdFile) && !fs.exists(newSeqIdFile)) {
                throw new IOException("Failed to create SeqId file:" + newSeqIdFile);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Wrote region seqId=" + newSeqIdFile + " to file, newSeqId=" + newSeqId + ", maxSeqId=" + maxSeqId);
            }
        } catch (FileAlreadyExistsException ignored) {
            // latest hdfs throws this exception. it's all right if newSeqIdFile already exists
        }
    }
    // remove old ones
    if (files != null) {
        for (FileStatus status : files) {
            if (newSeqIdFile.equals(status.getPath())) {
                continue;
            }
            fs.delete(status.getPath(), false);
        }
    }
    return newSeqId;
}
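The core of this method is a PathFilter that selects only sequence-id marker files and then takes the maximum of their numeric name prefixes. A stripped-down sketch of that pattern is shown below; the .seqid suffix and the MaxSuffixedId class are assumptions made for illustration, not the constants or helpers used by WALSplitter.

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class MaxSuffixedId {
    // Suffix chosen for illustration only; HBase keeps the real value in SEQUENCE_ID_FILE_SUFFIX.
    private static final String SUFFIX = ".seqid";

    // Returns the largest numeric prefix among files named "<number><SUFFIX>" in dir, or 0 if none.
    public static long maxId(FileSystem fs, Path dir) throws IOException {
        PathFilter suffixFilter = p -> p.getName().endsWith(SUFFIX);
        long max = 0L;
        for (FileStatus status : fs.listStatus(dir, suffixFilter)) {
            String name = status.getPath().getName();
            try {
                max = Math.max(max, Long.parseLong(name.substring(0, name.length() - SUFFIX.length())));
            } catch (NumberFormatException ignored) {
                // skip files whose prefix is not a number
            }
        }
        return max;
    }
}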
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class TestDeleteColumnFamilyProcedureFromClient method deleteColumnFamilyTwice.
@Test
public void deleteColumnFamilyTwice() throws Exception {
    Admin admin = TEST_UTIL.getAdmin();
    HTableDescriptor beforehtd = admin.getTableDescriptor(TABLENAME);
    String cfToDelete = "cf1";
    FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
    // 1 - Check if table exists in descriptor
    assertTrue(admin.isTableAvailable(TABLENAME));
    // 2 - Check if the target column family exists in the descriptor
    HColumnDescriptor[] families = beforehtd.getColumnFamilies();
    boolean foundCF = false;
    for (int i = 0; i < families.length; i++) {
        if (families[i].getNameAsString().equals(cfToDelete)) {
            foundCF = true;
            break;
        }
    }
    assertTrue(foundCF);
    // 3 - Check if table exists in FS
    Path tableDir = FSUtils.getTableDir(TEST_UTIL.getDefaultRootDirPath(), TABLENAME);
    assertTrue(fs.exists(tableDir));
    // 4 - Check if the target column family exists in FS
    FileStatus[] fileStatus = fs.listStatus(tableDir);
    foundCF = false;
    for (int i = 0; i < fileStatus.length; i++) {
        if (fileStatus[i].isDirectory()) {
            FileStatus[] cf = fs.listStatus(fileStatus[i].getPath(), new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !p.getName().contains(HConstants.RECOVERED_EDITS_DIR);
                }
            });
            for (int j = 0; j < cf.length; j++) {
                if (cf[j].isDirectory() && cf[j].getPath().getName().equals(cfToDelete)) {
                    foundCF = true;
                    break;
                }
            }
        }
        if (foundCF) {
            break;
        }
    }
    assertTrue(foundCF);
    // TEST - Disable and delete the column family
    if (admin.isTableEnabled(TABLENAME)) {
        admin.disableTable(TABLENAME);
    }
    admin.deleteColumnFamily(TABLENAME, Bytes.toBytes(cfToDelete));
    // 5 - Check if the target column family is gone from the FS
    fileStatus = fs.listStatus(tableDir);
    for (int i = 0; i < fileStatus.length; i++) {
        if (fileStatus[i].isDirectory()) {
            FileStatus[] cf = fs.listStatus(fileStatus[i].getPath(), new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !WALSplitter.isSequenceIdFile(p);
                }
            });
            for (int j = 0; j < cf.length; j++) {
                if (cf[j].isDirectory()) {
                    assertFalse(cf[j].getPath().getName().equals(cfToDelete));
                }
            }
        }
    }
    try {
        // Test: delete again
        admin.deleteColumnFamily(TABLENAME, Bytes.toBytes(cfToDelete));
        Assert.fail("Deleting a non-existent column family should fail");
    } catch (InvalidFamilyOperationException e) {
        // Expected.
    }
}
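Both anonymous filters in this test exclude bookkeeping entries (the recovered-edits directory, sequence-id files) so that only real column-family directories are compared. The sketch below shows the same kind of check written against FileSystem.globStatus, which also accepts a PathFilter; FamilyDirFinder and findFamilyDirs are hypothetical names, and the recovered.edits literal stands in for HConstants.RECOVERED_EDITS_DIR.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class FamilyDirFinder {
    // Returns the directories named `family` one level below each region directory
    // under tableDir, skipping recovered-edits paths via the PathFilter.
    public static List<Path> findFamilyDirs(FileSystem fs, Path tableDir, String family) throws IOException {
        PathFilter notRecoveredEdits = p -> !p.getName().contains("recovered.edits");
        List<Path> result = new ArrayList<>();
        // "*/*" matches <region>/<child> under the table directory; the filter drops recovered-edits paths.
        FileStatus[] children = fs.globStatus(new Path(tableDir, "*/*"), notRecoveredEdits);
        if (children == null) {
            return result;
        }
        for (FileStatus child : children) {
            if (child.isDirectory() && child.getPath().getName().equals(family)) {
                result.add(child.getPath());
            }
        }
        return result;
    }
}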
use of org.apache.hadoop.fs.PathFilter in project hbase by apache.
the class FSVisitor method visitRegionStoreFiles.
/**
 * Iterate over the region store files
 *
 * @param fs {@link FileSystem}
 * @param regionDir {@link Path} to the region directory
 * @param visitor callback object to get the store files
 * @throws IOException if an error occurred while scanning the directory
 */
public static void visitRegionStoreFiles(final FileSystem fs, final Path regionDir, final StoreFileVisitor visitor) throws IOException {
    List<FileStatus> families = FSUtils.listStatusWithStatusFilter(fs, regionDir, new FSUtils.FamilyDirFilter(fs));
    if (families == null) {
        if (LOG.isTraceEnabled()) {
            LOG.trace("No families under region directory:" + regionDir);
        }
        return;
    }
    PathFilter fileFilter = new FSUtils.FileFilter(fs);
    for (FileStatus family : families) {
        Path familyDir = family.getPath();
        String familyName = familyDir.getName();
        // get all the storeFiles in the family
        FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir, fileFilter);
        if (storeFiles == null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("No hfiles found for family: " + familyDir + ", skipping.");
            }
            continue;
        }
        for (FileStatus hfile : storeFiles) {
            Path hfilePath = hfile.getPath();
            visitor.storeFile(regionDir.getName(), familyName, hfilePath.getName());
        }
    }
}
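FSUtils.FamilyDirFilter and FSUtils.FileFilter are PathFilter implementations that take a FileSystem so they can check the status of each candidate path. A minimal sketch of that pattern is below; DirectoryOnlyFilter is a hypothetical name, not part of HBase, and the note about RPC cost reflects general FileSystem behaviour rather than anything specific to FSVisitor.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Accepts only paths that are directories. Each accept() call issues a
// getFileStatus lookup, which is why filters that already hold a FileStatus
// (as listStatusWithStatusFilter does above) can be cheaper.
public class DirectoryOnlyFilter implements PathFilter {
    private final FileSystem fs;

    public DirectoryOnlyFilter(FileSystem fs) {
        this.fs = fs;
    }

    @Override
    public boolean accept(Path p) {
        try {
            return fs.getFileStatus(p).isDirectory();
        } catch (IOException e) {
            // Treat unreadable or vanished paths as non-matching.
            return false;
        }
    }
}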