Search in sources :

Example 51 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in the Apache HBase project.

From the class TestDeleteColumnFamilyProcedureFromClient, method deleteColumnFamilyWithMultipleRegions.

/**
 * Verifies that deleting a column family from a multi-region table removes the family
 * both from the table descriptor and from the on-disk layout, while leaving the
 * remaining families ("cf1" and "cf3") intact.
 */
@Test
public void deleteColumnFamilyWithMultipleRegions() throws Exception {
    Admin admin = TEST_UTIL.getAdmin();
    TableDescriptor beforehtd = admin.getDescriptor(TABLENAME);
    FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
    // 1 - Check if table exists in descriptor
    assertTrue(admin.isTableAvailable(TABLENAME));
    // 2 - Check if all three families exist in descriptor
    assertEquals(3, beforehtd.getColumnFamilyCount());
    ColumnFamilyDescriptor[] families = beforehtd.getColumnFamilies();
    for (int i = 0; i < families.length; i++) {
        // assertEquals gives a better failure message than assertTrue(equals(...))
        assertEquals("cf" + (i + 1), families[i].getNameAsString());
    }
    // 3 - Check if table exists in FS
    Path tableDir = CommonFSUtils.getTableDir(TEST_UTIL.getDefaultRootDirPath(), TABLENAME);
    assertTrue(fs.exists(tableDir));
    // 4 - Check if all the 3 column families exist in FS
    FileStatus[] fileStatus = fs.listStatus(tableDir);
    for (int i = 0; i < fileStatus.length; i++) {
        if (fileStatus[i].isDirectory()) {
            FileStatus[] cf = fs.listStatus(fileStatus[i].getPath(), new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    // Skip recovered.edits directories; only family dirs matter here
                    return !p.getName().contains(HConstants.RECOVERED_EDITS_DIR);
                }
            });
            int k = 1;
            for (int j = 0; j < cf.length; j++) {
                // Hidden directories (names starting with '.') are not column families
                if (cf[j].isDirectory() && !cf[j].getPath().getName().startsWith(".")) {
                    // expected value first, actual second (was swapped in the original)
                    assertEquals("cf" + k, cf[j].getPath().getName());
                    k++;
                }
            }
        }
    }
    // TEST - Disable and delete the column family
    admin.disableTable(TABLENAME);
    admin.deleteColumnFamily(TABLENAME, Bytes.toBytes("cf2"));
    // 5 - Check if only 2 column families exist in the descriptor
    TableDescriptor afterhtd = admin.getDescriptor(TABLENAME);
    assertEquals(2, afterhtd.getColumnFamilyCount());
    ColumnFamilyDescriptor[] newFamilies = afterhtd.getColumnFamilies();
    assertEquals("cf1", newFamilies[0].getNameAsString());
    assertEquals("cf3", newFamilies[1].getNameAsString());
    // 6 - Check if the second column family is gone from the FS
    fileStatus = fs.listStatus(tableDir);
    for (int i = 0; i < fileStatus.length; i++) {
        if (fileStatus[i].isDirectory()) {
            FileStatus[] cf = fs.listStatus(fileStatus[i].getPath(), new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    // Sequence id marker files are not column family directories
                    return !WALSplitUtil.isSequenceIdFile(p);
                }
            });
            for (int j = 0; j < cf.length; j++) {
                if (cf[j].isDirectory()) {
                    assertFalse(cf[j].getPath().getName().equals("cf2"));
                }
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) Admin(org.apache.hadoop.hbase.client.Admin) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) Test(org.junit.Test)

Example 52 with PathFilter

use of org.apache.hadoop.fs.PathFilter in project hbase by apache.

From the class FSUtils, method getRegionLocalityMappingFromFS.

/**
 * Scans the root path of the file system to compute, for each region of the desired
 * table (or of every table when {@code desiredTable} is null), the degree of locality
 * of the region on each of the servers holding at least one block of that region.
 * Results are written into the supplied output map; the per-region scans are fanned
 * out over a fixed-size thread pool and this method blocks until all of them finish.
 *
 * @param conf
 *          the configuration to use
 * @param desiredTable
 *          the table to scan locality for, or null to scan all tables under the
 *          base namespace directory
 * @param threadPoolSize
 *          the thread pool size to use (capped at the number of regions found)
 * @param regionDegreeLocalityMapping
 *          the map into which to put the locality degree mapping or null,
 *          must be a thread-safe implementation
 * @throws IOException
 *           in case of file system errors or interrupts
 */
private static void getRegionLocalityMappingFromFS(final Configuration conf, final String desiredTable, int threadPoolSize, final Map<String, Map<String, Float>> regionDegreeLocalityMapping) throws IOException {
    final FileSystem fs = FileSystem.get(conf);
    final Path rootPath = CommonFSUtils.getRootDir(conf);
    final long startTime = EnvironmentEdgeManager.currentTime();
    final Path queryPath;
    // The table files are in ${hbase.rootdir}/data/<namespace>/<table>/*
    if (null == desiredTable) {
        // Glob over every namespace and table; the trailing /*/ selects region dirs.
        queryPath = new Path(new Path(rootPath, HConstants.BASE_NAMESPACE_DIR).toString() + "/*/*/*/");
    } else {
        queryPath = new Path(CommonFSUtils.getTableDir(rootPath, TableName.valueOf(desiredTable)).toString() + "/*/");
    }
    // reject all paths that are not appropriate
    PathFilter pathFilter = new PathFilter() {

        @Override
        public boolean accept(Path path) {
            // this is the region name; it may get some noise data
            if (null == path) {
                return false;
            }
            // no parent?
            Path parent = path.getParent();
            if (null == parent) {
                return false;
            }
            String regionName = path.getName();
            if (null == regionName) {
                return false;
            }
            // Region directory names are hex-encoded; anything else is noise
            // (e.g. .tabledesc, .tmp) and is skipped.
            if (!regionName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
                return false;
            }
            return true;
        }
    };
    FileStatus[] statusList = fs.globStatus(queryPath, pathFilter);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Query Path: {} ; # list of files: {}", queryPath, Arrays.toString(statusList));
    }
    // globStatus returns null (not an empty array) when nothing matched.
    if (null == statusList) {
        return;
    }
    // lower the number of threads in case we have very few expected regions
    threadPoolSize = Math.min(threadPoolSize, statusList.length);
    // run in multiple threads
    final ExecutorService tpe = Executors.newFixedThreadPool(threadPoolSize, new ThreadFactoryBuilder().setNameFormat("FSRegionQuery-pool-%d").setDaemon(true).setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
    try {
        // ignore all file status items that are not of interest
        for (FileStatus regionStatus : statusList) {
            if (null == regionStatus || !regionStatus.isDirectory()) {
                continue;
            }
            final Path regionPath = regionStatus.getPath();
            if (null != regionPath) {
                // FSRegionScanner writes its results into regionDegreeLocalityMapping,
                // which must therefore be thread-safe.
                tpe.execute(new FSRegionScanner(fs, regionPath, null, regionDegreeLocalityMapping));
            }
        }
    } finally {
        // Stop accepting work, then poll until all queued scans have completed.
        tpe.shutdown();
        final long threadWakeFrequency = (long) conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
        try {
            // exceptions in the execution of the threads
            while (!tpe.awaitTermination(threadWakeFrequency, TimeUnit.MILLISECONDS)) {
                // printing out rough estimate, so as to not introduce
                // AtomicInteger
                LOG.info("Locality checking is underway: { Scanned Regions : " + ((ThreadPoolExecutor) tpe).getCompletedTaskCount() + "/" + ((ThreadPoolExecutor) tpe).getTaskCount() + " }");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag and surface as an IOException per the
            // method contract.
            Thread.currentThread().interrupt();
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        }
    }
    long overhead = EnvironmentEdgeManager.currentTime() - startTime;
    LOG.info("Scan DFS for locality info takes {}ms", overhead);
}
Also used : Path(org.apache.hadoop.fs.Path) InterruptedIOException(java.io.InterruptedIOException) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) HFileSystem(org.apache.hadoop.hbase.fs.HFileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) ExecutorService(java.util.concurrent.ExecutorService) ThreadFactoryBuilder(org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder)

Example 53 with PathFilter

use of org.apache.hadoop.fs.PathFilter in project hbase by apache.

From the class TestHFileArchiving, method testDeleteRegionWithNoStoreFiles.

/**
 * Test that the region directory is removed when we archive a region without store files, but
 * still has hidden files.
 * @throws IOException throws an IOException if there's problem creating a table
 *   or if there's an issue with accessing FileSystem.
 */
@Test
public void testDeleteRegionWithNoStoreFiles() throws IOException {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    UTIL.createTable(tableName, TEST_FAM);
    // get the current store files for the region
    List<HRegion> servingRegions = UTIL.getHBaseCluster().getRegions(tableName);
    // make sure we only have 1 region serving this table
    assertEquals(1, servingRegions.size());
    HRegion region = servingRegions.get(0);
    FileSystem fs = region.getRegionFileSystem().getFileSystem();
    // make sure there are some files in the regiondir
    Path rootDir = CommonFSUtils.getRootDir(fs.getConf());
    Path regionDir = FSUtils.getRegionDirFromRootDir(rootDir, region.getRegionInfo());
    FileStatus[] regionFiles = CommonFSUtils.listStatus(fs, regionDir, null);
    Assert.assertNotNull("No files in the region directory", regionFiles);
    if (LOG.isDebugEnabled()) {
        List<Path> files = new ArrayList<>();
        for (FileStatus file : regionFiles) {
            files.add(file.getPath());
        }
        LOG.debug("Current files:" + files);
    }
    // delete the visible folders so we just have hidden files/folders
    final PathFilter dirFilter = new FSUtils.DirFilter(fs);
    PathFilter nonHidden = new PathFilter() {

        @Override
        public boolean accept(Path file) {
            // only visible directories: hidden entries start with '.'
            return dirFilter.accept(file) && !file.getName().startsWith(".");
        }
    };
    // listStatus returns null (not an empty array) when nothing matches the filter,
    // so guard against an NPE when the region has no visible store directories.
    FileStatus[] storeDirs = CommonFSUtils.listStatus(fs, regionDir, nonHidden);
    if (storeDirs != null) {
        for (FileStatus store : storeDirs) {
            LOG.debug("Deleting store for test");
            fs.delete(store.getPath(), true);
        }
    }
    // then archive the region
    HFileArchiver.archiveRegion(UTIL.getConfiguration(), fs, region.getRegionInfo());
    // and check to make sure the region directory got deleted
    assertFalse("Region directory (" + regionDir + "), still exists.", fs.exists(regionDir));
    UTIL.deleteTable(tableName);
}
Also used : Path(org.apache.hadoop.fs.Path) TableName(org.apache.hadoop.hbase.TableName) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 54 with PathFilter

use of org.apache.hadoop.fs.PathFilter in project hbase by apache.

From the class TestWALSplit, method getLogForRegion.

/**
 * Returns the paths of all recovered-edits files for the given region, excluding the
 * sequence id marker files that WAL splitting also writes into the same directory.
 */
private Path[] getLogForRegion(TableName table, String region) throws IOException {
    Path tdir = CommonFSUtils.getWALTableDir(conf, table);
    @SuppressWarnings("deprecation")
    Path editsdir = WALSplitUtil.getRegionDirRecoveredEditsDir(
        HRegion.getRegionDir(tdir, Bytes.toString(Bytes.toBytes(region))));
    FileStatus[] statuses = fs.listStatus(editsdir, new PathFilter() {

        @Override
        public boolean accept(Path p) {
            // Sequence id markers live alongside the edits; skip them.
            return !WALSplitUtil.isSequenceIdFile(p);
        }
    });
    Path[] result = new Path[statuses.length];
    int idx = 0;
    for (FileStatus status : statuses) {
        result[idx++] = status.getPath();
    }
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus)

Example 55 with PathFilter

use of org.apache.hadoop.fs.PathFilter in project hbase by apache.

From the class TestReadWriteSeqIdFiles, method test.

/**
 * Verifies sequence id file round-tripping: writes are readable back, a smaller
 * sequence id is rejected, only one seqid file exists at a time, and seqid files
 * are never mistaken for recovered.edits files.
 */
@Test
public void test() throws IOException {
    WALSplitUtil.writeRegionSequenceIdFile(walFS, REGION_DIR, 1000L);
    assertEquals(1000L, WALSplitUtil.getMaxRegionSequenceId(walFS, REGION_DIR));
    WALSplitUtil.writeRegionSequenceIdFile(walFS, REGION_DIR, 2000L);
    assertEquals(2000L, WALSplitUtil.getMaxRegionSequenceId(walFS, REGION_DIR));
    // can not write a sequence id which is smaller
    try {
        WALSplitUtil.writeRegionSequenceIdFile(walFS, REGION_DIR, 1500L);
        // The original test silently passed when no exception was thrown;
        // make the absence of the expected IOException an explicit failure.
        // (AssertionError is not an IOException, so the catch below won't swallow it.)
        throw new AssertionError("Writing a smaller sequence id should have failed");
    } catch (IOException e) {
        // expected
        LOG.info("Expected error", e);
    }
    Path editsdir = WALSplitUtil.getRegionDirRecoveredEditsDir(REGION_DIR);
    FileStatus[] files = CommonFSUtils.listStatus(walFS, editsdir, new PathFilter() {

        @Override
        public boolean accept(Path p) {
            return WALSplitUtil.isSequenceIdFile(p);
        }
    });
    // only one seqid file should exist
    assertEquals(1, files.length);
    // verify all seqId files aren't treated as recovered.edits files
    NavigableSet<Path> recoveredEdits = WALSplitUtil.getSplitEditFilesSorted(walFS, REGION_DIR);
    assertEquals(0, recoveredEdits.size());
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

PathFilter (org.apache.hadoop.fs.PathFilter)123 Path (org.apache.hadoop.fs.Path)114 FileStatus (org.apache.hadoop.fs.FileStatus)96 Test (org.junit.Test)47 IOException (java.io.IOException)42 FileSystem (org.apache.hadoop.fs.FileSystem)39 ArrayList (java.util.ArrayList)22 List (java.util.List)19 Configuration (org.apache.hadoop.conf.Configuration)18 Collections (java.util.Collections)11 BufferedReader (java.io.BufferedReader)9 InputStreamReader (java.io.InputStreamReader)9 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)9 Assert.assertEquals (org.junit.Assert.assertEquals)9 Assert.assertTrue (org.junit.Assert.assertTrue)9 URI (java.net.URI)8 Test (org.testng.annotations.Test)8 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)7 IGNORED (com.facebook.presto.hive.NestedDirectoryPolicy.IGNORED)6 RECURSE (com.facebook.presto.hive.NestedDirectoryPolicy.RECURSE)6