Example 11 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in the apache/hbase project.

From the class TestHFileArchiving, method testRemovesRegionDirOnArchive.

@Test
public void testRemovesRegionDirOnArchive() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    UTIL.createTable(tableName, TEST_FAM);
    final Admin admin = UTIL.getAdmin();
    // get the current store files for the region
    List<HRegion> servingRegions = UTIL.getHBaseCluster().getRegions(tableName);
    // make sure we only have 1 region serving this table
    assertEquals(1, servingRegions.size());
    HRegion region = servingRegions.get(0);
    // and load the table
    UTIL.loadRegion(region, TEST_FAM);
    // shut down the table so we can manipulate the files
    admin.disableTable(tableName);
    FileSystem fs = UTIL.getTestFileSystem();
    // now attempt to depose the region
    Path rootDir = region.getRegionFileSystem().getTableDir().getParent();
    Path regionDir = HRegion.getRegionDir(rootDir, region.getRegionInfo());
    HFileArchiver.archiveRegion(UTIL.getConfiguration(), fs, region.getRegionInfo());
    // check for the existence of the archive directory and some files in it
    Path archiveDir = HFileArchiveTestingUtil.getRegionArchiveDir(UTIL.getConfiguration(), region);
    assertTrue(fs.exists(archiveDir));
    // check to make sure the store directory was copied
    FileStatus[] stores = fs.listStatus(archiveDir, new PathFilter() {

        @Override
        public boolean accept(Path p) {
            if (p.getName().contains(HConstants.RECOVERED_EDITS_DIR)) {
                return false;
            }
            return true;
        }
    });
    assertEquals(1, stores.length);
    // make sure we archived the store files
    FileStatus[] storeFiles = fs.listStatus(stores[0].getPath());
    assertTrue(storeFiles.length > 0);
    // then ensure the region's directory isn't present
    assertFalse(fs.exists(regionDir));
    UTIL.deleteTable(tableName);
}
Also used: Path (org.apache.hadoop.fs.Path), HRegion (org.apache.hadoop.hbase.regionserver.HRegion), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), FileSystem (org.apache.hadoop.fs.FileSystem), Admin (org.apache.hadoop.hbase.client.Admin), Test (org.junit.Test)
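
PathFilter declares a single accept(Path) method, so on Java 8+ the anonymous class in this test can be written as a lambda. A minimal sketch under that assumption (ArchiveListing and the helper name are hypothetical, not part of the HBase test):

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;

public class ArchiveListing {

    // Same filtering logic as the anonymous PathFilter above, as a lambda:
    // accept everything except the recovered.edits directory.
    static FileStatus[] listStoresSkippingRecoveredEdits(FileSystem fs, Path archiveDir)
            throws IOException {
        return fs.listStatus(archiveDir,
            p -> !p.getName().contains(HConstants.RECOVERED_EDITS_DIR));
    }
}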

Example 12 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in the apache/hbase project.

From the class FSUtils, method getTableFragmentation.

/**
   * Runs through the HBase rootdir and checks how many stores for each table
   * have more than one file in them. Checks -ROOT- and hbase:meta too. The total
   * percentage across all tables is stored under the special key "-TOTAL-".
   *
   * @param fs  The file system to use.
   * @param hbaseRootDir  The root directory to scan.
   * @return A map for each table and its percentage.
   * @throws IOException When scanning the directory fails.
   */
public static Map<String, Integer> getTableFragmentation(final FileSystem fs, final Path hbaseRootDir) throws IOException {
    Map<String, Integer> frags = new HashMap<>();
    int cfCountTotal = 0;
    int cfFragTotal = 0;
    PathFilter regionFilter = new RegionDirFilter(fs);
    PathFilter familyFilter = new FamilyDirFilter(fs);
    List<Path> tableDirs = getTableDirs(fs, hbaseRootDir);
    for (Path d : tableDirs) {
        int cfCount = 0;
        int cfFrag = 0;
        FileStatus[] regionDirs = fs.listStatus(d, regionFilter);
        for (FileStatus regionDir : regionDirs) {
            Path dd = regionDir.getPath();
            // else it's a region name; now look in the region for families
            FileStatus[] familyDirs = fs.listStatus(dd, familyFilter);
            for (FileStatus familyDir : familyDirs) {
                cfCount++;
                cfCountTotal++;
                Path family = familyDir.getPath();
                // now in family make sure only one file
                FileStatus[] familyStatus = fs.listStatus(family);
                if (familyStatus.length > 1) {
                    cfFrag++;
                    cfFragTotal++;
                }
            }
        }
        // compute percentage per table and store in result list
        frags.put(FSUtils.getTableName(d).getNameAsString(), cfCount == 0 ? 0 : Math.round((float) cfFrag / cfCount * 100));
    }
    // set overall percentage for all tables
    frags.put("-TOTAL-", cfCountTotal == 0 ? 0 : Math.round((float) cfFragTotal / cfCountTotal * 100));
    return frags;
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashMap (java.util.HashMap)
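
A short usage sketch for getTableFragmentation. FragmentationReport is a hypothetical driver class, and it assumes an hbase-site.xml with hbase.rootdir on the classpath:

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.FSUtils;

public class FragmentationReport {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // FSUtils.getRootDir resolves hbase.rootdir, as in Examples 14 and 15 below.
        Path rootDir = FSUtils.getRootDir(conf);
        FileSystem fs = rootDir.getFileSystem(conf);
        Map<String, Integer> frags = FSUtils.getTableFragmentation(fs, rootDir);
        // 0 means at most one file per store; "-TOTAL-" carries the overall percentage.
        frags.forEach((table, pct) -> System.out.println(table + ": " + pct + "%"));
    }
}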

Example 13 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in the apache/hive project.

From the class FSStatsAggregator, method connect.

@Override
public boolean connect(StatsCollectionContext scc) {
    List<String> statsDirs = scc.getStatsTmpDirs();
    assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
    Path statsDir = new Path(statsDirs.get(0));
    LOG.debug("About to read stats from : " + statsDir);
    statsMap = new HashMap<String, Map<String, String>>();
    try {
        fs = statsDir.getFileSystem(scc.getHiveConf());
        statsList = new ArrayList<Map<String, Map<String, String>>>();
        FileStatus[] status = fs.listStatus(statsDir, new PathFilter() {

            @Override
            public boolean accept(Path file) {
                return file.getName().startsWith(StatsSetupConst.STATS_FILE_PREFIX);
            }
        });
        for (FileStatus file : status) {
            Input in = new Input(fs.open(file.getPath()));
            Kryo kryo = SerializationUtilities.borrowKryo();
            try {
                statsMap = kryo.readObject(in, statsMap.getClass());
            } finally {
                SerializationUtilities.releaseKryo(kryo);
            }
            LOG.info("Read stats : " + statsMap);
            statsList.add(statsMap);
            in.close();
        }
        return true;
    } catch (IOException e) {
        LOG.error("Failed to read stats from filesystem ", e);
        return false;
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), FileStatus (org.apache.hadoop.fs.FileStatus), IOException (java.io.IOException), Input (com.esotericsoftware.kryo.io.Input), HashMap (java.util.HashMap), Map (java.util.Map), Kryo (com.esotericsoftware.kryo.Kryo)
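
The prefix check in this example is generic enough to pull into a small reusable filter. A sketch (PrefixPathFilter is a hypothetical name, not a Hive or Hadoop class):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

/** Accepts only files whose names start with the given prefix. */
public class PrefixPathFilter implements PathFilter {

    private final String prefix;

    public PrefixPathFilter(String prefix) {
        this.prefix = prefix;
    }

    @Override
    public boolean accept(Path path) {
        return path.getName().startsWith(prefix);
    }
}

The listStatus call in connect would then read fs.listStatus(statsDir, new PrefixPathFilter(StatsSetupConst.STATS_FILE_PREFIX)).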

Example 14 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in the apache/hbase project.

From the class TestDistributedLogSplitting, method testLogReplayForDisablingTable.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayForDisablingTable() throws Exception {
    LOG.info("testLogReplayForDisablingTable");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
    try {
        // turn off load balancing to prevent regions from moving around; otherwise
        // they will consume recovered.edits
        master.balanceSwitch(false);
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        boolean hasRegionsForBothTables = false;
        String tableName = null;
        for (int i = 0; i < NUM_RS; i++) {
            tableName = null;
            hasRegionsForBothTables = false;
            boolean isCarryingSystem = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.getTable().isSystemTable()) {
                    isCarryingSystem = true;
                    break;
                }
                if (tableName != null && !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
                    // make sure that we find an RS that has online regions for both "table" and "disableTable"
                    hasRegionsForBothTables = true;
                    break;
                } else if (tableName == null) {
                    tableName = region.getTable().getNameAsString();
                }
            }
            if (isCarryingSystem) {
                continue;
            }
            if (hasRegionsForBothTables) {
                break;
            }
        }
        // make sure we found a good RS
        Assert.assertTrue(hasRegionsForBothTables);
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.isMetaTable()) {
                it.remove();
            }
        }
        makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        LOG.info("Disabling table\n");
        TEST_UTIL.getAdmin().disableTable(TableName.valueOf("disableTable"));
        TEST_UTIL.waitTableDisabled(TableName.valueOf("disableTable").getName());
        // abort RS
        LOG.info("Aborting region server: " + hrs.getServerName());
        hrs.abort("testing");
        // wait for the abort to complete
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
            }
        });
        // wait for regions to come online
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
            }
        });
        // wait until all regions are fully recovered
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
                ServerManager serverManager = master.getServerManager();
                return (!serverManager.areDeadServersInProgress() && recoveringRegions != null && recoveringRegions.isEmpty());
            }
        });
        int count = 0;
        FileSystem fs = master.getMasterFileSystem().getFileSystem();
        Path rootdir = FSUtils.getRootDir(conf);
        Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            LOG.debug("checking edits dir " + editsdir);
            if (!fs.exists(editsdir))
                continue;
            FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    if (WALSplitter.isSequenceIdFile(p)) {
                        return false;
                    }
                    return true;
                }
            });
            if (files != null) {
                for (FileStatus file : files) {
                    int c = countWAL(file.getPath(), fs, conf);
                    count += c;
                    LOG.info(c + " edits in " + file.getPath());
                }
            }
        }
        LOG.info("Verify edits in recovered.edits files");
        assertEquals(NUM_LOG_LINES, count);
        LOG.info("Verify replayed edits");
        assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
        // clean up
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            fs.delete(editsdir, true);
        }
        disablingHT.close();
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), Table (org.apache.hadoop.hbase.client.Table), FileStatus (org.apache.hadoop.fs.FileStatus), OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException), RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException), IOException (java.io.IOException), TimeoutException (java.util.concurrent.TimeoutException), RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException), ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), List (java.util.List), LinkedList (java.util.LinkedList), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), Waiter (org.apache.hadoop.hbase.Waiter), Ignore (org.junit.Ignore), Test (org.junit.Test)
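
The sequence-id-skipping filter here reappears verbatim in Example 15, so it is a natural candidate for a shared constant. A sketch assuming Java 8+ and the org.apache.hadoop.hbase.wal.WALSplitter import used by these tests (EditsFilters and the constant name are hypothetical):

import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.wal.WALSplitter;

public final class EditsFilters {

    /** Accepts everything in a recovered.edits dir except sequence-id marker files. */
    public static final PathFilter SKIP_SEQUENCE_ID_FILES =
        p -> !WALSplitter.isSequenceIdFile(p);

    private EditsFilters() {
        // no instances
    }
}

Both tests could then call fs.listStatus(editsdir, EditsFilters.SKIP_SEQUENCE_ID_FILES).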

Example 15 with PathFilter

Use of org.apache.hadoop.fs.PathFilter in the apache/hbase project.

From the class TestDistributedLogSplitting, method testRecoveredEdits.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testRecoveredEdits() throws Exception {
    LOG.info("testRecoveredEdits");
    // create more than one wal
    conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    startCluster(NUM_RS);
    final int NUM_LOG_LINES = 1000;
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    // turn off load balancing to prevent regions from moving around; otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    Path rootdir = FSUtils.getRootDir(conf);
    Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40);
    try {
        TableName table = t.getName();
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        for (int i = 0; i < NUM_RS; i++) {
            boolean foundRs = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
                    foundRs = true;
                    break;
                }
            }
            if (foundRs)
                break;
        }
        final Path logDir = new Path(rootdir, AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.getTable().getNamespaceAsString().equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
                it.remove();
            }
        }
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        slm.splitLogDistributed(logDir);
        int count = 0;
        for (HRegionInfo hri : regions) {
            Path tdir = FSUtils.getTableDir(rootdir, table);
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            LOG.debug("checking edits dir " + editsdir);
            FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    if (WALSplitter.isSequenceIdFile(p)) {
                        return false;
                    }
                    return true;
                }
            });
            assertTrue("edits dir should have more than a single file in it. instead has " + files.length, files.length > 1);
            for (int i = 0; i < files.length; i++) {
                int c = countWAL(files[i].getPath(), fs, conf);
                count += c;
            }
            LOG.info(count + " edits in " + files.length + " recovered edits files.");
        }
        // check that the log file is moved
        assertFalse(fs.exists(logDir));
        assertEquals(NUM_LOG_LINES, count);
    } finally {
        if (t != null)
            t.close();
    }
}
Also used: Path (org.apache.hadoop.fs.Path), PathFilter (org.apache.hadoop.fs.PathFilter), Table (org.apache.hadoop.hbase.client.Table), FileStatus (org.apache.hadoop.fs.FileStatus), HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), TableName (org.apache.hadoop.hbase.TableName), ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher), FileSystem (org.apache.hadoop.fs.FileSystem), RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread), Ignore (org.junit.Ignore), Test (org.junit.Test)
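
When a listing needs several conditions at once, PathFilters compose cleanly. A generic AND-combinator sketch, again assuming Java 8+ (PathFilters.and is hypothetical, not a Hadoop API):

import java.util.Arrays;
import org.apache.hadoop.fs.PathFilter;

public final class PathFilters {

    /** Returns a filter that accepts a path only when every given filter accepts it. */
    public static PathFilter and(PathFilter... filters) {
        return p -> Arrays.stream(filters).allMatch(f -> f.accept(p));
    }

    private PathFilters() {
        // no instances
    }
}

A call like fs.listStatus(dir, PathFilters.and(filterA, filterB)) then applies both checks in one pass; filterA and filterB are placeholders for filters such as the ones in the examples above.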

Aggregations

PathFilter (org.apache.hadoop.fs.PathFilter): 43
Path (org.apache.hadoop.fs.Path): 41
FileStatus (org.apache.hadoop.fs.FileStatus): 37
FileSystem (org.apache.hadoop.fs.FileSystem): 18
IOException (java.io.IOException): 16
ArrayList (java.util.ArrayList): 11
Test (org.junit.Test): 8
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 5
InterruptedIOException (java.io.InterruptedIOException): 4
Configuration (org.apache.hadoop.conf.Configuration): 3
Admin (org.apache.hadoop.hbase.client.Admin): 3
Table (org.apache.hadoop.hbase.client.Table): 3
HRegion (org.apache.hadoop.hbase.regionserver.HRegion): 3
ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher): 3
URI (java.net.URI): 2
HashMap (java.util.HashMap): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
Exchange (org.apache.camel.Exchange): 2
Message (org.apache.camel.Message): 2
DefaultMessage (org.apache.camel.impl.DefaultMessage): 2