Example 96 with Path

Use of org.apache.hadoop.fs.Path in project hbase by apache.

From class TestDistributedLogSplitting, method testRecoveredEdits.

@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testRecoveredEdits() throws Exception {
    LOG.info("testRecoveredEdits");
    // use a small WAL block size so that more than one WAL is created
    conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    startCluster(NUM_RS);
    final int NUM_LOG_LINES = 1000;
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    // turn off load balancing to prevent regions from moving around; otherwise
    // they would consume the recovered.edits
    master.balanceSwitch(false);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    Path rootdir = FSUtils.getRootDir(conf);
    Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40);
    try {
        TableName table = t.getName();
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        for (int i = 0; i < NUM_RS; i++) {
            boolean foundRs = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
                    foundRs = true;
                    break;
                }
            }
            if (foundRs)
                break;
        }
        final Path logDir = new Path(rootdir, AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
        LOG.info("#regions = " + regions.size());
        // skip system-table regions; only user-table regions get test edits
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.getTable().getNamespaceAsString().equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
                it.remove();
            }
        }
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        slm.splitLogDistributed(logDir);
        int count = 0;
        for (HRegionInfo hri : regions) {
            Path tdir = FSUtils.getTableDir(rootdir, table);
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            LOG.debug("checking edits dir " + editsdir);
            FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    // skip sequence-id marker files; count only real edits files
                    return !WALSplitter.isSequenceIdFile(p);
                }
            });
            assertTrue("edits dir should have more than a single file in it. instead has " + files.length, files.length > 1);
            for (FileStatus file : files) {
                count += countWAL(file.getPath(), fs, conf);
            }
            LOG.info(count + " edits in " + files.length + " recovered edits files.");
        }
        // check that the WAL directory has been moved away by the split
        assertFalse(fs.exists(logDir));
        assertEquals(NUM_LOG_LINES, count);
    } finally {
        if (t != null)
            t.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableName(org.apache.hadoop.hbase.TableName) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Ignore(org.junit.Ignore) Test(org.junit.Test)
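The PathFilter idiom above generalizes to any Hadoop FileSystem directory scan. Below is a minimal, self-contained sketch of the same filtering; the ".seqid" suffix check is a hypothetical stand-in for WALSplitter.isSequenceIdFile (which is HBase-internal), and the directory path is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListEditsFiles {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // the local filesystem keeps the sketch runnable without a cluster
        FileSystem fs = FileSystem.getLocal(conf);
        Path editsDir = new Path("/tmp/recovered.edits");
        FileStatus[] files = fs.listStatus(editsDir, new PathFilter() {

            @Override
            public boolean accept(Path p) {
                // hypothetical marker-file suffix; skip those entries
                return !p.getName().endsWith(".seqid");
            }
        });
        for (FileStatus file : files) {
            System.out.println(file.getPath() + " len=" + file.getLen());
        }
    }
}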

Example 97 with Path

Use of org.apache.hadoop.fs.Path in project hbase by apache.

From class TestDistributedLogSplitting, method testWorkerAbort.

/**
   * The original intention of this test was to force an abort of a region
   * server and to make sure that the failure path in the region servers is
   * properly evaluated. It is difficult, however, to ensure that the region
   * server does not finish the log splitting before it aborts. In addition,
   * there is now a code path in which the master preempts the region server's
   * split task once the master detects that the region server has aborted.
   * @throws Exception
   */
@Ignore("Disabled because flakey")
@Test(timeout = 300000)
public void testWorkerAbort() throws Exception {
    LOG.info("testWorkerAbort");
    startCluster(3);
    final int NUM_LOG_LINES = 10000;
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    FileSystem fs = master.getMasterFileSystem().getFileSystem();
    final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    HRegionServer hrs = findRSToKill(false, "table");
    Path rootdir = FSUtils.getRootDir(conf);
    final Path logDir = new Path(rootdir, AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
    Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null), "table", "family", 40);
    try {
        makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()), "table", "family", NUM_LOG_LINES, 100);
        new Thread() {

            @Override
            public void run() {
                waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
                // abort the first live region server once a worker has acquired a task
                for (RegionServerThread rst : rsts) {
                    rst.getRegionServer().abort("testing");
                    break;
                }
            }
        }.start();
        // slm.splitLogDistributed(logDir);
        FileStatus[] logfiles = fs.listStatus(logDir);
        TaskBatch batch = new TaskBatch();
        slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
        // like waitForCounter, but succeed if any one of the worker counters goes up
        long curt = System.currentTimeMillis();
        long waitTime = 80000;
        long endt = curt + waitTime;
        while (curt < endt) {
            if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() + tot_wkr_preempt_task.get()) == 0) {
                Thread.yield();
                curt = System.currentTimeMillis();
            } else {
                assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() + tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() + tot_wkr_preempt_task.get()));
                return;
            }
        }
        fail("none of the following counters went up in " + waitTime + " milliseconds - " + "tot_wkr_task_resigned, tot_wkr_task_err, " + "tot_wkr_final_transition_failed, tot_wkr_task_done, " + "tot_wkr_preempt_task");
    } finally {
        if (t != null)
            t.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) TaskBatch(org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) MasterThread(org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) Ignore(org.junit.Ignore) Test(org.junit.Test)
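The deadline loop above is a general pattern for waiting until any one of several counters moves, without a dedicated latch. Below is a stripped-down sketch of just that pattern; the AtomicLong counters and the timings are illustrative, not HBase's actual SplitLogCounters.

import java.util.concurrent.atomic.AtomicLong;

public class WaitForAnyCounter {

    static final AtomicLong taskDone = new AtomicLong();
    static final AtomicLong taskErr = new AtomicLong();

    // returns true if any counter went up before the deadline, false on timeout
    static boolean waitForAnyCounter(long waitTimeMillis) {
        long endt = System.currentTimeMillis() + waitTimeMillis;
        while (System.currentTimeMillis() < endt) {
            if (taskDone.get() + taskErr.get() > 0) {
                return true;
            }
            Thread.yield(); // busy-ish wait, matching the test's style
        }
        return false;
    }

    public static void main(String[] args) {
        new Thread(new Runnable() {

            @Override
            public void run() {
                taskDone.incrementAndGet();
            }
        }).start();
        System.out.println("counter moved: " + waitForAnyCounter(80000));
    }
}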

Example 98 with Path

Use of org.apache.hadoop.fs.Path in project hbase by apache.

From class TestDistributedLogSplitting, method testReadWriteSeqIdFiles.

@Test(timeout = 300000)
public void testReadWriteSeqIdFiles() throws Exception {
    LOG.info("testReadWriteSeqIdFiles");
    startCluster(2);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, name.getMethodName(), "family", 10);
    try {
        FileSystem fs = master.getMasterFileSystem().getFileSystem();
        Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf(name.getMethodName()));
        List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
        long newSeqId = WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 1L, 1000L);
        WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 1L, 1000L);
        assertEquals(newSeqId + 2000, WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 3L, 1000L));
        Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(regionDirs.get(0));
        FileStatus[] files = FSUtils.listStatus(fs, editsdir, new PathFilter() {

            @Override
            public boolean accept(Path p) {
                return WALSplitter.isSequenceIdFile(p);
            }
        });
        // only one seqid file should exist
        assertEquals(1, files.length);
        // verify all seqId files aren't treated as recovered.edits files
        NavigableSet<Path> recoveredEdits = WALSplitter.getSplitEditFilesSorted(fs, regionDirs.get(0));
        assertEquals(0, recoveredEdits.size());
    } finally {
        if (ht != null)
            ht.close();
        if (zkw != null)
            zkw.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.junit.Test)
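The three writeRegionSequenceIdFile calls above encode a bump rule: when the requested sequence id is not greater than the one already recorded, the recorded id is advanced by the safety bumper instead, which is why the third call returns newSeqId + 2000. The following is a hedged model of that rule as implied by the test's assertions, not HBase's actual implementation.

public class SeqIdBumpModel {

    private long recorded = -1L;

    // record the requested id, or bump the recorded id by safetyBumper if the
    // request is not ahead of it; return what was actually recorded
    long writeSequenceId(long requested, long safetyBumper) {
        recorded = (requested > recorded) ? requested : recorded + safetyBumper;
        return recorded;
    }

    public static void main(String[] args) {
        SeqIdBumpModel m = new SeqIdBumpModel();
        long newSeqId = m.writeSequenceId(1L, 1000L); // records 1
        m.writeSequenceId(1L, 1000L);                 // 1 is not ahead: records newSeqId + 1000
        long third = m.writeSequenceId(3L, 1000L);    // 3 is not ahead: records newSeqId + 2000
        System.out.println(third == newSeqId + 2000); // true, matching the assertion above
    }
}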

Example 99 with Path

Use of org.apache.hadoop.fs.Path in project hbase by apache.

From class TestDistributedLogSplitting, method testDelayedDeleteOnFailure.

@Test(timeout = 30000)
public void testDelayedDeleteOnFailure() throws Exception {
    LOG.info("testDelayedDeleteOnFailure");
    startCluster(1);
    final SplitLogManager slm = master.getMasterWalManager().getSplitLogManager();
    final FileSystem fs = master.getMasterFileSystem().getFileSystem();
    final Path logDir = new Path(new Path(FSUtils.getRootDir(conf), HConstants.HREGION_LOGDIR_NAME), ServerName.valueOf("x", 1, 1).toString());
    fs.mkdirs(logDir);
    ExecutorService executor = null;
    try {
        final Path corruptedLogFile = new Path(logDir, "x");
        FSDataOutputStream out = fs.create(corruptedLogFile);
        out.write(0);
        out.write(Bytes.toBytes("corrupted bytes"));
        out.close();
        ZKSplitLogManagerCoordination coordination = (ZKSplitLogManagerCoordination) ((BaseCoordinatedStateManager) master.getCoordinatedStateManager()).getSplitLogManagerCoordination();
        coordination.setIgnoreDeleteForTesting(true);
        executor = Executors.newSingleThreadExecutor();
        Runnable runnable = new Runnable() {

            @Override
            public void run() {
                try {
                    // the logDir holds a fake, corrupted log file, so the split log
                    // worker will finish it quickly with an error, and this call will
                    // fail and throw an IOException
                    slm.splitLogDistributed(logDir);
                } catch (IOException ioe) {
                    try {
                        assertTrue(fs.exists(corruptedLogFile));
                        // this call blocks until interrupted, waiting for the task to be
                        // removed from the tasks map; that never happens here because
                        // ignoreZKDeleteForTesting is set to true
                        slm.splitLogDistributed(logDir);
                    } catch (IOException e) {
                        assertTrue(Thread.currentThread().isInterrupted());
                        return;
                    }
                    fail("did not get the expected IOException from the 2nd call");
                }
                fail("did not get the expected IOException from the 1st call");
            }
        };
        Future<?> result = executor.submit(runnable);
        try {
            result.get(2000, TimeUnit.MILLISECONDS);
        } catch (TimeoutException te) {
        // it is ok, expected: the runnable is still blocked
        }
        waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
        executor.shutdownNow();
        executor = null;
        // make sure the runnable is finished with no exception thrown.
        result.get();
    } finally {
        if (executor != null) {
            // interrupt the thread in case the test fails in the middle.
            // it has no effect if the thread is already terminated.
            executor.shutdownNow();
        }
        fs.delete(logDir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) ZKSplitLogManagerCoordination(org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination) ExecutorService(java.util.concurrent.ExecutorService) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)
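The submit / get-with-timeout / shutdownNow / get sequence above is a reusable scaffold for tests that drive a blocking call on a helper thread and then interrupt it. A self-contained sketch of just that scaffolding follows; the sleeping runnable is a stand-in for the blocking splitLogDistributed call.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class TimeoutThenInterrupt {

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        Future<?> result = executor.submit(new Runnable() {

            @Override
            public void run() {
                try {
                    // stand-in for a call that blocks until interrupted
                    Thread.sleep(Long.MAX_VALUE);
                } catch (InterruptedException expected) {
                    // interrupted by shutdownNow(); return normally
                }
            }
        });
        try {
            result.get(2000, TimeUnit.MILLISECONDS);
        } catch (TimeoutException te) {
            // it is ok, expected: the task is still blocked
        }
        executor.shutdownNow(); // interrupts the worker thread
        // make sure the runnable finished with no exception thrown
        result.get();
    }
}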

Example 100 with Path

Use of org.apache.hadoop.fs.Path in project hbase by apache.

From class TestAssignmentManagerOnCluster, method testOpenFailedUnrecoverable.

/**
   * Tests a region open failure that is not recoverable.
   */
@Test(timeout = 60000)
public void testOpenFailedUnrecoverable() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    try {
        HTableDescriptor desc = new HTableDescriptor(tableName);
        desc.addFamily(new HColumnDescriptor(FAMILY));
        admin.createTable(desc);
        Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
        HRegionInfo hri = new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
        MetaTableAccessor.addRegionToMeta(meta, hri);
        FileSystem fs = FileSystem.get(conf);
        Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
        Path regionDir = new Path(tableDir, hri.getEncodedName());
        // create a file with the same name as the region dir to
        // interfere with region opening
        fs.create(regionDir, true);
        HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
        AssignmentManager am = master.getAssignmentManager();
        assertFalse(TEST_UTIL.assignRegion(hri));
        RegionState state = am.getRegionStates().getRegionState(hri);
        assertEquals(RegionState.State.FAILED_OPEN, state.getState());
        // Failed to open due to file system issue. Region state should
        // carry the opening region server so that we can force close it
        // later on before opening it again. See HBASE-9092.
        assertNotNull(state.getServerName());
        // remove the blocking file so that the region can be opened
        fs.delete(regionDir, true);
        assertTrue(TEST_UTIL.assignRegion(hri));
        ServerName serverName = master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri);
        TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
    } finally {
        TEST_UTIL.deleteTable(tableName);
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) Path(org.apache.hadoop.fs.Path) TableName(org.apache.hadoop.hbase.TableName) Table(org.apache.hadoop.hbase.client.Table) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) FileSystem(org.apache.hadoop.fs.FileSystem) ServerName(org.apache.hadoop.hbase.ServerName) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Test(org.junit.Test)
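The trick that breaks the region open, placing a file at the path where a directory is expected, is easy to reproduce against any Hadoop FileSystem. A minimal sketch on the local filesystem follows; the path is illustrative, and whether the blocked mkdirs returns false or throws depends on the FileSystem implementation.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockDirWithFile {

    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path regionDir = new Path("/tmp/table/region-abc"); // illustrative path
        fs.create(regionDir, true).close(); // a file now occupies the directory's name
        boolean made;
        try {
            made = fs.mkdirs(regionDir);
        } catch (IOException e) {
            made = false; // some FileSystems throw instead of returning false
        }
        System.out.println("mkdirs while blocked: " + made);
        fs.delete(regionDir, true); // remove the blocker, as the test does
        System.out.println("mkdirs after cleanup: " + fs.mkdirs(regionDir));
    }
}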

Aggregations

Path (org.apache.hadoop.fs.Path)11752 Test (org.junit.Test)4193 FileSystem (org.apache.hadoop.fs.FileSystem)3587 IOException (java.io.IOException)2631 Configuration (org.apache.hadoop.conf.Configuration)2621 FileStatus (org.apache.hadoop.fs.FileStatus)1568 ArrayList (java.util.ArrayList)1145 File (java.io.File)987 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)924 HashMap (java.util.HashMap)570 Job (org.apache.hadoop.mapreduce.Job)492 JobConf (org.apache.hadoop.mapred.JobConf)477 URI (java.net.URI)465 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)462 FileNotFoundException (java.io.FileNotFoundException)441 FsPermission (org.apache.hadoop.fs.permission.FsPermission)375 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)362 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)355 Map (java.util.Map)326 List (java.util.List)316