Search in sources :

Example 1 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class MasterMetaBootstrap method unassignExcessMetaReplica.

/**
 * Closes and removes every hbase:meta replica whose replica id is not below the configured
 * replica count.
 * <p>
 * For each meta replica znode with {@code replicaId >= numMetaReplicasConfigured}, the hosting
 * server is asked to close the region (with a 30000 ms wait budget) and the replica's znode is
 * deleted. This is best-effort cleanup: any exception is logged and swallowed so the caller is
 * never blocked by it.
 *
 * @param numMetaReplicasConfigured number of meta replicas currently configured; replicas with
 *          an id greater than or equal to this value are treated as excess
 */
private void unassignExcessMetaReplica(int numMetaReplicasConfigured) {
    final ZooKeeperWatcher zooKeeper = master.getZooKeeper();
    // unassign the unneeded replicas (e.g., if the previous master was configured
    // with a replication of 3 and now it is 2, we need to unassign the 1 unneeded replica)
    try {
        List<String> metaReplicaZnodes = zooKeeper.getMetaReplicaNodes();
        for (String metaReplicaZnode : metaReplicaZnodes) {
            int replicaId = zooKeeper.znodePaths.getMetaReplicaIdFromZnode(metaReplicaZnode);
            if (replicaId >= numMetaReplicasConfigured) {
                RegionState r = MetaTableLocator.getMetaRegionState(zooKeeper, replicaId);
                LOG.info("Closing excess replica of meta region " + r.getRegion());
                // send a close and wait for a max of 30 seconds
                ServerManager.closeRegionSilentlyAndWait(master.getClusterConnection(), r.getServerName(), r.getRegion(), 30000);
                ZKUtil.deleteNode(zooKeeper, zooKeeper.znodePaths.getZNodeForReplica(replicaId));
            }
        }
    } catch (Exception ex) {
        // ignore the exception since we don't want the master to be wedged due to potential
        // issues in the cleanup of the extra regions. We can do that cleanup via hbck or manually.
        // Pass the exception as the Throwable argument so the stack trace is kept in the log.
        LOG.warn("Ignoring exception ", ex);
    }
}
Also used : ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException)

Example 2 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class MasterMetaBootstrap method getPreviouselyFailedMetaServersFromZK.

/**
 * Collects the server names listed as children of the hbase:meta entry under the
 * recovering-regions ZK node.
 *
 * @return the parsed server names; an empty set when the child listing comes back {@code null}
 * @throws KeeperException propagated from the ZooKeeper utility calls
 */
private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
    final ZooKeeperWatcher zkWatcher = master.getZooKeeper();
    final String recoveringRoot = zkWatcher.znodePaths.recoveringRegionsZNode;
    final String metaEncodedName = HRegionInfo.FIRST_META_REGIONINFO.getEncodedName();
    final String metaRecoveringPath = ZKUtil.joinZNode(recoveringRoot, metaEncodedName);
    final List<String> children = ZKUtil.listChildrenNoWatch(zkWatcher, metaRecoveringPath);
    final Set<ServerName> failedServers = new HashSet<>();
    if (children != null) {
        // Each child znode name is a serialized server name.
        for (String child : children) {
            failedServers.add(ServerName.parseServerName(child));
        }
    }
    return failedServers;
}
Also used : ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ServerName(org.apache.hadoop.hbase.ServerName) HashSet(java.util.HashSet)

Example 3 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestAssignmentListener method testAddNewServerThatExistsInDraining.

/**
 * Verifies that servers which already have a draining znode when the trackers start are added
 * to the ServerManager's draining list once they register as online.
 */
@Test
public void testAddNewServerThatExistsInDraining() throws Exception {
    // Under certain circumstances, such as when we failover to the Backup
    // HMaster, the DrainingServerTracker is started with existing servers in
    // draining before all of the Region Servers register with the
    // ServerManager as "online".  This test is to ensure that Region Servers
    // are properly added to the ServerManager.drainingServers when they
    // register with the ServerManager under these circumstances.
    Configuration conf = TEST_UTIL.getConfiguration();
    ZooKeeperWatcher zooKeeper = new ZooKeeperWatcher(conf, "zkWatcher-NewServerDrainTest", abortable, true);
    try {
        String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
        String drainingZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.draining.rs", "draining"));
        HMaster master = Mockito.mock(HMaster.class);
        Mockito.when(master.getConfiguration()).thenReturn(conf);
        ServerName SERVERNAME_A = ServerName.valueOf("mockserverbulk_a.org", 1000, 8000);
        ServerName SERVERNAME_B = ServerName.valueOf("mockserverbulk_b.org", 1001, 8000);
        ServerName SERVERNAME_C = ServerName.valueOf("mockserverbulk_c.org", 1002, 8000);
        // We'll start with 2 servers in draining that existed before the
        // HMaster started.
        ArrayList<ServerName> drainingServers = new ArrayList<>();
        drainingServers.add(SERVERNAME_A);
        drainingServers.add(SERVERNAME_B);
        // We'll have 2 servers that come online AFTER the DrainingServerTracker
        // is started (just as we see when we failover to the Backup HMaster).
        // One of these will already be a draining server.
        HashMap<ServerName, ServerLoad> onlineServers = new HashMap<>();
        onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD);
        onlineServers.put(SERVERNAME_C, ServerLoad.EMPTY_SERVERLOAD);
        // Create draining znodes for the draining servers, which would have been
        // performed when the previous HMaster was running.
        for (ServerName sn : drainingServers) {
            String znode = ZKUtil.joinZNode(drainingZNode, sn.getServerName());
            ZKUtil.createAndFailSilent(zooKeeper, znode);
        }
        // Now, we follow the same order of steps that the HMaster does to setup
        // the ServerManager, RegionServerTracker, and DrainingServerTracker.
        ServerManager serverManager = new ServerManager(master);
        RegionServerTracker regionServerTracker = new RegionServerTracker(zooKeeper, master, serverManager);
        regionServerTracker.start();
        DrainingServerTracker drainingServerTracker = new DrainingServerTracker(zooKeeper, master, serverManager);
        drainingServerTracker.start();
        // Confirm our ServerManager lists are empty.
        // (JUnit's assertEquals takes the expected value first, then the actual one.)
        Assert.assertEquals(new HashMap<ServerName, ServerLoad>(), serverManager.getOnlineServers());
        Assert.assertEquals(new ArrayList<ServerName>(), serverManager.getDrainingServersList());
        // checkAndRecordNewServer() is how servers are added to the ServerManager.
        ArrayList<ServerName> onlineDrainingServers = new ArrayList<>();
        for (ServerName sn : onlineServers.keySet()) {
            // Here's the actual test.
            serverManager.checkAndRecordNewServer(sn, onlineServers.get(sn));
            if (drainingServers.contains(sn)) {
                // keeping track for later verification
                onlineDrainingServers.add(sn);
            }
        }
        // Verify the ServerManager lists are correctly updated.
        Assert.assertEquals(onlineServers, serverManager.getOnlineServers());
        Assert.assertEquals(onlineDrainingServers, serverManager.getDrainingServersList());
    } finally {
        // Release the ZooKeeper connection even when an assertion above fails.
        zooKeeper.close();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) DrainingServerTracker(org.apache.hadoop.hbase.zookeeper.DrainingServerTracker) ArrayList(java.util.ArrayList) ServerLoad(org.apache.hadoop.hbase.ServerLoad) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) ServerName(org.apache.hadoop.hbase.ServerName) RegionServerTracker(org.apache.hadoop.hbase.zookeeper.RegionServerTracker) Test(org.junit.Test)

Example 4 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testLogReplayForDisablingTable.

/**
 * Exercises distributed log replay when one of the tables hosted by the aborted region server
 * has been disabled.
 * <p>
 * Flow: enable log replay, start the cluster, install two tables, pick a region server that
 * hosts regions of both tables and no system-table region, write WAL entries for both tables,
 * disable a table, abort the server, wait for recovery, then check that the edits counted in
 * the recovered.edits directories and the rows readable from the other table both equal
 * {@code NUM_LOG_LINES}.
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testLogReplayForDisablingTable() throws Exception {
    LOG.info("testLogReplayForDisablingTable");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    // Tables are installed inside the try so that a failure during installation still closes
    // the watcher (and any table that was already opened).
    Table disablingHT = null;
    Table ht = null;
    try {
        disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
        ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
        // turn off load balancing to prevent regions from moving around otherwise
        // they will consume recovered.edits
        master.balanceSwitch(false);
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        boolean hasRegionsForBothTables = false;
        String tableName = null;
        for (int i = 0; i < NUM_RS; i++) {
            tableName = null;
            hasRegionsForBothTables = false;
            boolean isCarryingSystem = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.getTable().isSystemTable()) {
                    isCarryingSystem = true;
                    break;
                }
                if (tableName != null && !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
                    // make sure that we find a RS has online regions for both "table" and "disableTable"
                    hasRegionsForBothTables = true;
                    break;
                } else if (tableName == null) {
                    tableName = region.getTable().getNameAsString();
                }
            }
            if (isCarryingSystem) {
                // skip servers hosting system-table regions
                continue;
            }
            if (hasRegionsForBothTables) {
                break;
            }
        }
        // make sure we found a good RS
        Assert.assertTrue(hasRegionsForBothTables);
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.isMetaTable()) {
                it.remove();
            }
        }
        makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
        makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
        LOG.info("Disabling table\n");
        // NOTE(review): the table installed above is named "disableTable", but the table disabled
        // here (and used for tdir below) is named after the test method - confirm which is intended.
        TEST_UTIL.getAdmin().disableTable(TableName.valueOf(name.getMethodName()));
        TEST_UTIL.waitTableDisabled(TableName.valueOf(name.getMethodName()).getName());
        // abort RS
        LOG.info("Aborting region server: " + hrs.getServerName());
        hrs.abort("testing");
        // wait for abort completes
        TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
            }
        });
        // wait for regions come online
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (HBaseTestingUtility.getAllOnlineRegions(cluster).size() >= (NUM_REGIONS_TO_CREATE + 1));
            }
        });
        // wait for all regions are fully recovered
        TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(zkw.znodePaths.recoveringRegionsZNode, false);
                ServerManager serverManager = master.getServerManager();
                return (!serverManager.areDeadServersInProgress() && recoveringRegions != null && recoveringRegions.isEmpty());
            }
        });
        int count = 0;
        FileSystem fs = master.getMasterFileSystem().getFileSystem();
        Path rootdir = FSUtils.getRootDir(conf);
        Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf(name.getMethodName()));
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            LOG.debug("checking edits dir " + editsdir);
            if (!fs.exists(editsdir)) {
                continue;
            }
            FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {

                @Override
                public boolean accept(Path p) {
                    // sequence-id marker files are not edit files; leave them out of the count
                    return !WALSplitter.isSequenceIdFile(p);
                }
            });
            if (files != null) {
                for (FileStatus file : files) {
                    int c = countWAL(file.getPath(), fs, conf);
                    count += c;
                    LOG.info(c + " edits in " + file.getPath());
                }
            }
        }
        LOG.info("Verify edits in recovered.edits files");
        assertEquals(NUM_LOG_LINES, count);
        LOG.info("Verify replayed edits");
        assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
        // clean up
        for (HRegionInfo hri : regions) {
            Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, hri.getEncodedName()));
            fs.delete(editsdir, true);
        }
    } finally {
        // Nested so that a failing close() does not prevent the remaining resources from closing.
        try {
            if (disablingHT != null) {
                disablingHT.close();
            }
        } finally {
            try {
                if (ht != null) {
                    ht.close();
                }
            } finally {
                zkw.close();
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) Table(org.apache.hadoop.hbase.client.Table) FileStatus(org.apache.hadoop.fs.FileStatus) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 5 with ZooKeeperWatcher

use of org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher in project hbase by apache.

the class TestDistributedLogSplitting method testSameVersionUpdatesRecoveryWithCompaction.

/**
 * Writes many updates to a single cell with one shared timestamp, aborts the hosting region
 * server, and checks that the last written value is the one read back, both right after
 * recovery and again after a flush plus compaction.
 */
@Ignore("DLR is broken by HBASE-12751")
@Test(timeout = 300000)
public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
    LOG.info("testSameVersionUpdatesRecoveryWithCompaction");
    conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
    conf.setInt("hbase.hstore.compactionThreshold", 3);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 2000;
    // turn off load balancing to prevent regions from moving around otherwise
    // they will consume recovered.edits
    master.balanceSwitch(false);
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    // Installed inside the try so that a failure during installation still closes the watcher.
    Table ht = null;
    try {
        ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
        List<HRegionInfo> regions = null;
        HRegionServer hrs = null;
        // Pick the first region server that is not hosting a meta region (falls back to the
        // last server examined when every server hosts one).
        for (int i = 0; i < NUM_RS; i++) {
            boolean isCarryingMeta = false;
            hrs = rsts.get(i).getRegionServer();
            regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
            for (HRegionInfo region : regions) {
                if (region.isMetaRegion()) {
                    isCarryingMeta = true;
                    break;
                }
            }
            if (isCarryingMeta) {
                continue;
            }
            break;
        }
        LOG.info("#regions = " + regions.size());
        Iterator<HRegionInfo> it = regions.iterator();
        while (it.hasNext()) {
            HRegionInfo region = it.next();
            if (region.isMetaTable() || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
                it.remove();
            }
        }
        if (regions.isEmpty()) {
            return;
        }
        HRegionInfo curRegionInfo = regions.get(0);
        byte[] startRow = curRegionInfo.getStartKey();
        if (startRow == null || startRow.length == 0) {
            startRow = new byte[] { 0, 0, 0, 0, 1 };
        }
        byte[] row = Bytes.incrementBytes(startRow, 1);
        // use last 5 bytes because HBaseTestingUtility.createMultiRegions use 5 bytes key
        row = Arrays.copyOfRange(row, 3, 8);
        long value = 0;
        // NOTE(review): the table installed above is named "table", but the edits, flush and
        // compaction below use a table named after the test method - confirm which is intended.
        final TableName tableName = TableName.valueOf(name.getMethodName());
        byte[] family = Bytes.toBytes("family");
        byte[] qualifier = Bytes.toBytes("c1");
        // One timestamp shared by every edit, so all updates land on the same cell version.
        long timeStamp = System.currentTimeMillis();
        final WAL wal = hrs.getWAL(curRegionInfo);
        for (int i = 0; i < NUM_LOG_LINES; i += 1) {
            WALEdit e = new WALEdit();
            value++;
            e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
            wal.append(curRegionInfo, new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()), e, true);
        }
        wal.sync();
        wal.shutdown();
        // wait for abort completes
        this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
        // verify we got the last value
        LOG.info("Verification Starts...");
        Get g = new Get(row);
        Result r = ht.get(g);
        long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
        assertEquals(value, theStoredVal);
        // after flush & compaction
        LOG.info("Verification after flush...");
        TEST_UTIL.getAdmin().flush(tableName);
        TEST_UTIL.getAdmin().compact(tableName);
        // wait for compaction completes
        TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return (TEST_UTIL.getAdmin().getCompactionState(tableName) == CompactionState.NONE);
            }
        });
        r = ht.get(g);
        theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
        assertEquals(value, theStoredVal);
    } finally {
        // Nested so that a failing table close() does not leak the ZooKeeper watcher.
        try {
            if (ht != null) {
                ht.close();
            }
        } finally {
            zkw.close();
        }
    }
}
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) KeyValue(org.apache.hadoop.hbase.KeyValue) Result(org.apache.hadoop.hbase.client.Result) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) ZooKeeperWatcher(org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher) Table(org.apache.hadoop.hbase.client.Table) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) OperationConflictException(org.apache.hadoop.hbase.exceptions.OperationConflictException) RegionInRecoveryException(org.apache.hadoop.hbase.exceptions.RegionInRecoveryException) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) RetriesExhaustedWithDetailsException(org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException) ServerNotRunningYetException(org.apache.hadoop.hbase.ipc.ServerNotRunningYetException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) TableName(org.apache.hadoop.hbase.TableName) AtomicLong(java.util.concurrent.atomic.AtomicLong) Get(org.apache.hadoop.hbase.client.Get) RegionServerThread(org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread) Waiter(org.apache.hadoop.hbase.Waiter) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

ZooKeeperWatcher (org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher)60 Test (org.junit.Test)33 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)20 Table (org.apache.hadoop.hbase.client.Table)19 Configuration (org.apache.hadoop.conf.Configuration)16 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)15 Ignore (org.junit.Ignore)15 IOException (java.io.IOException)13 ServerName (org.apache.hadoop.hbase.ServerName)12 RegionServerThread (org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread)12 TimeoutException (java.util.concurrent.TimeoutException)9 Waiter (org.apache.hadoop.hbase.Waiter)9 OperationConflictException (org.apache.hadoop.hbase.exceptions.OperationConflictException)9 ArrayList (java.util.ArrayList)8 RetriesExhaustedWithDetailsException (org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException)8 RegionInRecoveryException (org.apache.hadoop.hbase.exceptions.RegionInRecoveryException)8 ServerNotRunningYetException (org.apache.hadoop.hbase.ipc.ServerNotRunningYetException)8 MetaTableLocator (org.apache.hadoop.hbase.zookeeper.MetaTableLocator)7 HashSet (java.util.HashSet)6 TableName (org.apache.hadoop.hbase.TableName)6