Search in sources :

Example 36 with ClusterMetrics

use of org.apache.hadoop.hbase.ClusterMetrics in project hbase by apache.

From the class TestReplicationStatus, method testReplicationStatus.

/**
 * Test for HBASE-9531.
 * <p/>
 * put a few rows into htable1, which should be replicated to htable2 <br/>
 * create a ClusterStatus instance 'status' from HBaseAdmin <br/>
 * test : status.getLoad(server).getReplicationLoadSourceList() <br/>
 * test : status.getLoad(server).getReplicationLoadSink()
 */
@Test
public void testReplicationStatus() throws Exception {
    // Two region servers are required; the harness normally starts one, so add a second
    // and block until it has registered as live.
    UTIL1.getMiniHBaseCluster().startRegionServer();
    Waiter.waitFor(UTIL1.getConfiguration(), 30000,
        () -> UTIL1.getMiniHBaseCluster().getLiveRegionServerThreads().size() > 1);
    Admin admin = UTIL1.getAdmin();
    // disable peer <= WHY? I DON'T GET THIS DISABLE BUT TEST FAILS W/O IT.
    admin.disableReplicationPeer(PEER_ID2);
    insertRowsOnSource();
    LOG.info("AFTER PUTS");
    // TODO: Replace this fixed sleep with a barrier. Waiting on replication stats to
    // change was tried, but sleeping in the main thread seems to disturb background
    // replication. HACK! To address flakiness.
    Threads.sleep(10000);
    ClusterMetrics clusterMetrics = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
    for (JVMClusterUtil.RegionServerThread rsThread : UTIL1.getHBaseCluster().getRegionServerThreads()) {
        ServerName serverName = rsThread.getRegionServer().getServerName();
        assertTrue("" + serverName, clusterMetrics.getLiveServerMetrics().containsKey(serverName));
        ServerMetrics serverMetrics = clusterMetrics.getLiveServerMetrics().get(serverName);
        List<ReplicationLoadSource> sources = serverMetrics.getReplicationLoadSourceList();
        ReplicationLoadSink sink = serverMetrics.getReplicationLoadSink();
        // Exactly one source entry is expected because only one peer is configured.
        assertEquals("Failed to get ReplicationLoadSourceList " + sources + ", " + serverName, 1, sources.size());
        assertEquals(PEER_ID2, sources.get(0).getPeerID());
        // Only check that the sink values exist; the exact numbers are hard to pin
        // down while replication is live.
        assertTrue("failed to get ReplicationLoadSink.AgeOfLastShippedOp ", (sink.getAgeOfLastAppliedOp() >= 0));
        assertTrue("failed to get ReplicationLoadSink.TimeStampsOfLastAppliedOp ", (sink.getTimestampsOfLastAppliedOp() >= 0));
    }
    // Stopping rs1 should cause its replication queue to be transferred to rs0.
    HRegionServer stoppedServer = UTIL1.getHBaseCluster().getRegionServer(1);
    stoppedServer.stop("Stop RegionServer");
    while (stoppedServer.isAlive()) {
        Threads.sleep(100);
    }
    // Make sure it is dead and its references are cleaned up. TODO: Replace with a
    // barrier; waiting on replication stats was tried but sleeping in the main thread
    // seems to disturb background replication.
    Threads.sleep(10000);
    ServerName survivor = UTIL1.getHBaseCluster().getRegionServer(0).getServerName();
    List<ReplicationLoadSource> survivorSources = waitOnMetricsReport(1, survivor);
    // The surviving server should now carry two queues -- its original one plus the
    // one taken over on failover. The original still reports PEER_ID2.
    assertEquals("Failed ReplicationLoadSourceList " + survivorSources, 2, survivorSources.size());
    assertEquals(PEER_ID2, survivorSources.get(0).getPeerID());
}
Also used : Admin(org.apache.hadoop.hbase.client.Admin) IOException(java.io.IOException) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) JVMClusterUtil(org.apache.hadoop.hbase.util.JVMClusterUtil) ServerName(org.apache.hadoop.hbase.ServerName) ServerMetrics(org.apache.hadoop.hbase.ServerMetrics) Waiter(org.apache.hadoop.hbase.Waiter) Test(org.junit.Test)

Example 37 with ClusterMetrics

use of org.apache.hadoop.hbase.ClusterMetrics in project hbase by apache.

From the class TestReplicationStatusSink, method getLatestSinkMetric.

/**
 * Fetches fresh live-server metrics from the cluster and returns the given
 * server's sink-side replication load.
 *
 * @param admin  cluster admin used to pull the metrics snapshot
 * @param server the region server whose sink metrics are wanted
 * @return the latest {@code ReplicationLoadSink} reported by {@code server}
 * @throws IOException if the metrics fetch fails
 */
private ReplicationLoadSink getLatestSinkMetric(Admin admin, ServerName server) throws IOException {
    return admin.getClusterMetrics(EnumSet.of(ClusterMetrics.Option.LIVE_SERVERS))
        .getLiveServerMetrics()
        .get(server)
        .getReplicationLoadSink();
}
Also used : ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) ServerMetrics(org.apache.hadoop.hbase.ServerMetrics)

Example 38 with ClusterMetrics

use of org.apache.hadoop.hbase.ClusterMetrics in project hbase by apache.

From the class TestRegionsRecoveryChore, method testRegionReopensWithStoreRefConfig.

/**
 * Verifies that RegionsRecoveryChore reopens regions whose store-file reference
 * count exceeds the configured threshold
 * ({@code hbase.regions.recovery.store.file.ref.count} = 300): two tables need
 * reopening, covering three regions in total.
 */
@Test
public void testRegionReopensWithStoreRefConfig() throws Exception {
    regionNo = 0;
    ClusterMetrics clusterMetrics = TestRegionsRecoveryChore.getClusterMetrics(4);
    final Map<ServerName, ServerMetrics> serverMetricsMap = clusterMetrics.getLiveServerMetrics();
    LOG.debug("All Region Names with refCount....");
    for (ServerMetrics serverMetrics : serverMetricsMap.values()) {
        Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics();
        for (RegionMetrics regionMetrics : regionMetricsMap.values()) {
            // Parameterized logging avoids building the message when DEBUG is off;
            // decode the region name with an explicit charset instead of the
            // platform default (which `new String(byte[])` would use).
            LOG.debug("name: {} refCount: {}",
                new String(regionMetrics.getRegionName(), java.nio.charset.StandardCharsets.UTF_8),
                regionMetrics.getStoreRefCount());
        }
    }
    // Stub the master so the chore sees our synthetic metrics and region infos.
    Mockito.when(hMaster.getClusterMetrics()).thenReturn(clusterMetrics);
    Mockito.when(hMaster.getAssignmentManager()).thenReturn(assignmentManager);
    for (byte[] regionName : REGION_NAME_LIST) {
        Mockito.when(assignmentManager.getRegionInfo(regionName)).thenReturn(TestRegionsRecoveryChore.getRegionInfo(regionName));
    }
    Stoppable stoppable = new StoppableImplementation();
    Configuration configuration = getCustomConf();
    // Regions with more than 300 store-file references qualify for reopening.
    configuration.setInt("hbase.regions.recovery.store.file.ref.count", 300);
    regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
    regionsRecoveryChore.chore();
    // Verify that we need to reopen regions of 2 tables
    Mockito.verify(hMaster, Mockito.times(2)).reopenRegions(Mockito.any(), Mockito.anyList(), Mockito.anyLong(), Mockito.anyLong());
    Mockito.verify(hMaster, Mockito.times(1)).getClusterMetrics();
    // Verify that we need to reopen total 3 regions that have refCount > 300
    Mockito.verify(hMaster, Mockito.times(3)).getAssignmentManager();
    Mockito.verify(assignmentManager, Mockito.times(3)).getRegionInfo(Mockito.any());
}
Also used : ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) Configuration(org.apache.hadoop.conf.Configuration) ServerName(org.apache.hadoop.hbase.ServerName) ServerMetrics(org.apache.hadoop.hbase.ServerMetrics) Stoppable(org.apache.hadoop.hbase.Stoppable) RegionMetrics(org.apache.hadoop.hbase.RegionMetrics) Test(org.junit.Test)

Example 39 with ClusterMetrics

use of org.apache.hadoop.hbase.ClusterMetrics in project hbase by apache.

From the class TestFavoredStochasticBalancerPickers, method testPickers.

/**
 * End-to-end check of the favored-node load picker: deliberately overloads one
 * region server with extra regions, then builds a BalancerClusterState from live
 * cluster metrics and verifies that FavoredNodeLoadPicker proposes moving a
 * user-table region OFF the most-loaded server, and only to one of that
 * region's favored nodes.
 */
@Test
public void testPickers() throws Exception {
    // Create and fully load a test table split into REGIONS regions.
    TableName tableName = TableName.valueOf(name.getMethodName());
    ColumnFamilyDescriptor columnFamilyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY).build();
    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(columnFamilyDescriptor).build();
    admin.createTable(desc, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGIONS);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    TEST_UTIL.loadTable(admin.getConnection().getTable(tableName), HConstants.CATALOG_FAMILY);
    admin.flush(tableName);
    HMaster master = cluster.getMaster();
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    ServerName masterServerName = master.getServerName();
    // Exclude the master so it is never chosen as the "most loaded" target.
    List<ServerName> excludedServers = Lists.newArrayList(masterServerName);
    final ServerName mostLoadedServer = getRSWithMaxRegions(tableName, excludedServers);
    assertNotNull(mostLoadedServer);
    int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
    excludedServers.add(mostLoadedServer);
    // Lets find another server with more regions to calculate number of regions to move
    ServerName source = getRSWithMaxRegions(tableName, excludedServers);
    assertNotNull(source);
    int regionsToMove = getTableRegionsFromServer(tableName, source).size() / 2;
    // Since move only works if the target is part of favored nodes of the region, lets get all
    // regions that are movable to mostLoadedServer
    List<RegionInfo> hris = getRegionsThatCanBeMoved(tableName, mostLoadedServer);
    RegionStates rst = master.getAssignmentManager().getRegionStates();
    // Pile extra regions onto mostLoadedServer, waiting for each move to land
    // before issuing the next one.
    for (int i = 0; i < regionsToMove; i++) {
        final RegionInfo regionInfo = hris.get(i);
        admin.move(regionInfo.getEncodedNameAsBytes(), mostLoadedServer);
        LOG.info("Moving region: " + hris.get(i).getRegionNameAsString() + " to " + mostLoadedServer);
        TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {

            @Override
            public boolean evaluate() throws Exception {
                return ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo), mostLoadedServer);
            }
        });
    }
    // Wait until mostLoadedServer actually hosts the expected final region count.
    final int finalRegions = numRegions + regionsToMove;
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
            int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
            return (numRegions == finalRegions);
        }
    });
    // Add a fresh (empty) region server so the picker has an under-loaded target.
    TEST_UTIL.getHBaseCluster().startRegionServerAndWait(60000);
    // Snapshot per-server assignments (master excluded) to seed the balancer state.
    Map<ServerName, List<RegionInfo>> serverAssignments = Maps.newHashMap();
    ClusterMetrics status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
    for (ServerName sn : status.getLiveServerMetrics().keySet()) {
        if (!ServerName.isSameAddress(sn, masterServerName)) {
            serverAssignments.put(sn, getTableRegionsFromServer(tableName, sn));
        }
    }
    RegionHDFSBlockLocationFinder regionFinder = new RegionHDFSBlockLocationFinder();
    regionFinder.setClusterMetrics(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)));
    regionFinder.setConf(conf);
    regionFinder.setClusterInfoProvider(new MasterClusterInfoProvider(TEST_UTIL.getMiniHBaseCluster().getMaster()));
    // NOTE: this local 'cluster' (a BalancerClusterState) shadows the test field
    // 'cluster' (the mini cluster) used above.
    BalancerClusterState cluster = new BalancerClusterState(serverAssignments, null, regionFinder, new RackManager(conf));
    LoadOnlyFavoredStochasticBalancer balancer = (LoadOnlyFavoredStochasticBalancer) TEST_UTIL.getMiniHBaseCluster().getMaster().getLoadBalancer().getInternalBalancer();
    cluster.sortServersByRegionCount();
    // serverIndicesSortedByRegionCount is ascending, so the last index should be
    // the server we overloaded.
    Integer[] servers = cluster.serverIndicesSortedByRegionCount;
    LOG.info("Servers sorted by region count:" + Arrays.toString(servers));
    LOG.info("Cluster dump: " + cluster);
    if (!mostLoadedServer.equals(cluster.servers[servers[servers.length - 1]])) {
        LOG.error("Most loaded server: " + mostLoadedServer + " does not match: " + cluster.servers[servers[servers.length - 1]]);
    }
    assertEquals(mostLoadedServer, cluster.servers[servers[servers.length - 1]]);
    FavoredStochasticBalancer.FavoredNodeLoadPicker loadPicker = balancer.new FavoredNodeLoadPicker();
    // The picker is stochastic, so sample up to 100 proposals looking for a
    // user-table region move; 100 is presumably ample for the picker to hit one
    // -- TODO confirm this bound is not a flakiness source.
    boolean userRegionPicked = false;
    for (int i = 0; i < 100; i++) {
        if (userRegionPicked) {
            break;
        } else {
            BalanceAction action = loadPicker.generate(cluster);
            if (action.getType() == BalanceAction.Type.MOVE_REGION) {
                MoveRegionAction moveRegionAction = (MoveRegionAction) action;
                RegionInfo region = cluster.regions[moveRegionAction.getRegion()];
                assertNotEquals(-1, moveRegionAction.getToServer());
                ServerName destinationServer = cluster.servers[moveRegionAction.getToServer()];
                // Every proposed move must come from the most-loaded server...
                assertEquals(cluster.servers[moveRegionAction.getFromServer()], mostLoadedServer);
                if (!region.getTable().isSystemTable()) {
                    // ...and, for user regions, must target one of the region's
                    // favored nodes (startcode is irrelevant, hence valueOf(addr, -1)).
                    List<ServerName> favNodes = fnm.getFavoredNodes(region);
                    assertTrue(favNodes.contains(ServerName.valueOf(destinationServer.getAddress(), -1)));
                    userRegionPicked = true;
                }
            }
        }
    }
    assertTrue("load picker did not pick expected regions in 100 iterations.", userRegionPicked);
}
Also used : RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) List(java.util.List) RackManager(org.apache.hadoop.hbase.master.RackManager) FavoredNodesManager(org.apache.hadoop.hbase.favored.FavoredNodesManager) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) IOException(java.io.IOException) TableName(org.apache.hadoop.hbase.TableName) RegionStates(org.apache.hadoop.hbase.master.assignment.RegionStates) ClusterMetrics(org.apache.hadoop.hbase.ClusterMetrics) ServerName(org.apache.hadoop.hbase.ServerName) HMaster(org.apache.hadoop.hbase.master.HMaster) Waiter(org.apache.hadoop.hbase.Waiter) Test(org.junit.Test)

Aggregations

ClusterMetrics (org.apache.hadoop.hbase.ClusterMetrics)39 ServerName (org.apache.hadoop.hbase.ServerName)30 Test (org.junit.Test)19 ServerMetrics (org.apache.hadoop.hbase.ServerMetrics)18 ArrayList (java.util.ArrayList)13 List (java.util.List)12 HashMap (java.util.HashMap)9 RegionMetrics (org.apache.hadoop.hbase.RegionMetrics)8 Admin (org.apache.hadoop.hbase.client.Admin)8 IOException (java.io.IOException)7 Map (java.util.Map)7 TableName (org.apache.hadoop.hbase.TableName)7 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)7 HashSet (java.util.HashSet)6 Configuration (org.apache.hadoop.conf.Configuration)6 TreeMap (java.util.TreeMap)5 Collections (java.util.Collections)4 LinkedList (java.util.LinkedList)4 Collectors (java.util.stream.Collectors)4 Put (org.apache.hadoop.hbase.client.Put)4