Example 51 with ServerName

Use of org.apache.hadoop.hbase.ServerName in project hbase by Apache.

The perform method of the class RestartRandomZKNodeAction:

@Override
public void perform() throws Exception {
    LOG.info("Performing action: Restart random zookeeper node");
    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(ZKServerTool.readZKNodes(getConf()));
    restartZKNode(server, sleepTime);
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName)
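
ZKServerTool.readZKNodes returns a list of ServerName values (presumably derived from the ZooKeeper quorum in the configuration), so the action above only has to pick one at random and hand it to restartZKNode. As a minimal sketch of what such a ServerName carries (the hostname and port below are made-up values, not taken from the example), a ServerName can be constructed and inspected like this:

import org.apache.hadoop.hbase.ServerName;

public class ServerNameSketch {
    public static void main(String[] args) {
        // Hostname and port are illustrative values only.
        ServerName server = ServerName.valueOf("zk1.example.com", 2181, ServerName.NON_STARTCODE);
        System.out.println(server.getHostname());   // zk1.example.com
        System.out.println(server.getPort());       // 2181
        System.out.println(server.getServerName()); // roughly "zk1.example.com,2181,-1"
    }
}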

Example 52 with ServerName

Use of org.apache.hadoop.hbase.ServerName in project hbase by Apache.

The perform method of the class RollingBatchRestartRsAction:

@Override
public void perform() throws Exception {
    LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", (int) (ratio * 100)));
    List<ServerName> selectedServers = selectServers();
    Queue<ServerName> serversToBeKilled = new LinkedList<>(selectedServers);
    Queue<ServerName> deadServers = new LinkedList<>();
    // loop while there are servers to be killed or dead servers to be restarted
    while ((!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) && !context.isStopping()) {
        KillOrStart action = KillOrStart.KILL;
        if (serversToBeKilled.isEmpty()) {
            // no more servers to kill
            action = KillOrStart.START;
        } else if (deadServers.isEmpty()) {
            // no more servers to start
            action = KillOrStart.KILL;
        } else if (deadServers.size() >= maxDeadServers) {
            // we have too many dead servers. Don't kill any more
            action = KillOrStart.START;
        } else {
            // do a coin toss
            action = RandomUtils.nextBoolean() ? KillOrStart.KILL : KillOrStart.START;
        }
        ServerName server;
        switch(action) {
            case KILL:
                server = serversToBeKilled.remove();
                try {
                    killRs(server);
                } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
                    // We've seen this in test runs where we timeout but the kill went through. HBASE-9743
                    // So, add to deadServers even if exception so the start gets called.
                    LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e);
                }
                deadServers.add(server);
                break;
            case START:
                try {
                    server = deadServers.remove();
                    startRs(server);
                } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
                    // The start may fail, but it is better to keep going even though we may lose the server.
                    LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e);
                }
                break;
        }
        sleep(RandomUtils.nextInt((int) sleepTime));
    }
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) LinkedList(java.util.LinkedList)
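
The loop above keeps two queues, one of servers still to be killed and one of servers already down, and chooses the next move so that at most maxDeadServers region servers are down at any time. A stripped-down sketch of the same queue discipline, using plain strings instead of ServerName and leaving out the chaos-monkey plumbing (all names below are made up), could look like this:

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ThreadLocalRandom;

public class RollingRestartSketch {
    public static void main(String[] args) {
        Queue<String> toKill = new LinkedList<>(Arrays.asList("rs1", "rs2", "rs3"));
        Queue<String> dead = new LinkedList<>();
        int maxDead = 2;
        while (!toKill.isEmpty() || !dead.isEmpty()) {
            boolean kill;
            if (toKill.isEmpty()) {
                kill = false; // nothing left to kill, only restarts remain
            } else if (dead.isEmpty()) {
                kill = true; // nothing is down yet, so kill first
            } else if (dead.size() >= maxDead) {
                kill = false; // too many servers down, restart one before killing more
            } else {
                kill = ThreadLocalRandom.current().nextBoolean(); // coin toss
            }
            if (kill) {
                String server = toKill.remove();
                System.out.println("kill  " + server);
                dead.add(server);
            } else {
                System.out.println("start " + dead.remove());
            }
        }
    }
}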

Example 53 with ServerName

Use of org.apache.hadoop.hbase.ServerName in project hbase by Apache.

The perform method of the class BatchRestartRsAction:

@Override
public void perform() throws Exception {
    LOG.info(String.format("Performing action: Batch restarting %d%% of region servers", (int) (ratio * 100)));
    List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), ratio);
    Set<ServerName> killedServers = new HashSet<>();
    for (ServerName server : selectedServers) {
        // Don't keep killing servers if we're trying to stop the monkey.
        if (context.isStopping()) {
            break;
        }
        LOG.info("Killing region server:" + server);
        cluster.killRegionServer(server);
        killedServers.add(server);
    }
    for (ServerName server : killedServers) {
        cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
    }
    LOG.info("Killed " + killedServers.size() + " region servers. Reported num of rs:" + cluster.getClusterStatus().getServersSize());
    sleep(sleepTime);
    for (ServerName server : killedServers) {
        LOG.info("Starting region server:" + server.getHostname());
        cluster.startRegionServer(server.getHostname(), server.getPort());
    }
    for (ServerName server : killedServers) {
        cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), PolicyBasedChaosMonkey.TIMEOUT);
    }
    LOG.info("Started " + killedServers.size() + " region servers. Reported num of rs:" + cluster.getClusterStatus().getServersSize());
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) HashSet(java.util.HashSet)
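
PolicyBasedChaosMonkey.selectRandomItems is what turns the ratio into a concrete subset of region servers to kill. Its implementation is not shown here; one plausible way to pick roughly ratio * N elements at random (a sketch only, not the actual PolicyBasedChaosMonkey code) is:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class SelectRandomItemsSketch {
    // Return roughly ratio * items.size() elements, chosen at random.
    static <T> List<T> selectRandomItems(List<T> items, float ratio) {
        List<T> shuffled = new ArrayList<>(items);
        Collections.shuffle(shuffled);
        int count = Math.min(shuffled.size(), (int) Math.ceil(shuffled.size() * ratio));
        return shuffled.subList(0, count);
    }

    public static void main(String[] args) {
        List<String> servers = Arrays.asList("rs1", "rs2", "rs3", "rs4", "rs5");
        System.out.println(selectRandomItems(servers, 0.4f)); // e.g. [rs3, rs1]
    }
}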

Example 54 with ServerName

Use of org.apache.hadoop.hbase.ServerName in project hbase by Apache.

The get method of the class RegionsResource:

@GET
@Produces({ MIMETYPE_TEXT, MIMETYPE_XML, MIMETYPE_JSON, MIMETYPE_PROTOBUF, MIMETYPE_PROTOBUF_IETF })
public Response get(@Context final UriInfo uriInfo) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("GET " + uriInfo.getAbsolutePath());
    }
    servlet.getMetrics().incrementRequests(1);
    try {
        TableName tableName = TableName.valueOf(tableResource.getName());
        TableInfoModel model = new TableInfoModel(tableName.getNameAsString());
        Connection connection = ConnectionFactory.createConnection(servlet.getConfiguration());
        @SuppressWarnings("deprecation") Map<HRegionInfo, ServerName> regions = MetaTableAccessor.allTableRegions(connection, tableName);
        connection.close();
        for (Map.Entry<HRegionInfo, ServerName> e : regions.entrySet()) {
            HRegionInfo hri = e.getKey();
            ServerName addr = e.getValue();
            model.add(new TableRegionModel(tableName.getNameAsString(), hri.getRegionId(), hri.getStartKey(), hri.getEndKey(), addr.getHostAndPort()));
        }
        ResponseBuilder response = Response.ok(model);
        response.cacheControl(cacheControl);
        servlet.getMetrics().incrementSucessfulGetRequests(1);
        return response.build();
    } catch (TableNotFoundException e) {
        servlet.getMetrics().incrementFailedGetRequests(1);
        return Response.status(Response.Status.NOT_FOUND).type(MIMETYPE_TEXT).entity("Not found" + CRLF).build();
    } catch (IOException e) {
        servlet.getMetrics().incrementFailedGetRequests(1);
        return Response.status(Response.Status.SERVICE_UNAVAILABLE).type(MIMETYPE_TEXT).entity("Unavailable" + CRLF).build();
    }
}
Also used : Connection(org.apache.hadoop.hbase.client.Connection), IOException(java.io.IOException), TableInfoModel(org.apache.hadoop.hbase.rest.model.TableInfoModel), HRegionInfo(org.apache.hadoop.hbase.HRegionInfo), TableName(org.apache.hadoop.hbase.TableName), TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException), ServerName(org.apache.hadoop.hbase.ServerName), TableRegionModel(org.apache.hadoop.hbase.rest.model.TableRegionModel), ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder), Map(java.util.Map), Produces(javax.ws.rs.Produces), GET(javax.ws.rs.GET)
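
The example above goes through the deprecated MetaTableAccessor.allTableRegions call to map each region to its ServerName. From a plain client, the same region-to-server mapping can also be read through the RegionLocator API; a small sketch (the table name below is hypothetical):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;

public class ListTableRegions {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        TableName table = TableName.valueOf("my_table"); // hypothetical table name
        try (Connection connection = ConnectionFactory.createConnection(conf);
             RegionLocator locator = connection.getRegionLocator(table)) {
            for (HRegionLocation location : locator.getAllRegionLocations()) {
                // Each HRegionLocation pairs a region with the ServerName hosting it.
                System.out.println(location.getRegionInfo().getRegionNameAsString()
                    + " -> " + location.getServerName());
            }
        }
    }
}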

Example 55 with ServerName

Use of org.apache.hadoop.hbase.ServerName in project hbase by Apache.

The get method of the class StorageClusterStatusResource:

@GET
@Produces({ MIMETYPE_TEXT, MIMETYPE_XML, MIMETYPE_JSON, MIMETYPE_PROTOBUF, MIMETYPE_PROTOBUF_IETF })
public Response get(@Context final UriInfo uriInfo) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("GET " + uriInfo.getAbsolutePath());
    }
    servlet.getMetrics().incrementRequests(1);
    try {
        ClusterStatus status = servlet.getAdmin().getClusterStatus();
        StorageClusterStatusModel model = new StorageClusterStatusModel();
        model.setRegions(status.getRegionsCount());
        model.setRequests(status.getRequestsCount());
        model.setAverageLoad(status.getAverageLoad());
        for (ServerName info : status.getServers()) {
            ServerLoad load = status.getLoad(info);
            StorageClusterStatusModel.Node node = model.addLiveNode(info.getHostname() + ":" + Integer.toString(info.getPort()), info.getStartcode(), load.getUsedHeapMB(), load.getMaxHeapMB());
            node.setRequests(load.getNumberOfRequests());
            for (RegionLoad region : load.getRegionsLoad().values()) {
                node.addRegion(region.getName(), region.getStores(), region.getStorefiles(), region.getStorefileSizeMB(), region.getMemStoreSizeMB(), region.getStorefileIndexSizeMB(), region.getReadRequestsCount(), region.getWriteRequestsCount(), region.getRootIndexSizeKB(), region.getTotalStaticIndexSizeKB(), region.getTotalStaticBloomSizeKB(), region.getTotalCompactingKVs(), region.getCurrentCompactedKVs());
            }
        }
        for (ServerName name : status.getDeadServerNames()) {
            model.addDeadNode(name.toString());
        }
        ResponseBuilder response = Response.ok(model);
        response.cacheControl(cacheControl);
        servlet.getMetrics().incrementSucessfulGetRequests(1);
        return response.build();
    } catch (IOException e) {
        servlet.getMetrics().incrementFailedGetRequests(1);
        return Response.status(Response.Status.SERVICE_UNAVAILABLE).type(MIMETYPE_TEXT).entity("Unavailable" + CRLF).build();
    }
}
Also used : ServerLoad(org.apache.hadoop.hbase.ServerLoad), RegionLoad(org.apache.hadoop.hbase.RegionLoad), ServerName(org.apache.hadoop.hbase.ServerName), StorageClusterStatusModel(org.apache.hadoop.hbase.rest.model.StorageClusterStatusModel), IOException(java.io.IOException), ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder), ClusterStatus(org.apache.hadoop.hbase.ClusterStatus), Produces(javax.ws.rs.Produces), GET(javax.ws.rs.GET)
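
The same ClusterStatus walk can be done from a plain client on the HBase version these examples come from (getClusterStatus was deprecated in later releases in favour of getClusterMetrics). A minimal sketch that only prints the live and dead ServerName entries:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class ClusterStatusSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            ClusterStatus status = admin.getClusterStatus();
            for (ServerName live : status.getServers()) {
                System.out.println("live: " + live.getHostname() + ":" + live.getPort()
                    + ", startcode " + live.getStartcode());
            }
            for (ServerName dead : status.getDeadServerNames()) {
                System.out.println("dead: " + dead);
            }
        }
    }
}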

Aggregations

ServerName (org.apache.hadoop.hbase.ServerName): 426
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 202
Test (org.junit.Test): 163
ArrayList (java.util.ArrayList): 97
TableName (org.apache.hadoop.hbase.TableName): 89
IOException (java.io.IOException): 87
HashMap (java.util.HashMap): 81
List (java.util.List): 72
Map (java.util.Map): 54
HRegionLocation (org.apache.hadoop.hbase.HRegionLocation): 45
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 34
Table (org.apache.hadoop.hbase.client.Table): 33
HashSet (java.util.HashSet): 32
TreeMap (java.util.TreeMap): 31
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 29
Configuration (org.apache.hadoop.conf.Configuration): 26
HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer): 26
Pair (org.apache.hadoop.hbase.util.Pair): 24
KeeperException (org.apache.zookeeper.KeeperException): 23
InterruptedIOException (java.io.InterruptedIOException): 22