Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
From the class RestartRandomZKNodeAction, method perform:
@Override
public void perform() throws Exception {
  LOG.info("Performing action: Restart random zookeeper node");
  ServerName server =
      PolicyBasedChaosMonkey.selectRandomItem(ZKServerTool.readZKNodes(getConf()));
  restartZKNode(server, sleepTime);
}
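For context, the helper just picks one candidate uniformly at random from the ZooKeeper quorum members read out of the configuration. Below is a minimal sketch of such a selection helper, written against a List for simplicity; the real PolicyBasedChaosMonkey.selectRandomItem takes whatever collection type ZKServerTool.readZKNodes returns, so treat the signature as an assumption.

import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

public final class RandomPick {
  private RandomPick() {
  }

  // Hypothetical stand-in for PolicyBasedChaosMonkey.selectRandomItem:
  // pick one element uniformly at random. Assumes a non-empty list.
  public static <T> T selectRandomItem(List<T> items) {
    return items.get(ThreadLocalRandom.current().nextInt(items.size()));
  }
}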
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
From the class RollingBatchRestartRsAction, method perform:
@Override
public void perform() throws Exception {
  LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
      (int) (ratio * 100)));
  List<ServerName> selectedServers = selectServers();
  Queue<ServerName> serversToBeKilled = new LinkedList<>(selectedServers);
  Queue<ServerName> deadServers = new LinkedList<>();
  // Loop while there are servers left to kill or dead servers left to restart.
  while ((!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) && !context.isStopping()) {
    KillOrStart action = KillOrStart.KILL;
    if (serversToBeKilled.isEmpty()) {
      // No more servers to kill; drain the dead-server queue.
      action = KillOrStart.START;
    } else if (deadServers.isEmpty()) {
      // Nothing is down yet, so we have to kill first.
      action = KillOrStart.KILL;
    } else if (deadServers.size() >= maxDeadServers) {
      // We have too many dead servers. Don't kill any more; restart one instead.
      action = KillOrStart.START;
    } else {
      // Both options are open: do a coin toss.
      action = RandomUtils.nextBoolean() ? KillOrStart.KILL : KillOrStart.START;
    }
    ServerName server;
    switch (action) {
      case KILL:
        server = serversToBeKilled.remove();
        try {
          killRs(server);
        } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
          // We've seen this in test runs where we time out but the kill went through. HBASE-9743
          // So, add to deadServers even on exception, so the start still gets called.
          LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e);
        }
        deadServers.add(server);
        break;
      case START:
        try {
          server = deadServers.remove();
          startRs(server);
        } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
          // The start may fail, but it is better to keep going than to stop,
          // even though we may lose the server.
          LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e);
        }
        break;
    }
    sleep(RandomUtils.nextInt((int) sleepTime));
  }
}
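The two-queue bookkeeping above maintains a useful invariant: at most maxDeadServers region servers are down at any moment, and every selected server is killed exactly once and later restarted. Below is a self-contained sketch of the same pattern, with plain strings standing in for ServerName and print statements for the real kill/start calls; all names and values are illustrative.

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ThreadLocalRandom;

public class RollingRestartSketch {
  enum KillOrStart { KILL, START }

  public static void main(String[] args) {
    Queue<String> toKill = new LinkedList<>(Arrays.asList("rs1", "rs2", "rs3", "rs4"));
    Queue<String> dead = new LinkedList<>();
    final int maxDeadServers = 2; // never take down more than this many at once

    while (!toKill.isEmpty() || !dead.isEmpty()) {
      KillOrStart action;
      if (toKill.isEmpty()) {
        action = KillOrStart.START; // only restarts left
      } else if (dead.isEmpty()) {
        action = KillOrStart.KILL;  // nothing is down yet, so kill first
      } else if (dead.size() >= maxDeadServers) {
        action = KillOrStart.START; // cap reached; restart before killing more
      } else {
        action = ThreadLocalRandom.current().nextBoolean() // coin toss
            ? KillOrStart.KILL : KillOrStart.START;
      }
      if (action == KillOrStart.KILL) {
        String victim = toKill.remove();
        System.out.println("kill  " + victim);
        dead.add(victim); // always enqueue for restart, even if the kill is flaky
      } else {
        System.out.println("start " + dead.remove());
      }
    }
  }
}

Note that a killed server is enqueued on the dead list unconditionally, mirroring the HBASE-9743 workaround above: the restart must still happen even when the kill reports a failure.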
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
From the class BatchRestartRsAction, method perform:
@Override
public void perform() throws Exception {
  LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
      (int) (ratio * 100)));
  List<ServerName> selectedServers =
      PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), ratio);
  Set<ServerName> killedServers = new HashSet<>();
  for (ServerName server : selectedServers) {
    // Bail out if someone is trying to stop the monkey.
    if (context.isStopping()) {
      break;
    }
    LOG.info("Killing region server:" + server);
    cluster.killRegionServer(server);
    killedServers.add(server);
  }
  for (ServerName server : killedServers) {
    cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
  }
  LOG.info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
      + cluster.getClusterStatus().getServersSize());
  sleep(sleepTime);
  for (ServerName server : killedServers) {
    LOG.info("Starting region server:" + server.getHostname());
    cluster.startRegionServer(server.getHostname(), server.getPort());
  }
  for (ServerName server : killedServers) {
    cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(),
        PolicyBasedChaosMonkey.TIMEOUT);
  }
  LOG.info("Started " + killedServers.size() + " region servers. Reported num of rs:"
      + cluster.getClusterStatus().getServersSize());
}
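Here selectRandomItems narrows the live region servers down to roughly ratio of them before the batch kill. One plausible implementation is sketched below as a hypothetical helper; the real PolicyBasedChaosMonkey method may choose its subset differently (for example, with a per-item coin flip), so this only illustrates the contract.

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public final class RandomSubset {
  private RandomSubset() {
  }

  // Hypothetical stand-in for PolicyBasedChaosMonkey.selectRandomItems:
  // shuffle a copy and keep about ratio * size elements.
  public static <T> List<T> selectRandomItems(List<T> items, float ratio) {
    List<T> copy = new ArrayList<>(items);
    Collections.shuffle(copy);
    int count = Math.min(copy.size(), (int) Math.ceil(copy.size() * ratio));
    return copy.subList(0, count);
  }
}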
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
From the class RegionsResource, method get:
@GET
@Produces({ MIMETYPE_TEXT, MIMETYPE_XML, MIMETYPE_JSON, MIMETYPE_PROTOBUF, MIMETYPE_PROTOBUF_IETF })
public Response get(@Context final UriInfo uriInfo) {
  if (LOG.isTraceEnabled()) {
    LOG.trace("GET " + uriInfo.getAbsolutePath());
  }
  servlet.getMetrics().incrementRequests(1);
  try {
    TableName tableName = TableName.valueOf(tableResource.getName());
    TableInfoModel model = new TableInfoModel(tableName.getNameAsString());
    // Use try-with-resources so the connection is closed even if the meta scan throws,
    // rather than leaking it on the exception path.
    Map<HRegionInfo, ServerName> regions;
    try (Connection connection = ConnectionFactory.createConnection(servlet.getConfiguration())) {
      @SuppressWarnings("deprecation")
      Map<HRegionInfo, ServerName> tableRegions =
          MetaTableAccessor.allTableRegions(connection, tableName);
      regions = tableRegions;
    }
    for (Map.Entry<HRegionInfo, ServerName> e : regions.entrySet()) {
      HRegionInfo hri = e.getKey();
      ServerName addr = e.getValue();
      model.add(new TableRegionModel(tableName.getNameAsString(), hri.getRegionId(),
          hri.getStartKey(), hri.getEndKey(), addr.getHostAndPort()));
    }
    ResponseBuilder response = Response.ok(model);
    response.cacheControl(cacheControl);
    servlet.getMetrics().incrementSucessfulGetRequests(1);
    return response.build();
  } catch (TableNotFoundException e) {
    servlet.getMetrics().incrementFailedGetRequests(1);
    return Response.status(Response.Status.NOT_FOUND).type(MIMETYPE_TEXT)
        .entity("Not found" + CRLF).build();
  } catch (IOException e) {
    servlet.getMetrics().incrementFailedGetRequests(1);
    return Response.status(Response.Status.SERVICE_UNAVAILABLE).type(MIMETYPE_TEXT)
        .entity("Unavailable" + CRLF).build();
  }
}
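The addr.getHostAndPort() call is where ServerName does its work in this resource: it bundles the hostname, RPC port, and start code of the server hosting each region. Below is a small standalone sketch of those accessors; the host, port, and start code are made-up illustrative values (the start code is normally the server's startup timestamp).

import org.apache.hadoop.hbase.ServerName;

public class ServerNameAccessors {
  public static void main(String[] args) {
    // Hypothetical region server; values are illustrative only.
    ServerName addr = ServerName.valueOf("rs1.example.com", 16020, 1509632317024L);
    System.out.println(addr.getHostAndPort()); // rs1.example.com:16020
    System.out.println(addr.getHostname());    // rs1.example.com
    System.out.println(addr.getPort());        // 16020
  }
}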
Use of org.apache.hadoop.hbase.ServerName in project hbase by apache.
From the class StorageClusterStatusResource, method get:
@GET
@Produces({ MIMETYPE_TEXT, MIMETYPE_XML, MIMETYPE_JSON, MIMETYPE_PROTOBUF, MIMETYPE_PROTOBUF_IETF })
public Response get(@Context final UriInfo uriInfo) {
  if (LOG.isTraceEnabled()) {
    LOG.trace("GET " + uriInfo.getAbsolutePath());
  }
  servlet.getMetrics().incrementRequests(1);
  try {
    ClusterStatus status = servlet.getAdmin().getClusterStatus();
    StorageClusterStatusModel model = new StorageClusterStatusModel();
    model.setRegions(status.getRegionsCount());
    model.setRequests(status.getRequestsCount());
    model.setAverageLoad(status.getAverageLoad());
    for (ServerName info : status.getServers()) {
      ServerLoad load = status.getLoad(info);
      StorageClusterStatusModel.Node node = model.addLiveNode(
          info.getHostname() + ":" + Integer.toString(info.getPort()),
          info.getStartcode(), load.getUsedHeapMB(), load.getMaxHeapMB());
      node.setRequests(load.getNumberOfRequests());
      for (RegionLoad region : load.getRegionsLoad().values()) {
        node.addRegion(region.getName(), region.getStores(), region.getStorefiles(),
            region.getStorefileSizeMB(), region.getMemStoreSizeMB(),
            region.getStorefileIndexSizeMB(), region.getReadRequestsCount(),
            region.getWriteRequestsCount(), region.getRootIndexSizeKB(),
            region.getTotalStaticIndexSizeKB(), region.getTotalStaticBloomSizeKB(),
            region.getTotalCompactingKVs(), region.getCurrentCompactedKVs());
      }
    }
    for (ServerName name : status.getDeadServerNames()) {
      model.addDeadNode(name.toString());
    }
    ResponseBuilder response = Response.ok(model);
    response.cacheControl(cacheControl);
    servlet.getMetrics().incrementSucessfulGetRequests(1);
    return response.build();
  } catch (IOException e) {
    servlet.getMetrics().incrementFailedGetRequests(1);
    return Response.status(Response.Status.SERVICE_UNAVAILABLE).type(MIMETYPE_TEXT)
        .entity("Unavailable" + CRLF).build();
  }
}
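Dead servers are reported with name.toString(), which serializes a ServerName as host,port,startcode. That string round-trips through ServerName.valueOf(String), as this sketch shows; the host, port, and start code are illustrative values only.

import org.apache.hadoop.hbase.ServerName;

public class DeadServerNameRoundTrip {
  public static void main(String[] args) {
    // Illustrative values; the start code is normally the JVM start time.
    ServerName name = ServerName.valueOf("rs2.example.com", 16020, 1509632317024L);
    String reported = name.toString();          // "rs2.example.com,16020,1509632317024"
    ServerName parsed = ServerName.valueOf(reported);
    System.out.println(parsed.getStartcode());  // 1509632317024
  }
}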