use of org.apache.hadoop.hbase.master.assignment.AssignmentManager in project hbase by apache.
the class HMaster method getClusterMetricsWithoutCoprocessor.
/**
 * Builds a {@link ClusterMetrics} containing only the sections selected by {@code options}.
 * <p/>
 * An empty {@code options} set is treated as a request for every {@link Option}. Sections whose
 * backing component is {@code null} at call time (for example {@code serverManager},
 * {@code cpHost} or {@code assignmentManager}) are simply left unset on the builder.
 * @param options the sections to populate; an empty set selects all of them
 * @return the metrics assembled by the builder
 * @throws InterruptedIOException propagated from the calls made while collecting the metrics
 */
public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> options)
    throws InterruptedIOException {
  final ClusterMetricsBuilder metrics = ClusterMetricsBuilder.newBuilder();
  // An empty selection means the client wants everything.
  final EnumSet<Option> requested = options.isEmpty() ? EnumSet.allOf(Option.class) : options;
  // Filled lazily by TASKS and/or LIVE_SERVERS; handed to the builder after the loop only if
  // one of those options was requested.
  Map<ServerName, ServerMetrics> liveServers = null;
  for (Option requestedOption : requested) {
    switch (requestedOption) {
      case HBASE_VERSION:
        metrics.setHBaseVersion(VersionInfo.getVersion());
        break;
      case CLUSTER_ID:
        metrics.setClusterId(getClusterId());
        break;
      case MASTER:
        metrics.setMasterName(getServerName());
        break;
      case BACKUP_MASTERS:
        metrics.setBackerMasterNames(getBackupMasters());
        break;
      case TASKS:
        // Tasks tracked by the master's own TaskMonitor.
        metrics.setMasterTasks(TaskMonitor.get().getTasks().stream()
          .map(monitored -> ServerTaskBuilder.newBuilder()
            .setDescription(monitored.getDescription())
            .setStatus(monitored.getStatus())
            .setState(ServerTask.State.valueOf(monitored.getState().name()))
            .setStartTime(monitored.getStartTime())
            .setCompletionTime(monitored.getCompletionTimestamp())
            .build())
          .collect(Collectors.toList()));
        // TASKS also contributes the live server metrics, unless already collected.
        if (liveServers == null) {
          liveServers = getOnlineServers();
        }
        break;
      case LIVE_SERVERS:
        // Collect the live server metrics once, even if TASKS was requested as well.
        if (liveServers == null) {
          liveServers = getOnlineServers();
        }
        break;
      case DEAD_SERVERS:
        if (serverManager != null) {
          metrics.setDeadServerNames(
            new ArrayList<>(serverManager.getDeadServers().copyServerNames()));
        }
        break;
      case MASTER_COPROCESSORS:
        if (cpHost != null) {
          metrics.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
        }
        break;
      case REGIONS_IN_TRANSITION:
        if (assignmentManager != null) {
          metrics.setRegionsInTransition(
            assignmentManager.getRegionStates().getRegionsStateInTransition());
        }
        break;
      case BALANCER_ON:
        if (loadBalancerTracker != null) {
          metrics.setBalancerOn(loadBalancerTracker.isBalancerOn());
        }
        break;
      case MASTER_INFO_PORT:
        if (infoServer != null) {
          metrics.setMasterInfoPort(infoServer.getPort());
        }
        break;
      case SERVERS_NAME:
        if (serverManager != null) {
          metrics.setServerNames(serverManager.getOnlineServersList());
        }
        break;
      case TABLE_TO_REGIONS_COUNT:
        // Only an active, initialized master with an assignment manager reports these counts.
        if (isActiveMaster() && isInitialized() && assignmentManager != null) {
          try {
            Map<TableName, RegionStatesCount> countsByTable = new HashMap<>();
            for (TableDescriptor descriptor : getTableDescriptors().getAll().values()) {
              TableName table = descriptor.getTableName();
              countsByTable.put(table, assignmentManager.getRegionStatesCount(table));
            }
            metrics.setTableRegionStatesCount(countsByTable);
          } catch (IOException e) {
            // Best effort: on failure the section is left unset and the error is logged.
            LOG.error("Error while populating TABLE_TO_REGIONS_COUNT for Cluster Metrics..", e);
          }
        }
        break;
    }
  }
  if (liveServers != null) {
    metrics.setLiveServerMetrics(liveServers);
  }
  return metrics.build();
}
use of org.apache.hadoop.hbase.master.assignment.AssignmentManager in project hbase by apache.
the class CloneSnapshotProcedure method executeFromState.
/**
 * Executes the single step of the clone-snapshot state machine identified by {@code state} and
 * selects the state to run next.
 * <p/>
 * The steps run in this order: pre-operation checks, writing the filesystem layout, adding the
 * regions to meta, assigning the regions, marking the table enabled, restoring the snapshot
 * ACL, and the post-operation hook.
 * <p/>
 * If a step throws an {@link IOException} and {@code isRollbackSupported(state)} is true, the
 * procedure is marked as failed; otherwise the error is only logged and
 * {@link Flow#HAS_MORE_STATE} is returned without the next state having been set.
 * @param env the master procedure environment
 * @param state the state to execute
 * @return {@link Flow#NO_MORE_STATE} once the post-operation step has run,
 *         {@link Flow#HAS_MORE_STATE} otherwise
 * @throws InterruptedException propagated from the calls made by the individual steps
 * @throws UnsupportedOperationException if {@code state} is not one of the handled states
 */
@Override
protected Flow executeFromState(final MasterProcedureEnv env, final CloneSnapshotState state)
    throws InterruptedException {
  LOG.trace("{} execute state={}", this, state);
  try {
    switch (state) {
      case CLONE_SNAPSHOT_PRE_OPERATION:
        // Verify if we can clone the table
        prepareClone(env);
        preCloneSnapshot(env);
        setNextState(CloneSnapshotState.CLONE_SNAPSHOT_WRITE_FS_LAYOUT);
        break;
      case CLONE_SNAPSHOT_WRITE_FS_LAYOUT:
        updateTableDescriptorWithSFT();
        newRegions = createFilesystemLayout(env, tableDescriptor, newRegions);
        env.getMasterServices().getTableDescriptors().update(tableDescriptor, true);
        setNextState(CloneSnapshotState.CLONE_SNAPSHOT_ADD_TO_META);
        break;
      case CLONE_SNAPSHOT_ADD_TO_META:
        addRegionsToMeta(env);
        setNextState(CloneSnapshotState.CLONE_SNAPSHOT_ASSIGN_REGIONS);
        break;
      case CLONE_SNAPSHOT_ASSIGN_REGIONS: {
        CreateTableProcedure.setEnablingState(env, getTableName());
        // Separate newRegions to split regions and regions to assign
        List<RegionInfo> splitRegions = new ArrayList<>();
        List<RegionInfo> regionsToAssign = new ArrayList<>();
        for (RegionInfo ri : newRegions) {
          if (ri.isOffline() && (ri.isSplit() || ri.isSplitParent())) {
            splitRegions.add(ri);
          } else {
            regionsToAssign.add(ri);
          }
        }
        // For split regions, add them to RegionStates
        AssignmentManager am = env.getAssignmentManager();
        for (RegionInfo ri : splitRegions) {
          am.getRegionStates().updateRegionState(ri, RegionState.State.SPLIT);
        }
        // Reuse the assignment manager fetched above instead of looking it up again.
        addChildProcedure(am.createRoundRobinAssignProcedures(regionsToAssign));
        setNextState(CloneSnapshotState.CLONE_SNAPSHOT_UPDATE_DESC_CACHE);
        break;
      }
      case CLONE_SNAPSHOT_UPDATE_DESC_CACHE:
        // XXX: this stage should be named as set table enabled, as now we will cache the
        // descriptor after writing fs layout.
        CreateTableProcedure.setEnabledState(env, getTableName());
        setNextState(CloneSnapshotState.CLONE_SNAPHOST_RESTORE_ACL);
        break;
      case CLONE_SNAPHOST_RESTORE_ACL:
        restoreSnapshotAcl(env);
        setNextState(CloneSnapshotState.CLONE_SNAPSHOT_POST_OPERATION);
        break;
      case CLONE_SNAPSHOT_POST_OPERATION: {
        postCloneSnapshot(env);
        MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
        // NOTE(review): the duration is computed before markComplete() is called below; confirm
        // that getCompletionTimestamp() already holds a meaningful value at this point.
        metricsSnapshot.addSnapshotClone(
          getMonitorStatus().getCompletionTimestamp() - getMonitorStatus().getStartTime());
        getMonitorStatus().markComplete("Clone snapshot '" + snapshot.getName() + "' completed!");
        return Flow.NO_MORE_STATE;
      }
      default:
        throw new UnsupportedOperationException("unhandled state=" + state);
    }
  } catch (IOException e) {
    if (isRollbackSupported(state)) {
      setFailure("master-clone-snapshot", e);
    } else {
      // Parameterized like the other log calls in this file; the trailing throwable is logged
      // with its stack trace rather than being consumed by a placeholder.
      LOG.warn("Retriable error trying to clone snapshot={} to table={} state={}",
        snapshot.getName(), getTableName(), state, e);
    }
  }
  return Flow.HAS_MORE_STATE;
}
use of org.apache.hadoop.hbase.master.assignment.AssignmentManager in project hbase by apache.
the class MasterDumpServlet method dumpRIT.
/**
 * Prints one line for every region in transition reported by the master's AssignmentManager.
 * <p/>
 * When the master has no AssignmentManager yet, a single notice line is printed instead.
 * @param master the master whose regions in transition are dumped
 * @param out the writer receiving the dump
 */
private void dumpRIT(HMaster master, PrintWriter out) {
  final AssignmentManager assignmentManager = master.getAssignmentManager();
  if (assignmentManager != null) {
    for (RegionStateNode node : assignmentManager.getRegionsInTransition()) {
      final String encodedName = node.getRegionInfo().getEncodedName();
      final String description = node.toDescriptiveString();
      out.println("Region " + encodedName + ": " + description);
    }
  } else {
    out.println("AssignmentManager is not initialized");
  }
}
use of org.apache.hadoop.hbase.master.assignment.AssignmentManager in project hbase by apache.
the class ServerCrashProcedure method assignRegions.
/**
* Assign the regions on the crashed RS to other Rses.
* <p/>
* In this method we will go through all the RegionStateNodes of the given regions to find out
* whether there is already a TRSP for the region, if so we notify it that the server crashed
* (via {@code serverCrashed}) so that it can retry on another server, otherwise we will
* schedule a TRSP to bring the region online.
* <p/>
* We will also check whether the table for a region is enabled, if not, we will skip assigning
* it: for a DISABLING table the region is marked as abnormally closed, for a DISABLED table a
* warning is logged.
* <p/>
* Each RegionStateNode is locked while it is examined and unlocked in a {@code finally} block.
* @param env the master procedure environment
* @param regions the regions to examine and, where applicable, assign
* @throws IOException as a DoNotRetryIOException when a region's location no longer matches
*           and the AssignmentManager is not running, or propagated from the calls made here
*/
private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) throws IOException {
AssignmentManager am = env.getMasterServices().getAssignmentManager();
// Read once from the master configuration; its negation is what gets passed to
// serverCrashed(...) and TransitRegionStateProcedure.assign(...) below.
boolean retainAssignment = env.getMasterConfiguration().getBoolean(MASTER_SCP_RETAIN_ASSIGNMENT, DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT);
for (RegionInfo region : regions) {
RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region);
// Hold the node's lock for all checks and updates below; released in the finally block,
// including on every 'continue'.
regionNode.lock();
try {
// Skip regions for which isMatchingRegionLocation reports a mismatch.
// NOTE(review): the original comment here was truncated; only its tail survives:
// "... in the way of our clearing out 'Unknown Servers'." Restore the full explanation.
if (!isMatchingRegionLocation(regionNode)) {
// double checking here to confirm that we do not skip assignment incorrectly.
if (!am.isRunning()) {
throw new DoNotRetryIOException("AssignmentManager has been stopped, can not process assignment any more");
}
LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...", this, regionNode, serverName);
continue;
}
// A procedure is already attached to this region: tell it about the crash instead of
// scheduling a new one.
if (regionNode.getProcedure() != null) {
LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode);
regionNode.getProcedure().serverCrashed(env, regionNode, getServerName(), !retainAssignment);
continue;
}
if (env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(), TableState.State.DISABLING)) {
// We need to change the state here otherwise the TRSP scheduled by DTP will try to
// close the region from a dead server and will never succeed. Please see HBASE-23636
// for more details.
env.getAssignmentManager().regionClosedAbnormally(regionNode);
LOG.info("{} found table disabling for region {}, set it state to ABNORMALLY_CLOSED.", this, regionNode);
continue;
}
if (env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(), TableState.State.DISABLED)) {
// This should not happen, table disabled but has regions on server.
LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this);
continue;
}
// No procedure attached and the table is neither DISABLING nor DISABLED: create an assign
// TRSP, attach it to the node and schedule it as a child of this procedure.
TransitRegionStateProcedure proc = TransitRegionStateProcedure.assign(env, region, !retainAssignment, null);
regionNode.setProcedure(proc);
addChildProcedure(proc);
} finally {
regionNode.unlock();
}
}
}
use of org.apache.hadoop.hbase.master.assignment.AssignmentManager in project hbase by apache.
the class ServerCrashProcedure method zkCoordinatedSplitLogs.
/**
 * Splits the WALs of the crashed server using the 'classic' zk-based coordination.
 * Superseded by procedure-based WAL splitting.
 * <p/>
 * The region states are told that log splitting has started before the split and that it has
 * finished afterwards; when the server was not carrying meta, its meta log is archived in
 * between.
 * @param env the master procedure environment
 * @throws IOException propagated from the WAL manager calls
 * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean)
 */
private void zkCoordinatedSplitLogs(final MasterProcedureEnv env) throws IOException {
  LOG.debug("Splitting WALs {}", this);
  final MasterWalManager walManager = env.getMasterServices().getMasterWalManager();
  final AssignmentManager assignmentManager = env.getMasterServices().getAssignmentManager();
  // TODO: For Matteo. The calls below BLOCK! Redo so the executor can be relinquished while
  // the split is running. With hundreds of WALs to split and hundreds of SCPs running after a
  // big cluster crash, this can block for hours.
  assignmentManager.getRegionStates().logSplitting(serverName);
  walManager.splitLog(serverName);
  if (!carryingMeta) {
    walManager.archiveMetaLog(serverName);
  }
  assignmentManager.getRegionStates().logSplit(serverName);
  LOG.debug("Done splitting WALs {}", this);
}
Aggregations