Search in sources :

Example 61 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class FavoredNodesManager method updateFavoredNodes.

/**
 * Validates the requested favored nodes for every region, strips the start codes from the
 * servers, and then publishes the result.
 * <p>
 * All entries are validated before anything is written: the first invalid entry aborts the
 * whole call with an {@link IOException}. Once validation passes, the normalized map is sent
 * to meta in one bulk call, the previous favored-node state of those regions is dropped via
 * {@code deleteFavoredNodesForRegions}, and the in-memory plan and replica load are refreshed.
 *
 * @param regionFNMap regions mapped to the favored node servers they should get
 * @throws IOException if a server list contains duplicates, the region is not applicable for
 *         favored nodes, or the list size differs from
 *         {@code FavoredNodeAssignmentHelper.FAVORED_NODES_NUM}
 */
public synchronized void updateFavoredNodes(Map<HRegionInfo, List<ServerName>> regionFNMap) throws IOException {
    Map<HRegionInfo, List<ServerName>> normalizedPlan = new HashMap<>();
    for (Map.Entry<HRegionInfo, List<ServerName>> requested : regionFNMap.entrySet()) {
        HRegionInfo region = requested.getKey();
        List<ServerName> favoredNodes = requested.getValue();
        // The three checks below guard against conditions that are never expected; hitting one
        // points at a problem in favored nodes generation or in the regions it was called on.
        int distinctCount = Sets.newHashSet(favoredNodes).size();
        if (distinctCount != favoredNodes.size()) {
            throw new IOException("Duplicates found: " + favoredNodes);
        }
        if (!isFavoredNodeApplicable(region)) {
            throw new IOException("Can't update FN for a un-applicable region: " + region.getRegionNameAsString() + " with " + favoredNodes);
        }
        // NOTE(review): the message says "At least" but the check requires an exact size.
        if (favoredNodes.size() != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
            throw new IOException("At least " + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM + " favored nodes should be present for region : " + region.getEncodedName() + " current FN servers:" + favoredNodes);
        }
        // Favored nodes are stored without a start code: reuse servers that already lack one,
        // rebuild the others from host and port only.
        List<ServerName> stripped = Lists.newArrayList();
        for (ServerName candidate : favoredNodes) {
            stripped.add(candidate.getStartcode() == ServerName.NON_STARTCODE
                ? candidate
                : ServerName.valueOf(candidate.getHostname(), candidate.getPort(), ServerName.NON_STARTCODE));
        }
        normalizedPlan.put(region, stripped);
    }
    // One bulk write to meta keeps the number of RPCs down.
    FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(normalizedPlan, masterServices.getConnection());
    deleteFavoredNodesForRegions(normalizedPlan.keySet());
    for (Map.Entry<HRegionInfo, List<ServerName>> accepted : normalizedPlan.entrySet()) {
        HRegionInfo region = accepted.getKey();
        List<ServerName> stripped = accepted.getValue();
        globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, stripped);
        addToReplicaLoad(region, stripped);
    }
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) HashMap(java.util.HashMap) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList) List(java.util.List) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 62 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class TakeSnapshotHandler method process.

/**
 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
 * call should get implemented for each snapshot flavor.
 * <p>
 * Any exception raised while taking the snapshot is caught here: the status is aborted, the
 * failure is handed to the monitor as a {@link ForeignException}, and the handler is cancelled.
 * The working directory is removed and both locks are released whether the snapshot succeeded
 * or failed.
 */
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION", justification = "Intentional")
public void process() {
    String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " " + eventType + " on table " + snapshotTable;
    LOG.info(msg);
    ReentrantLock lock = snapshotManager.getLocks().acquireLock(snapshot.getName());
    status.setStatus(msg);
    try {
        // write down the snapshot info in the working directory
        SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
        snapshotManifest.addTableDescriptor(this.htd);
        monitor.rethrowException();
        // If regions move after this meta scan, the region specific snapshot should fail,
        // triggering an external exception that gets captured here.
        List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
        if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
            regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(server.getZooKeeper());
        } else {
            regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(server.getConnection(), snapshotTable, false);
        }
        // run the snapshot
        snapshotRegions(regionsAndLocations);
        monitor.rethrowException();
        // collect the names of the servers hosting the regions; incomplete pairs and
        // offline split regions are left out
        Set<String> serverNames = new HashSet<>();
        for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
            if (p != null && p.getFirst() != null && p.getSecond() != null) {
                HRegionInfo hri = p.getFirst();
                if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
                    continue;
                }
                serverNames.add(p.getSecond().toString());
            }
        }
        // flush the in-memory state, and write the single manifest
        status.setStatus("Consolidate snapshot: " + snapshot.getName());
        snapshotManifest.consolidate();
        // verify the snapshot is valid
        status.setStatus("Verifying snapshot: " + snapshot.getName());
        verifier.verifySnapshot(this.workingDir, serverNames);
        // complete the snapshot, atomically moving from tmp to .snapshot dir.
        completeSnapshot(this.snapshotDir, this.workingDir, this.fs);
        msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
        status.markComplete(msg);
        LOG.info(msg);
        metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
    } catch (Exception e) {
        // FindBugs: REC_CATCH_EXCEPTION
        status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " + snapshotTable + " because " + e.getMessage());
        String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage();
        LOG.error(reason, e);
        ForeignException ee = new ForeignException(reason, e);
        monitor.receive(ee);
        // need to mark this completed to close off and allow cleanup to happen.
        cancel(reason);
    } finally {
        LOG.debug("Launching cleanup of working dir:" + workingDir);
        try {
            // If the working dir still exists at this point, delete it.
            if (fs.exists(workingDir) && !this.fs.delete(workingDir, true)) {
                LOG.error("Couldn't delete snapshot working directory:" + workingDir);
            }
        } catch (IOException e) {
            // Cleanup is best effort, but keep the cause in the log so the failure can be diagnosed.
            LOG.error("Couldn't delete snapshot working directory:" + workingDir, e);
        }
        lock.unlock();
        tableLock.release();
    }
}
Also used : ReentrantLock(java.util.concurrent.locks.ReentrantLock) IOException(java.io.IOException) SnapshotCreationException(org.apache.hadoop.hbase.snapshot.SnapshotCreationException) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) KeeperException(org.apache.zookeeper.KeeperException) CancellationException(java.util.concurrent.CancellationException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) MetaTableLocator(org.apache.hadoop.hbase.zookeeper.MetaTableLocator) ServerName(org.apache.hadoop.hbase.ServerName) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) Pair(org.apache.hadoop.hbase.util.Pair) HashSet(java.util.HashSet)

Example 63 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class DisabledTableSnapshotHandler method snapshotRegions.

// TODO consider parallelizing these operations since they are independent. Right now its just
// easier to keep them serial though
/**
 * Adds every default-replica region of the table (plus the MOB region when the table has MOB
 * columns) to the snapshot manifest.
 * <p>
 * Failures are not rethrown: they are wrapped in a {@link ForeignException}, passed to the
 * monitor, and the status is aborted.
 *
 * @param regionsAndLocations region/server pairs of the table; only the region part is used
 */
@Override
public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations) throws IOException, KeeperException {
    try {
        // Step 1: pick the regions to snapshot, skipping non-default replicas.
        Set<HRegionInfo> targets = new HashSet<>();
        for (Pair<HRegionInfo, ServerName> regionAndServer : regionsAndLocations) {
            HRegionInfo candidate = regionAndServer.getFirst();
            if (!RegionReplicaUtil.isDefaultReplica(candidate)) {
                continue;
            }
            targets.add(candidate);
        }
        // MOB files, if any, are snapshotted as an offline region.
        if (MobUtils.hasMobColumns(htd)) {
            targets.add(MobUtils.getMobRegionInfo(htd.getTableName()));
        }
        // Step 2: write the info of each selected region to disk.
        String startMsg = "Starting to write region info and WALs for regions for offline snapshot:" + ClientSnapshotDescriptionUtils.toString(snapshot);
        LOG.info(startMsg);
        status.setStatus(startMsg);
        ThreadPoolExecutor pool = SnapshotManifest.createExecutor(conf, "DisabledTableSnapshot");
        try {
            ModifyRegionUtils.RegionEditTask addToManifest = new ModifyRegionUtils.RegionEditTask() {

                @Override
                public void editRegion(final HRegionInfo region) throws IOException {
                    snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), region);
                }
            };
            ModifyRegionUtils.editRegions(pool, targets, addToManifest);
        } finally {
            pool.shutdown();
        }
    } catch (Exception e) {
        // Capture the failure so it can be propagated back to the client later.
        String reason = "Failed snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage();
        monitor.receive(new ForeignException(reason, e));
        status.abort("Snapshot of table: " + snapshotTable + " failed because " + e.getMessage());
    } finally {
        LOG.debug("Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot) + " as finished.");
    }
}
Also used : IOException(java.io.IOException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) ServerName(org.apache.hadoop.hbase.ServerName) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) ModifyRegionUtils(org.apache.hadoop.hbase.util.ModifyRegionUtils) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) HashSet(java.util.HashSet)

Example 64 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class ServerCrashProcedure method calcRegionsToAssign.

/**
 * Figure out what we need to assign. Should be idempotent.
 * <p>
 * The returned list is filled from three sources, in this order:
 * <ol>
 * <li>meta replicas (replica ids 1 and up) that the assignment manager reports as carried by
 * the crashed server,</li>
 * <li>the regions returned by {@code AssignmentManager#cleanOutCrashedServerReferences},</li>
 * <li>the regions in {@code regionsOnCrashedServer} that pass the per-region checks below.</li>
 * </ol>
 * @param env master procedure environment; supplies the assignment manager and the master
 *          configuration
 * @return List of calculated regions to assign; may be empty. The code visible here always
 *         returns the list it builds, never null.
 * @throws IOException declared by the signature; presumably raised by one of the helpers
 *           called here -- TODO confirm which
 */
private List<HRegionInfo> calcRegionsToAssign(final MasterProcedureEnv env) throws IOException {
    AssignmentManager am = env.getMasterServices().getAssignmentManager();
    List<HRegionInfo> regionsToAssignAggregator = new ArrayList<>();
    int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM);
    // Starts at replica id 1: replica id 0 is not examined by this loop.
    for (int i = 1; i < replicaCount; i++) {
        HRegionInfo metaHri = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i);
        if (am.isCarryingMetaReplica(this.serverName, metaHri)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Reassigning meta replica" + metaHri + " that was on " + this.serverName);
            }
            regionsToAssignAggregator.add(metaHri);
        }
    }
    // Clean out anything in regions in transition.
    List<HRegionInfo> regionsInTransition = am.cleanOutCrashedServerReferences(serverName);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) + " region(s) that " + (serverName == null ? "null" : serverName) + " was carrying (and " + regionsInTransition.size() + " regions(s) that were opening on this server)");
    }
    regionsToAssignAggregator.addAll(regionsInTransition);
    // Iterate regions that were on this server and figure which of these we need to reassign
    if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) {
        RegionStates regionStates = am.getRegionStates();
        for (HRegionInfo hri : this.regionsOnCrashedServer) {
            // Already queued above through regionsInTransition; skip so it is not added twice.
            if (regionsInTransition.contains(hri))
                continue;
            String encodedName = hri.getEncodedName();
            // Per-region lock, held for all checks and state updates on this region; released
            // in the finally block, including on the 'continue' paths.
            Lock lock = am.acquireRegionLock(encodedName);
            try {
                // The transition state is read before processDeadRegion is called.
                RegionState rit = regionStates.getRegionTransitionState(hri);
                if (processDeadRegion(hri, am)) {
                    ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri);
                    if (addressFromAM != null && !addressFromAM.equals(this.serverName)) {
                        // If this region is in transition on the dead server, it must be
                        // opening or pending_open, which should have been covered by
                        // AM#cleanOutCrashedServerReferences
                        LOG.info("Skip assigning " + hri.getRegionNameAsString() + " because opened on " + addressFromAM.getServerName());
                        continue;
                    }
                    if (rit != null) {
                        if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) {
                            // Skip regions that are in transition on other server
                            LOG.info("Skip assigning region in transition on other server" + rit);
                            continue;
                        }
                        LOG.info("Reassigning region " + rit + " and clearing zknode if exists");
                        regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
                    } else if (regionStates.isRegionInState(hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) {
                        // No transition state recorded, but the region is SPLITTING_NEW or
                        // MERGING_NEW: mark it OFFLINE before queueing it.
                        regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
                    }
                    regionsToAssignAggregator.add(hri);
                // TODO: The below else if is different in branch-1 from master branch.
                } else if (rit != null) {
                    // Precedence: '&&' binds tighter than '||', so this reads as
                    // ((closing || failedClose || offline) && table DISABLED/DISABLING)
                    //   || region is in am.getReplicasToClose().
                    if ((rit.isClosing() || rit.isFailedClose() || rit.isOffline()) && am.getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING) || am.getReplicasToClose().contains(hri)) {
                        // If the table was partially disabled and the RS went down, we should clear the
                        // RIT and remove the node for the region.
                        // The rit that we use may be stale in case the table was in DISABLING state
                        // but though we did assign we will not be clearing the znode in CLOSING state.
                        // Doing this will have no harm. See HBASE-5927
                        regionStates.updateRegionState(hri, RegionState.State.OFFLINE);
                        am.offlineDisabledRegion(hri);
                    } else {
                        LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition " + rit + " not to be assigned by SSH of server " + serverName);
                    }
                }
            } finally {
                lock.unlock();
            }
        }
    }
    return regionsToAssignAggregator;
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) RegionState(org.apache.hadoop.hbase.master.RegionState) RegionStates(org.apache.hadoop.hbase.master.RegionStates) ServerName(org.apache.hadoop.hbase.ServerName) AssignmentManager(org.apache.hadoop.hbase.master.AssignmentManager) ArrayList(java.util.ArrayList) Lock(java.util.concurrent.locks.Lock)

Example 65 with ServerName

use of org.apache.hadoop.hbase.ServerName in project hbase by apache.

the class Canary method sniff.

/*
 * Loops over the regions of the given table and submits one RegionTask per region location
 * to the executor.
 *
 * Returns an empty list when opening the table raises TableNotFoundException; otherwise
 * returns the futures produced by executor.invokeAll for the created tasks.
 *
 * The table and the region locator are managed with try-with-resources, so a failure while
 * closing them can no longer replace an exception thrown while they were in use (it is
 * recorded as a suppressed exception instead).
 */
private static List<Future<Void>> sniff(final Admin admin, final Sink sink, HTableDescriptor tableDesc, ExecutorService executor, TaskType taskType, boolean rawScanEnabled) throws Exception {
    if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("reading list of regions for table %s", tableDesc.getTableName()));
    }
    // The table is opened only as an existence probe and is closed again right away.
    try (Table probe = admin.getConnection().getTable(tableDesc.getTableName())) {
        // Nothing to do with the table itself.
    } catch (TableNotFoundException e) {
        return new ArrayList<>();
    }
    List<RegionTask> tasks = new ArrayList<>();
    try (RegionLocator regionLocator = admin.getConnection().getRegionLocator(tableDesc.getTableName())) {
        for (HRegionLocation location : regionLocator.getAllRegionLocations()) {
            ServerName rs = location.getServerName();
            HRegionInfo region = location.getRegionInfo();
            tasks.add(new RegionTask(admin.getConnection(), region, rs, sink, taskType, rawScanEnabled));
        }
    }
    return executor.invokeAll(tasks);
}
Also used : HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableNotFoundException(org.apache.hadoop.hbase.TableNotFoundException) RegionLocator(org.apache.hadoop.hbase.client.RegionLocator) Table(org.apache.hadoop.hbase.client.Table) HRegionLocation(org.apache.hadoop.hbase.HRegionLocation) ServerName(org.apache.hadoop.hbase.ServerName) ArrayList(java.util.ArrayList)

Aggregations

ServerName (org.apache.hadoop.hbase.ServerName)426 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)202 Test (org.junit.Test)163 ArrayList (java.util.ArrayList)97 TableName (org.apache.hadoop.hbase.TableName)89 IOException (java.io.IOException)87 HashMap (java.util.HashMap)81 List (java.util.List)72 Map (java.util.Map)54 HRegionLocation (org.apache.hadoop.hbase.HRegionLocation)45 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)34 Table (org.apache.hadoop.hbase.client.Table)33 HashSet (java.util.HashSet)32 TreeMap (java.util.TreeMap)31 HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor)29 Configuration (org.apache.hadoop.conf.Configuration)26 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)26 Pair (org.apache.hadoop.hbase.util.Pair)24 KeeperException (org.apache.zookeeper.KeeperException)23 InterruptedIOException (java.io.InterruptedIOException)22