Search in sources :

Example 11 with ForeignException

use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by apache.

the class TakeSnapshotHandler method process.

/**
 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
 * call should get implemented for each snapshot flavor.
 * <p>
 * Any exception raised while taking the snapshot is reported on the task status, handed to the
 * monitor wrapped in a {@link ForeignException}, and the handler is cancelled. Regardless of the
 * outcome, the working directory is deleted and the currently held table lock is released.
 */
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION", justification = "Intentional")
public void process() {
    String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " " + eventType + " on table " + snapshotTable;
    LOG.info(msg);
    // tracks whichever lock is held at the end so the finally block releases the right one
    MasterLock tableLockToRelease = this.tableLock;
    status.setStatus(msg);
    try {
        if (downgradeToSharedTableLock()) {
            // release the exclusive lock and hold the shared lock instead
            tableLockToRelease = master.getLockManager().createMasterLock(snapshotTable, LockType.SHARED, this.getClass().getName() + ": take snapshot " + snapshot.getName());
            tableLock.release();
            tableLockToRelease.acquire();
        }
        // If regions move after this meta scan, the region specific snapshot should fail, triggering
        // an external exception that gets captured here.
        // write down the snapshot info in the working directory
        SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
        snapshotManifest.addTableDescriptor(this.htd);
        monitor.rethrowException();
        List<Pair<RegionInfo, ServerName>> regionsAndLocations = master.getAssignmentManager().getTableRegionsAndLocations(snapshotTable, false);
        // run the snapshot
        snapshotRegions(regionsAndLocations);
        monitor.rethrowException();
        // collect the names of the servers hosting the regions, skipping offline split regions
        Set<String> serverNames = new HashSet<>();
        for (Pair<RegionInfo, ServerName> p : regionsAndLocations) {
            if (p != null && p.getFirst() != null && p.getSecond() != null) {
                RegionInfo hri = p.getFirst();
                if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
                    continue;
                }
                serverNames.add(p.getSecond().toString());
            }
        }
        // flush the in-memory state, and write the single manifest
        status.setStatus("Consolidate snapshot: " + snapshot.getName());
        snapshotManifest.consolidate();
        // verify the snapshot is valid
        status.setStatus("Verifying snapshot: " + snapshot.getName());
        verifier.verifySnapshot(this.workingDir, serverNames);
        // complete the snapshot, atomically moving from tmp to .snapshot dir.
        SnapshotDescriptionUtils.completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs, this.workingDirFs, this.conf);
        finished = true;
        msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
        status.markComplete(msg);
        LOG.info(msg);
        metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
        if (master.getMasterCoprocessorHost() != null) {
            master.getMasterCoprocessorHost().postCompletedSnapshotAction(ProtobufUtil.createSnapshotDesc(snapshot), this.htd);
        }
    } catch (Exception e) {
        // FindBugs: REC_CATCH_EXCEPTION
        status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " + snapshotTable + " because " + e.getMessage());
        String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage();
        LOG.error(reason, e);
        ForeignException ee = new ForeignException(reason, e);
        monitor.receive(ee);
        // need to mark this completed to close off and allow cleanup to happen.
        cancel(reason);
    } finally {
        LOG.debug("Launching cleanup of working dir:" + workingDir);
        try {
            // always attempt to remove the working directory, whether the snapshot completed or
            // failed; a false return value is reported but is not fatal.
            if (!workingDirFs.delete(workingDir, true)) {
                LOG.error("Couldn't delete snapshot working directory:" + workingDir);
            }
        } catch (IOException e) {
            // keep the cause in the log so a failed cleanup can be diagnosed
            LOG.error("Couldn't delete snapshot working directory:" + workingDir, e);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
        }
        tableLockToRelease.release();
    }
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) IOException(java.io.IOException) MasterLock(org.apache.hadoop.hbase.master.locking.LockManager.MasterLock) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) KeeperException(org.apache.zookeeper.KeeperException) CancellationException(java.util.concurrent.CancellationException) IOException(java.io.IOException) Pair(org.apache.hadoop.hbase.util.Pair) HashSet(java.util.HashSet)

Example 12 with ForeignException

use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by apache.

the class ProcedureCoordinator method rpcConnectionFailure.

/**
 * Handle the loss of the connection to the rest of the procedure group (members and
 * coordinator). Each procedure currently tracked by this coordinator receives a
 * {@link ForeignException} wrapping the cause; nothing is sent to other members from here,
 * since they can no longer be reached.
 * @param message description of the error
 * @param cause the actual cause of the failure
 */
void rpcConnectionFailure(final String message, final IOException cause) {
    final Collection<Procedure> affected = procedures.values();
    // evaluate the log level once rather than on every iteration
    final boolean traceOn = LOG.isTraceEnabled();
    LOG.debug("received connection failure: " + message, cause);
    for (Procedure procedure : affected) {
        // only non-null entries are notified
        if (procedure != null) {
            if (traceOn) {
                LOG.trace("connection failure - notify procedure: " + procedure.getName());
            }
            final ForeignException failure = new ForeignException(procedure.getName(), cause);
            procedure.receive(failure);
        }
    }
}
Also used : ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException)

Example 13 with ForeignException

use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by apache.

the class RestoreSnapshotProcedure method updateMETA.

/**
 * Apply the region changes of the restore to hbase:meta and to the in-memory region states,
 * then mark the monitored status complete and record the restore duration in the snapshot
 * metrics.
 * @param env the master procedure environment used to reach the connection, the assignment
 *          manager and the master services
 * @throws IOException if any step of the meta update fails; the original failure is attached
 *           as the cause
 */
private void updateMETA(final MasterProcedureEnv env) throws IOException {
    try {
        final Connection connection = env.getMasterServices().getConnection();
        final RegionStateStore stateStore = env.getAssignmentManager().getRegionStateStore();
        final int replicaCount = modifiedTableDescriptor.getRegionReplication();

        // 1. Prepare to restore
        getMonitorStatus().setStatus("Preparing to restore each region");

        // 2. Drop the regions that are to be removed, from the state store and from memory
        if (regionsToRemove != null) {
            stateStore.deleteRegions(regionsToRemove);
            deleteRegionsFromInMemoryStates(regionsToRemove, env, regionReplicationOf(replicaCount));
        }

        // 3. Register the regions that are to be added, in meta and in memory
        if (regionsToAdd != null) {
            MetaTableAccessor.addRegionsToMeta(connection, regionsToAdd, replicaCount);
            addRegionsToInMemoryStates(regionsToAdd, env, replicaCount);
        }

        // 4. Overwrite the regions that are to be restored, then refresh their in-memory state
        if (regionsToRestore != null) {
            stateStore.overwriteRegions(regionsToRestore, replicaCount);
            deleteRegionsFromInMemoryStates(regionsToRestore, env, replicaCount);
            addRegionsToInMemoryStates(regionsToRestore, env, replicaCount);
        }

        // 5. Update the parent regions in meta for the added regions
        final RestoreSnapshotHelper.RestoreMetaChanges parentUpdater =
            new RestoreSnapshotHelper.RestoreMetaChanges(modifiedTableDescriptor, parentsToChildrenPairMap);
        parentUpdater.updateMetaParentRegions(connection, regionsToAdd);

        // At this point the restore is complete.
        LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + " on table=" + getTableName() + " completed!");
    } catch (IOException e) {
        final ForeignExceptionDispatcher dispatcher = new ForeignExceptionDispatcher();
        final String failureMsg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + " failed in meta update. Try re-running the restore command.";
        LOG.error(failureMsg, e);
        // the failure is attributed to this master's server name
        final String source = env.getMasterServices().getServerName().toString();
        dispatcher.receive(new ForeignException(source, e));
        throw new IOException(failureMsg, e);
    }

    monitorStatus.markComplete("Restore snapshot '" + snapshot.getName() + "'!");
    final MetricsSnapshot restoreMetrics = new MetricsSnapshot();
    final long elapsed = monitorStatus.getCompletionTimestamp() - monitorStatus.getStartTime();
    restoreMetrics.addSnapshotRestore(elapsed);
}
Also used : RegionStateStore(org.apache.hadoop.hbase.master.assignment.RegionStateStore) MetricsSnapshot(org.apache.hadoop.hbase.master.MetricsSnapshot) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) Connection(org.apache.hadoop.hbase.client.Connection) RestoreSnapshotHelper(org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ForeignExceptionDispatcher(org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher)

Example 14 with ForeignException

use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by apache.

the class RestoreSnapshotProcedure method restoreSnapshot.

/**
 * Execute the on-disk restore and remember the resulting region changes (regions to restore,
 * remove and add, plus the parent-to-children map) on this procedure.
 * @param env MasterProcedureEnv giving access to the master file system, configuration and
 *          services
 * @throws IOException if the on-disk restore fails; the original failure is attached as the
 *           cause
 */
private void restoreSnapshot(final MasterProcedureEnv env) throws IOException {
    final MasterFileSystem masterFs = env.getMasterServices().getMasterFileSystem();
    final FileSystem fileSystem = masterFs.getFileSystem();
    final Path root = masterFs.getRootDir();
    final ForeignExceptionDispatcher dispatcher = new ForeignExceptionDispatcher();
    // work on a copy of the master configuration
    final Configuration restoreConf = new Configuration(env.getMasterConfiguration());

    LOG.info("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot));
    try {
        final Path completedDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, root);
        final SnapshotManifest restoreManifest = SnapshotManifest.open(restoreConf, fileSystem, completedDir, snapshot);
        final RestoreSnapshotHelper helper = new RestoreSnapshotHelper(
            restoreConf, fileSystem, restoreManifest, modifiedTableDescriptor, root, dispatcher, getMonitorStatus());

        // keep the computed changes on the procedure
        final RestoreSnapshotHelper.RestoreMetaChanges changes = helper.restoreHdfsRegions();
        regionsToRestore = changes.getRegionsToRestore();
        regionsToRemove = changes.getRegionsToRemove();
        regionsToAdd = changes.getRegionsToAdd();
        parentsToChildrenPairMap = changes.getParentToChildrenPairMap();
    } catch (IOException e) {
        final String failureMsg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) + " failed in on-disk restore. Try re-running the restore command.";
        LOG.error(failureMsg, e);
        // the failure is attributed to this master's server name
        final String source = env.getMasterServices().getServerName().toString();
        dispatcher.receive(new ForeignException(source, e));
        throw new IOException(failureMsg, e);
    }
}
Also used : MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) Path(org.apache.hadoop.fs.Path) SnapshotManifest(org.apache.hadoop.hbase.snapshot.SnapshotManifest) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) RestoreSnapshotHelper(org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) IOException(java.io.IOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) ForeignExceptionDispatcher(org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher)

Example 15 with ForeignException

use of org.apache.hadoop.hbase.errorhandling.ForeignException in project hbase by apache.

the class SimpleMasterProcedureManager method execProcedureWithRet.

/**
 * Start the described procedure on every online region server, wait for it to finish and
 * return the first value of the data it reported back.
 * @param desc description of the procedure to run; its instance name is used as the procedure
 *          name
 * @return the first value of the data returned by the procedure
 * @throws IOException if the procedure could not be submitted, if waiting for it was
 *           interrupted or failed (the failure is attached as the cause), or if it completed
 *           without returning any data
 */
@Override
public byte[] execProcedureWithRet(ProcedureDescription desc) throws IOException {
    this.done = false;
    // start the process on the RS
    ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getInstance());
    List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
    List<String> servers = new ArrayList<>();
    for (ServerName sn : serverNames) {
        servers.add(sn.toString());
    }
    Procedure proc = coordinator.startProcedure(monitor, desc.getInstance(), new byte[0], servers);
    if (proc == null) {
        String msg = "Failed to submit distributed procedure for '" + getProcedureSignature() + "'";
        LOG.error(msg);
        throw new IOException(msg);
    }
    HashMap<String, byte[]> returnData = null;
    // remembered so the failure can be surfaced to the caller instead of a NullPointerException
    ForeignException failure = null;
    try {
        // wait for the procedure to complete.  A timer thread is kicked off that should cancel this
        // if it takes too long.
        returnData = proc.waitForCompletedWithRet();
        LOG.info("Done waiting - exec procedure for " + desc.getInstance());
        this.done = true;
    } catch (InterruptedException e) {
        failure = new ForeignException("Interrupted while waiting for procdure to finish", e);
        monitor.receive(failure);
        // restore the interrupt flag for callers further up the stack
        Thread.currentThread().interrupt();
    } catch (ForeignException e) {
        failure = e;
        monitor.receive(e);
    }
    if (failure != null) {
        // no return data exists on this path; dereferencing it would throw a NullPointerException
        throw new IOException("Distributed procedure '" + desc.getInstance() + "' did not complete", failure);
    }
    if (returnData == null || returnData.isEmpty()) {
        // iterator().next() on an empty map would throw NoSuchElementException
        throw new IOException("Distributed procedure '" + desc.getInstance() + "' returned no data");
    }
    // return the first value for testing
    return returnData.values().iterator().next();
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ForeignExceptionDispatcher(org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher)

Aggregations

ForeignException (org.apache.hadoop.hbase.errorhandling.ForeignException)29 IOException (java.io.IOException)17 ForeignExceptionDispatcher (org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher)11 ServerName (org.apache.hadoop.hbase.ServerName)8 ArrayList (java.util.ArrayList)7 HashSet (java.util.HashSet)6 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)6 KeeperException (org.apache.zookeeper.KeeperException)6 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)5 Procedure (org.apache.hadoop.hbase.procedure.Procedure)5 RestoreSnapshotHelper (org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper)4 ServiceException (org.apache.hbase.thirdparty.com.google.protobuf.ServiceException)4 Test (org.junit.Test)4 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)3 Configuration (org.apache.hadoop.conf.Configuration)3 FileSystem (org.apache.hadoop.fs.FileSystem)3 Path (org.apache.hadoop.fs.Path)3 HRegionInfo (org.apache.hadoop.hbase.HRegionInfo)3 TableName (org.apache.hadoop.hbase.TableName)3 MasterFileSystem (org.apache.hadoop.hbase.master.MasterFileSystem)3