Search in sources:

Example 1 with MasterLock

Use of org.apache.hadoop.hbase.master.locking.LockManager.MasterLock in the Apache HBase project.

From the class TakeSnapshotHandler, the method process():

/**
 * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
 * call should get implemented for each snapshot flavor.
 * <p>
 * Flow: optionally downgrade the table lock to SHARED, write the snapshot descriptor and
 * table descriptor into the working directory, snapshot the regions, consolidate the
 * manifest, verify, then atomically move the working dir into the completed-snapshot dir.
 * Any exception is converted to a {@link ForeignException} and handed to the monitor so
 * remote participants abort; the working directory is always cleaned up in the finally
 * block and whichever lock is currently held is released.
 */
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "REC_CATCH_EXCEPTION", justification = "Intentional")
public void process() {
    String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " " + eventType + " on table " + snapshotTable;
    LOG.info(msg);
    // Tracks the lock to release in finally: starts as the exclusive table lock and is
    // swapped for the shared lock if we downgrade below.
    MasterLock tableLockToRelease = this.tableLock;
    status.setStatus(msg);
    try {
        if (downgradeToSharedTableLock()) {
            // release the exclusive lock and hold the shared lock instead
            // NOTE(review): the shared lock is created first, the exclusive lock is
            // released, and only then is the shared lock acquired — so there is a brief
            // window with no lock held; presumably acceptable for this flavor — confirm.
            tableLockToRelease = master.getLockManager().createMasterLock(snapshotTable, LockType.SHARED, this.getClass().getName() + ": take snapshot " + snapshot.getName());
            tableLock.release();
            tableLockToRelease.acquire();
        }
        // If regions move after this meta scan, the region specific snapshot should fail, triggering
        // an external exception that gets captured here.
        // write down the snapshot info in the working directory
        SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
        snapshotManifest.addTableDescriptor(this.htd);
        // bail out early if a participant has already reported a failure
        monitor.rethrowException();
        List<Pair<RegionInfo, ServerName>> regionsAndLocations = master.getAssignmentManager().getTableRegionsAndLocations(snapshotTable, false);
        // run the snapshot
        snapshotRegions(regionsAndLocations);
        monitor.rethrowException();
        // extract each pair to separate lists
        Set<String> serverNames = new HashSet<>();
        for (Pair<RegionInfo, ServerName> p : regionsAndLocations) {
            if (p != null && p.getFirst() != null && p.getSecond() != null) {
                RegionInfo hri = p.getFirst();
                // skip offline split parents — their data is covered by the daughters
                if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent()))
                    continue;
                serverNames.add(p.getSecond().toString());
            }
        }
        // flush the in-memory state, and write the single manifest
        status.setStatus("Consolidate snapshot: " + snapshot.getName());
        snapshotManifest.consolidate();
        // verify the snapshot is valid
        status.setStatus("Verifying snapshot: " + snapshot.getName());
        verifier.verifySnapshot(this.workingDir, serverNames);
        // complete the snapshot, atomically moving from tmp to .snapshot dir.
        SnapshotDescriptionUtils.completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs, this.workingDirFs, this.conf);
        finished = true;
        msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
        status.markComplete(msg);
        LOG.info(msg);
        metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
        if (master.getMasterCoprocessorHost() != null) {
            master.getMasterCoprocessorHost().postCompletedSnapshotAction(ProtobufUtil.createSnapshotDesc(snapshot), this.htd);
        }
    } catch (Exception e) {
        // FindBugs: REC_CATCH_EXCEPTION
        // Broad catch is deliberate (see the @SuppressWarnings above): any failure must
        // abort the status, notify the monitor, and cancel the handler.
        status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " + snapshotTable + " because " + e.getMessage());
        String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " due to exception:" + e.getMessage();
        LOG.error(reason, e);
        ForeignException ee = new ForeignException(reason, e);
        monitor.receive(ee);
        // need to mark this completed to close off and allow cleanup to happen.
        cancel(reason);
    } finally {
        LOG.debug("Launching cleanup of working dir:" + workingDir);
        try {
            // on success completeSnapshot has already moved the working dir away; if it
            // is still present the snapshot failed, so delete it.
            if (!workingDirFs.delete(workingDir, true)) {
                LOG.error("Couldn't delete snapshot working directory:" + workingDir);
            }
        } catch (IOException e) {
            // best-effort cleanup: log and move on so the lock below is still released
            LOG.error("Couldn't delete snapshot working directory:" + workingDir);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
        }
        // release whichever lock we currently hold (exclusive, or shared if downgraded)
        tableLockToRelease.release();
    }
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) IOException(java.io.IOException) MasterLock(org.apache.hadoop.hbase.master.locking.LockManager.MasterLock) ForeignException(org.apache.hadoop.hbase.errorhandling.ForeignException) KeeperException(org.apache.zookeeper.KeeperException) CancellationException(java.util.concurrent.CancellationException) IOException(java.io.IOException) Pair(org.apache.hadoop.hbase.util.Pair) HashSet(java.util.HashSet)

Example 2 with MasterLock

Use of org.apache.hadoop.hbase.master.locking.LockManager.MasterLock in the Apache HBase project.

From the class TestSnapshotWhileRSCrashes, the method test():

/**
 * Verifies that a region server crash does not deadlock a pending snapshot.
 * <p>
 * Takes an exclusive master lock on the table so the snapshot (started on a background
 * thread) queues behind it, crashes a region server while the snapshot's lock procedure
 * is still waiting, then releases the lock and checks the table's regions come back
 * online by scanning it.
 */
@Test
public void test() throws InterruptedException, IOException {
    String snName = "sn";
    // exclusive lock blocks the snapshot's own lock procedure until we release it
    MasterLock lock = UTIL.getMiniHBaseCluster().getMaster().getLockManager().createMasterLock(NAME, LockType.EXCLUSIVE, "for testing");
    lock.acquire();
    Thread t = new Thread(() -> {
        try {
            UTIL.getAdmin().snapshot(snName, NAME);
        } catch (IOException e) {
            // lambda cannot throw checked IOException; rethrow unchecked
            throw new UncheckedIOException(e);
        }
    });
    // daemon so a hung snapshot cannot keep the test JVM alive
    t.setDaemon(true);
    t.start();
    ProcedureExecutor<MasterProcedureEnv> procExec = UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    // wait until the snapshot's LockProcedure for our table exists but has NOT yet
    // acquired the lock, i.e. it is queued behind the exclusive lock we hold
    UTIL.waitFor(10000, () -> procExec.getProcedures().stream().filter(p -> !p.isFinished()).filter(p -> p instanceof LockProcedure).map(p -> (LockProcedure) p).filter(p -> NAME.equals(p.getTableName())).anyMatch(p -> !p.isLocked()));
    // crash a region server while the snapshot is still blocked on the lock
    UTIL.getMiniHBaseCluster().stopRegionServer(0);
    lock.release();
    // make sure that the regions can come back online: a successful scan (even one
    // returning no rows) proves the table reopened after the RS crash
    try (Table table = UTIL.getConnection().getTable(NAME);
        ResultScanner scanner = table.getScanner(CF)) {
        assertNull(scanner.next());
    }
}
Also used : MasterLock(org.apache.hadoop.hbase.master.locking.LockManager.MasterLock) TableName(org.apache.hadoop.hbase.TableName) AfterClass(org.junit.AfterClass) BeforeClass(org.junit.BeforeClass) HBaseTestingUtil(org.apache.hadoop.hbase.HBaseTestingUtil) MediumTests(org.apache.hadoop.hbase.testclassification.MediumTests) HBaseClassTestRule(org.apache.hadoop.hbase.HBaseClassTestRule) IOException(java.io.IOException) Test(org.junit.Test) MasterProcedureEnv(org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv) Category(org.junit.experimental.categories.Category) LockProcedure(org.apache.hadoop.hbase.master.locking.LockProcedure) LockType(org.apache.hadoop.hbase.procedure2.LockType) UncheckedIOException(java.io.UncheckedIOException) Assert.assertNull(org.junit.Assert.assertNull) ProcedureExecutor(org.apache.hadoop.hbase.procedure2.ProcedureExecutor) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) MasterTests(org.apache.hadoop.hbase.testclassification.MasterTests) Table(org.apache.hadoop.hbase.client.Table) ClassRule(org.junit.ClassRule) Bytes(org.apache.hadoop.hbase.util.Bytes) LockProcedure(org.apache.hadoop.hbase.master.locking.LockProcedure) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) UncheckedIOException(java.io.UncheckedIOException) MasterProcedureEnv(org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) MasterLock(org.apache.hadoop.hbase.master.locking.LockManager.MasterLock) Test(org.junit.Test)

Aggregations

IOException (java.io.IOException)2 MasterLock (org.apache.hadoop.hbase.master.locking.LockManager.MasterLock)2 UncheckedIOException (java.io.UncheckedIOException)1 HashSet (java.util.HashSet)1 CancellationException (java.util.concurrent.CancellationException)1 HBaseClassTestRule (org.apache.hadoop.hbase.HBaseClassTestRule)1 HBaseTestingUtil (org.apache.hadoop.hbase.HBaseTestingUtil)1 ServerName (org.apache.hadoop.hbase.ServerName)1 TableName (org.apache.hadoop.hbase.TableName)1 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)1 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)1 Table (org.apache.hadoop.hbase.client.Table)1 ForeignException (org.apache.hadoop.hbase.errorhandling.ForeignException)1 LockProcedure (org.apache.hadoop.hbase.master.locking.LockProcedure)1 MasterProcedureEnv (org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv)1 LockType (org.apache.hadoop.hbase.procedure2.LockType)1 ProcedureExecutor (org.apache.hadoop.hbase.procedure2.ProcedureExecutor)1 MasterTests (org.apache.hadoop.hbase.testclassification.MasterTests)1 MediumTests (org.apache.hadoop.hbase.testclassification.MediumTests)1 Bytes (org.apache.hadoop.hbase.util.Bytes)1