use of org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher in project hbase by apache.
the class LogRollRegionServerProcedureManager method buildSubprocedure.
/**
* If in a running state, creates the specified subprocedure for handling a backup procedure.
* @return Subprocedure to submit to the ProcedureMemeber.
*/
public Subprocedure buildSubprocedure(byte[] data) {
// don't run a backup if the parent is stop(ping)
if (rss.isStopping() || rss.isStopped()) {
throw new IllegalStateException("Can't start backup procedure on RS: " + rss.getServerName() + ", because stopping/stopped!");
}
LOG.info("Attempting to run a roll log procedure for backup.");
ForeignExceptionDispatcher errorDispatcher = new ForeignExceptionDispatcher();
Configuration conf = rss.getConfiguration();
long timeoutMillis = conf.getLong(BACKUP_TIMEOUT_MILLIS_KEY, BACKUP_TIMEOUT_MILLIS_DEFAULT);
long wakeMillis = conf.getLong(BACKUP_REQUEST_WAKE_MILLIS_KEY, BACKUP_REQUEST_WAKE_MILLIS_DEFAULT);
LogRollBackupSubprocedurePool taskManager = new LogRollBackupSubprocedurePool(rss.getServerName().toString(), conf);
return new LogRollBackupSubprocedure(rss, member, errorDispatcher, wakeMillis, timeoutMillis, taskManager, data);
}
use of org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher in project hbase by apache.
the class MasterFlushTableProcedureManager method execProcedure.
@Override
public void execProcedure(ProcedureDescription desc) throws IOException {
TableName tableName = TableName.valueOf(desc.getInstance());
// call pre coproc hook
MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
if (cpHost != null) {
cpHost.preTableFlush(tableName);
}
// Get the list of region servers that host the online regions for table.
// We use the procedure instance name to carry the table name from the client.
// It is possible that regions may move after we get the region server list.
// Each region server will get its own online regions for the table.
// We may still miss regions that need to be flushed.
List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
if (TableName.META_TABLE_NAME.equals(tableName)) {
regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(master.getZooKeeper());
} else {
regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), tableName, false);
}
Set<String> regionServers = new HashSet<>(regionsAndLocations.size());
for (Pair<HRegionInfo, ServerName> region : regionsAndLocations) {
if (region != null && region.getFirst() != null && region.getSecond() != null) {
HRegionInfo hri = region.getFirst();
if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent()))
continue;
regionServers.add(region.getSecond().toString());
}
}
ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getInstance());
// Kick of the global procedure from the master coordinator to the region servers.
// We rely on the existing Distributed Procedure framework to prevent any concurrent
// procedure with the same name.
Procedure proc = coordinator.startProcedure(monitor, desc.getInstance(), new byte[0], Lists.newArrayList(regionServers));
monitor.rethrowException();
if (proc == null) {
String msg = "Failed to submit distributed procedure " + desc.getSignature() + " for '" + desc.getInstance() + "'. " + "Another flush procedure is running?";
LOG.error(msg);
throw new IOException(msg);
}
procMap.put(tableName, proc);
try {
// wait for the procedure to complete. A timer thread is kicked off that should cancel this
// if it takes too long.
proc.waitForCompleted();
LOG.info("Done waiting - exec procedure " + desc.getSignature() + " for '" + desc.getInstance() + "'");
LOG.info("Master flush table procedure is successful!");
} catch (InterruptedException e) {
ForeignException ee = new ForeignException("Interrupted while waiting for flush table procdure to finish", e);
monitor.receive(ee);
Thread.currentThread().interrupt();
} catch (ForeignException e) {
ForeignException ee = new ForeignException("Exception while waiting for flush table procdure to finish", e);
monitor.receive(ee);
}
monitor.rethrowException();
}
use of org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher in project hbase by apache.
the class TestZKProcedure method runCommit.
private void runCommit(String... members) throws Exception {
// make sure we just have an empty list
if (members == null) {
members = new String[0];
}
List<String> expected = Arrays.asList(members);
// setup the constants
ZooKeeperWatcher coordZkw = newZooKeeperWatcher();
String opDescription = "coordination test - " + members.length + " cohort members";
// start running the controller
ZKProcedureCoordinator coordinatorComms = new ZKProcedureCoordinator(coordZkw, opDescription, COORDINATOR_NODE_NAME);
ThreadPoolExecutor pool = ProcedureCoordinator.defaultPool(COORDINATOR_NODE_NAME, POOL_SIZE, KEEP_ALIVE);
ProcedureCoordinator coordinator = new ProcedureCoordinator(coordinatorComms, pool) {
@Override
public Procedure createProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs, List<String> expectedMembers) {
return Mockito.spy(super.createProcedure(fed, procName, procArgs, expectedMembers));
}
};
// build and start members
// NOTE: There is a single subprocedure builder for all members here.
SubprocedureFactory subprocFactory = Mockito.mock(SubprocedureFactory.class);
List<Pair<ProcedureMember, ZKProcedureMemberRpcs>> procMembers = new ArrayList<>(members.length);
// start each member
for (String member : members) {
ZooKeeperWatcher watcher = newZooKeeperWatcher();
ZKProcedureMemberRpcs comms = new ZKProcedureMemberRpcs(watcher, opDescription);
ThreadPoolExecutor pool2 = ProcedureMember.defaultPool(member, 1, KEEP_ALIVE);
ProcedureMember procMember = new ProcedureMember(comms, pool2, subprocFactory);
procMembers.add(new Pair<>(procMember, comms));
comms.start(member, procMember);
}
// setup mock member subprocedures
final List<Subprocedure> subprocs = new ArrayList<>();
for (int i = 0; i < procMembers.size(); i++) {
ForeignExceptionDispatcher cohortMonitor = new ForeignExceptionDispatcher();
Subprocedure commit = Mockito.spy(new SubprocedureImpl(procMembers.get(i).getFirst(), opName, cohortMonitor, WAKE_FREQUENCY, TIMEOUT));
subprocs.add(commit);
}
// link subprocedure to buildNewOperation invocation.
// NOTE: would be racy if not an AtomicInteger
final AtomicInteger i = new AtomicInteger(0);
Mockito.when(subprocFactory.buildSubprocedure(Mockito.eq(opName), (byte[]) Mockito.argThat(new ArrayEquals(data)))).thenAnswer(new Answer<Subprocedure>() {
@Override
public Subprocedure answer(InvocationOnMock invocation) throws Throwable {
int index = i.getAndIncrement();
LOG.debug("Task size:" + subprocs.size() + ", getting:" + index);
Subprocedure commit = subprocs.get(index);
return commit;
}
});
// setup spying on the coordinator
// Procedure proc = Mockito.spy(procBuilder.createProcedure(coordinator, opName, data, expected));
// Mockito.when(procBuilder.build(coordinator, opName, data, expected)).thenReturn(proc);
// start running the operation
Procedure task = coordinator.startProcedure(new ForeignExceptionDispatcher(), opName, data, expected);
// assertEquals("Didn't mock coordinator task", proc, task);
// verify all things ran as expected
// waitAndVerifyProc(proc, once, once, never(), once, false);
waitAndVerifyProc(task, once, once, never(), once, false);
verifyCohortSuccessful(expected, subprocFactory, subprocs, once, once, never(), once, false);
// close all the things
closeAll(coordinator, coordinatorComms, procMembers);
}
use of org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher in project hbase by apache.
the class TestZKProcedure method testMultiCohortWithMemberTimeoutDuringPrepare.
/**
* Test a distributed commit with multiple cohort members, where one of the cohort members has a
* timeout exception during the prepare stage.
*/
@Test
public void testMultiCohortWithMemberTimeoutDuringPrepare() throws Exception {
String opDescription = "error injection coordination";
String[] cohortMembers = new String[] { "one", "two", "three" };
List<String> expected = Lists.newArrayList(cohortMembers);
// error constants
final int memberErrorIndex = 2;
final CountDownLatch coordinatorReceivedErrorLatch = new CountDownLatch(1);
// start running the coordinator and its controller
ZooKeeperWatcher coordinatorWatcher = newZooKeeperWatcher();
ZKProcedureCoordinator coordinatorController = new ZKProcedureCoordinator(coordinatorWatcher, opDescription, COORDINATOR_NODE_NAME);
ThreadPoolExecutor pool = ProcedureCoordinator.defaultPool(COORDINATOR_NODE_NAME, POOL_SIZE, KEEP_ALIVE);
ProcedureCoordinator coordinator = spy(new ProcedureCoordinator(coordinatorController, pool));
// start a member for each node
SubprocedureFactory subprocFactory = Mockito.mock(SubprocedureFactory.class);
List<Pair<ProcedureMember, ZKProcedureMemberRpcs>> members = new ArrayList<>(expected.size());
for (String member : expected) {
ZooKeeperWatcher watcher = newZooKeeperWatcher();
ZKProcedureMemberRpcs controller = new ZKProcedureMemberRpcs(watcher, opDescription);
ThreadPoolExecutor pool2 = ProcedureMember.defaultPool(member, 1, KEEP_ALIVE);
ProcedureMember mem = new ProcedureMember(controller, pool2, subprocFactory);
members.add(new Pair<>(mem, controller));
controller.start(member, mem);
}
// setup mock subprocedures
final List<Subprocedure> cohortTasks = new ArrayList<>();
final int[] elem = new int[1];
for (int i = 0; i < members.size(); i++) {
ForeignExceptionDispatcher cohortMonitor = new ForeignExceptionDispatcher();
final ProcedureMember comms = members.get(i).getFirst();
Subprocedure commit = Mockito.spy(new SubprocedureImpl(comms, opName, cohortMonitor, WAKE_FREQUENCY, TIMEOUT));
// This nasty bit has one of the impls throw a TimeoutException
Mockito.doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
int index = elem[0];
if (index == memberErrorIndex) {
LOG.debug("Sending error to coordinator");
ForeignException remoteCause = new ForeignException("TIMER", new TimeoutException("subprocTimeout", 1, 2, 0));
Subprocedure r = ((Subprocedure) invocation.getMock());
LOG.error("Remote commit failure, not propagating error:" + remoteCause);
comms.receiveAbortProcedure(r.getName(), remoteCause);
assertEquals(r.isComplete(), true);
// notification (which ensures that we never progress past prepare)
try {
Procedure.waitForLatch(coordinatorReceivedErrorLatch, new ForeignExceptionDispatcher(), WAKE_FREQUENCY, "coordinator received error");
} catch (InterruptedException e) {
LOG.debug("Wait for latch interrupted, done:" + (coordinatorReceivedErrorLatch.getCount() == 0));
// reset the interrupt status on the thread
Thread.currentThread().interrupt();
}
}
elem[0] = ++index;
return null;
}
}).when(commit).acquireBarrier();
cohortTasks.add(commit);
}
// pass out a task per member
final AtomicInteger taskIndex = new AtomicInteger();
Mockito.when(subprocFactory.buildSubprocedure(Mockito.eq(opName), (byte[]) Mockito.argThat(new ArrayEquals(data)))).thenAnswer(new Answer<Subprocedure>() {
@Override
public Subprocedure answer(InvocationOnMock invocation) throws Throwable {
int index = taskIndex.getAndIncrement();
Subprocedure commit = cohortTasks.get(index);
return commit;
}
});
// setup spying on the coordinator
ForeignExceptionDispatcher coordinatorTaskErrorMonitor = Mockito.spy(new ForeignExceptionDispatcher());
Procedure coordinatorTask = Mockito.spy(new Procedure(coordinator, coordinatorTaskErrorMonitor, WAKE_FREQUENCY, TIMEOUT, opName, data, expected));
when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(opName), eq(data), anyListOf(String.class))).thenReturn(coordinatorTask);
// count down the error latch when we get the remote error
Mockito.doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
// pass on the error to the master
invocation.callRealMethod();
// then count down the got error latch
coordinatorReceivedErrorLatch.countDown();
return null;
}
}).when(coordinatorTask).receive(Mockito.any(ForeignException.class));
// ----------------------------
// start running the operation
// ----------------------------
Procedure task = coordinator.startProcedure(coordinatorTaskErrorMonitor, opName, data, expected);
assertEquals("Didn't mock coordinator task", coordinatorTask, task);
// wait for the task to complete
try {
task.waitForCompleted();
} catch (ForeignException fe) {
// this may get caught or may not
}
// -------------
// verification
// -------------
// always expect prepared, never committed, and possible to have cleanup and finish (racy since
// error case)
waitAndVerifyProc(coordinatorTask, once, never(), once, atMost(1), true);
verifyCohortSuccessful(expected, subprocFactory, cohortTasks, once, never(), once, once, true);
// close all the open things
closeAll(coordinator, coordinatorController, members);
}
use of org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher in project hbase by apache.
the class TestProcedure method testMultipleMember.
@Test(timeout = 60000)
public void testMultipleMember() throws Exception {
// 2 members
List<String> members = new ArrayList<>();
members.add("member1");
members.add("member2");
LatchedProcedure proc = new LatchedProcedure(coord, new ForeignExceptionDispatcher(), 100, Integer.MAX_VALUE, "op", null, members);
final LatchedProcedure procspy = spy(proc);
// start the barrier procedure
new Thread() {
public void run() {
procspy.call();
}
}.start();
// coordinator: wait for the barrier to be acquired, then send start barrier
procspy.startedAcquireBarrier.await();
// we only know that {@link Procedure#sendStartBarrier()} was called, and others are blocked.
verify(procspy).sendGlobalBarrierStart();
verify(procspy, never()).sendGlobalBarrierReached();
verify(procspy, never()).sendGlobalBarrierComplete();
// no externals
verify(procspy, never()).barrierAcquiredByMember(anyString());
// member0: [1/2] trigger global barrier acquisition.
procspy.barrierAcquiredByMember(members.get(0));
// coordinator not satisified.
verify(procspy).sendGlobalBarrierStart();
verify(procspy, never()).sendGlobalBarrierReached();
verify(procspy, never()).sendGlobalBarrierComplete();
// member 1: [2/2] trigger global barrier acquisition.
procspy.barrierAcquiredByMember(members.get(1));
// coordinator: wait for global barrier to be acquired.
procspy.startedDuringBarrier.await();
// old news
verify(procspy).sendGlobalBarrierStart();
// member 1, 2: trigger global barrier release
procspy.barrierReleasedByMember(members.get(0), new byte[0]);
procspy.barrierReleasedByMember(members.get(1), new byte[0]);
// coordinator wait for procedure to be completed
procspy.completedProcedure.await();
verify(procspy).sendGlobalBarrierReached();
verify(procspy).sendGlobalBarrierComplete();
verify(procspy, never()).receive(any(ForeignException.class));
}
Aggregations