Use of org.apache.hadoop.hbase.errorhandling.TimeoutException in project hbase by apache.
From the class TestZKProcedure, method testMultiCohortWithMemberTimeoutDuringPrepare.
/**
 * Test a distributed commit with multiple cohort members, where one of the cohort members has a
 * timeout exception during the prepare stage.
 * <p>
 * A coordinator and three members ("one", "two", "three") are started against ZooKeeper. Each
 * member is handed a spied {@code SubprocedureImpl} whose {@code acquireBarrier()} is stubbed:
 * the third invocation (index {@code memberErrorIndex}) reports a {@link TimeoutException}
 * wrapped in a {@link ForeignException} through {@code receiveAbortProcedure} and then blocks
 * until the coordinator task's {@code receive} has run. The test then checks the phases that
 * were (and were not) executed on the coordinator and on the cohort.
 *
 * @throws Exception if any part of the setup, execution or verification fails
 */
@Test
public void testMultiCohortWithMemberTimeoutDuringPrepare() throws Exception {
  String opDescription = "error injection coordination";
  String[] cohortMembers = new String[] { "one", "two", "three" };
  List<String> expected = Lists.newArrayList(cohortMembers);
  // error constants
  final int memberErrorIndex = 2;
  final CountDownLatch coordinatorReceivedErrorLatch = new CountDownLatch(1);

  // start running the coordinator and its controller
  ZooKeeperWatcher coordinatorWatcher = newZooKeeperWatcher();
  ZKProcedureCoordinator coordinatorController =
      new ZKProcedureCoordinator(coordinatorWatcher, opDescription, COORDINATOR_NODE_NAME);
  ThreadPoolExecutor pool =
      ProcedureCoordinator.defaultPool(COORDINATOR_NODE_NAME, POOL_SIZE, KEEP_ALIVE);
  ProcedureCoordinator coordinator = spy(new ProcedureCoordinator(coordinatorController, pool));

  // start a member for each node
  SubprocedureFactory subprocFactory = Mockito.mock(SubprocedureFactory.class);
  List<Pair<ProcedureMember, ZKProcedureMemberRpcs>> members = new ArrayList<>(expected.size());
  for (String member : expected) {
    ZooKeeperWatcher watcher = newZooKeeperWatcher();
    ZKProcedureMemberRpcs controller = new ZKProcedureMemberRpcs(watcher, opDescription);
    ThreadPoolExecutor pool2 = ProcedureMember.defaultPool(member, 1, KEEP_ALIVE);
    ProcedureMember mem = new ProcedureMember(controller, pool2, subprocFactory);
    members.add(new Pair<>(mem, controller));
    controller.start(member, mem);
  }

  // setup mock subprocedures
  final List<Subprocedure> cohortTasks = new ArrayList<>();
  // Counts acquireBarrier() invocations across ALL members. The counter is shared by every
  // per-member answer below, and each member has its own pool (presumably its own thread), so
  // the read-and-increment has to be atomic or an increment could be lost and the error
  // injection at memberErrorIndex would never fire.
  final AtomicInteger acquireInvocations = new AtomicInteger();
  for (int i = 0; i < members.size(); i++) {
    ForeignExceptionDispatcher cohortMonitor = new ForeignExceptionDispatcher();
    final ProcedureMember comms = members.get(i).getFirst();
    Subprocedure commit = Mockito.spy(
        new SubprocedureImpl(comms, opName, cohortMonitor, WAKE_FREQUENCY, TIMEOUT));
    // This nasty bit has one of the impls throw a TimeoutException
    Mockito.doAnswer(new Answer<Void>() {
      @Override
      public Void answer(InvocationOnMock invocation) throws Throwable {
        int index = acquireInvocations.getAndIncrement();
        if (index == memberErrorIndex) {
          LOG.debug("Sending error to coordinator");
          ForeignException remoteCause =
              new ForeignException("TIMER", new TimeoutException("subprocTimeout", 1, 2, 0));
          Subprocedure r = ((Subprocedure) invocation.getMock());
          LOG.error("Remote commit failure, not propagating error:" + remoteCause);
          comms.receiveAbortProcedure(r.getName(), remoteCause);
          // expected value first, so a failure reports expected/actual the right way round
          assertEquals("Subprocedure should be complete after receiving the abort", true,
              r.isComplete());
          // don't leave the acquire phase until the coordinator has gotten the error
          // notification (which ensures that we never progress past prepare)
          try {
            Procedure.waitForLatch(coordinatorReceivedErrorLatch,
                new ForeignExceptionDispatcher(), WAKE_FREQUENCY, "coordinator received error");
          } catch (InterruptedException e) {
            LOG.debug("Wait for latch interrupted, done:"
                + (coordinatorReceivedErrorLatch.getCount() == 0));
            // reset the interrupt status on the thread
            Thread.currentThread().interrupt();
          }
        }
        return null;
      }
    }).when(commit).acquireBarrier();
    cohortTasks.add(commit);
  }

  // pass out a task per member
  final AtomicInteger taskIndex = new AtomicInteger();
  Mockito.when(subprocFactory.buildSubprocedure(Mockito.eq(opName),
      (byte[]) Mockito.argThat(new ArrayEquals(data)))).thenAnswer(new Answer<Subprocedure>() {
        @Override
        public Subprocedure answer(InvocationOnMock invocation) throws Throwable {
          // each request for a subprocedure gets the next prepared spy
          int index = taskIndex.getAndIncrement();
          return cohortTasks.get(index);
        }
      });

  // setup spying on the coordinator
  ForeignExceptionDispatcher coordinatorTaskErrorMonitor =
      Mockito.spy(new ForeignExceptionDispatcher());
  Procedure coordinatorTask = Mockito.spy(new Procedure(coordinator, coordinatorTaskErrorMonitor,
      WAKE_FREQUENCY, TIMEOUT, opName, data, expected));
  when(coordinator.createProcedure(any(ForeignExceptionDispatcher.class), eq(opName), eq(data),
      anyListOf(String.class))).thenReturn(coordinatorTask);
  // count down the error latch when we get the remote error
  Mockito.doAnswer(new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      // pass on the error to the master
      invocation.callRealMethod();
      // then count down the got error latch
      coordinatorReceivedErrorLatch.countDown();
      return null;
    }
  }).when(coordinatorTask).receive(Mockito.any(ForeignException.class));

  // ----------------------------
  // start running the operation
  // ----------------------------
  Procedure task =
      coordinator.startProcedure(coordinatorTaskErrorMonitor, opName, data, expected);
  assertEquals("Didn't mock coordinator task", coordinatorTask, task);

  // wait for the task to complete
  try {
    task.waitForCompleted();
  } catch (ForeignException ignored) {
    // Deliberately ignored: the injected error may or may not surface here (racy); the
    // verification below checks which phases ran in either case.
  }

  // -------------
  // verification
  // -------------
  // always expect prepared, never committed, and possible to have cleanup and finish (racy since
  // error case)
  waitAndVerifyProc(coordinatorTask, once, never(), once, atMost(1), true);
  verifyCohortSuccessful(expected, subprocFactory, cohortTasks, once, never(), once, once, true);

  // close all the open things
  closeAll(coordinator, coordinatorController, members);
}
Use of org.apache.hadoop.hbase.errorhandling.TimeoutException in project hbase by apache.
From the class TestProcedureMember, method testCoordinatorAbort.
/**
 * Verify that the member fails correctly when the coordinator aborts the procedure.
 * <p>
 * The abort is injected while the subprocedure is waiting for the global barrier, i.e. after
 * {@code acquireBarrier()} has already finished. The test therefore expects the acquire phase
 * and its notification to have happened, the in-barrier phase and the completion notification
 * to be skipped, and the subprocedure to be cancelled and rolled back via {@code cleanup}.
 *
 * @throws Exception if the setup, the run or the verification fails
 */
@Test(timeout = 60000)
public void testCoordinatorAbort() throws Exception {
  buildCohortMemberPair();

  // stand-in for another node having timed out or failed to prepare
  final TimeoutException bogusTimeout = new TimeoutException("bogus timeout", 1, 2, 0);
  final Answer<Void> abortWhileWaiting = new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      // simulate the remote error arriving (in real life an external thread delivers it)
      spySub.cancel("bogus message", bogusTimeout);
      // honour the promised wake frequency before returning
      Thread.sleep(WAKE_FREQUENCY);
      return null;
    }
  };
  doAnswer(abortWhileWaiting).when(spySub).waitForReachedGlobalBarrier();

  // create a fresh operation and hand it to the member
  member.submitSubprocedure(member.createSubprocedure(op, data));
  // this times out unless the operation dies properly
  member.closeAndWait(TIMEOUT);

  // check the sequence of calls
  InOrder sequence = inOrder(mockMemberComms, spySub);
  sequence.verify(spySub).acquireBarrier();
  sequence.verify(mockMemberComms).sendMemberAcquired(eq(spySub));
  // the later phases must have been skipped
  sequence.verify(spySub, never()).insideBarrier();
  sequence.verify(mockMemberComms, never()).sendMemberCompleted(eq(spySub), eq(data));
  // and the error recovery path must have run
  sequence.verify(spySub).cancel(anyString(), any(Exception.class));
  sequence.verify(spySub).cleanup(any(Exception.class));
}
Use of org.apache.hadoop.hbase.errorhandling.TimeoutException in project hbase by apache.
From the class TestProcedureMember, method testMemberCommitCommsFailure.
/**
 * Handle the failure where a member's commit phase succeeds but the notification to the
 * coordinator fails.
 * <p>
 * NOTE: this is the key point where the implementation departs from traditional 2PC. In true
 * 2PC the transaction is committed just before the coordinator sends the commit messages, and
 * members are then responsible for reading the TX log. This implementation rolls back instead,
 * and so breaks the normal TX guarantees.
 * <p>
 * The failure is simulated by having {@code sendMemberCompleted} cancel the subprocedure; the
 * test expects both barrier phases and both notifications to have been attempted, followed by
 * the cancel and {@code cleanup} calls.
 *
 * @throws Exception if the setup, the run or the verification fails
 */
@Test(timeout = 60000)
public void testMemberCommitCommsFailure() throws Exception {
  buildCohortMemberPair();

  final TimeoutException bogusTimeout = new TimeoutException("bogus timeout", 1, 2, 0);
  final Answer<Void> failOnCompletedNotification = new Answer<Void>() {
    @Override
    public Void answer(InvocationOnMock invocation) throws Throwable {
      // simulate the remote error arriving (in real life an external thread delivers it)
      spySub.cancel("commit comms fail", bogusTimeout);
      // honour the promised wake frequency before returning
      Thread.sleep(WAKE_FREQUENCY);
      return null;
    }
  };
  doAnswer(failOnCompletedNotification).when(mockMemberComms)
      .sendMemberCompleted(any(Subprocedure.class), eq(data));

  // create a fresh operation and hand it to the member
  member.submitSubprocedure(member.createSubprocedure(op, data));
  // this times out unless the operation dies properly
  member.closeAndWait(TIMEOUT);

  // check the sequence of calls
  InOrder sequence = inOrder(mockMemberComms, spySub);
  sequence.verify(spySub).acquireBarrier();
  sequence.verify(mockMemberComms).sendMemberAcquired(eq(spySub));
  sequence.verify(spySub).insideBarrier();
  sequence.verify(mockMemberComms).sendMemberCompleted(eq(spySub), eq(data));
  // and the error recovery path must have run
  sequence.verify(spySub).cancel(anyString(), any(Exception.class));
  sequence.verify(spySub).cleanup(any(Exception.class));
}
Aggregations