use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.
the class ZkSplitLogWorkerCoordination method attemptToOwnTask.
/**
 * Tries to take ownership of a split-log task by writing an OWNED state for {@code server} into
 * the task's zk node.
 * <p>
 * The same call is used as the periodic heartbeat for a task that is already owned, in which
 * case the node goes from OWNED to OWNED.
 *
 * @param isFirstTime whether this is the first attempt; when {@code true}, a
 *          {@code KeeperException} raised by the write is not logged
 * @param zkw zk watcher through which the node is written
 * @param server name of the server that should own the task
 * @param task the task (zk node path) to own
 * @param taskZKVersion version of the task node passed to zk as the expected version
 * @return the node version reported by zk after a successful write; otherwise
 *         {@code FAILED_TO_OWN_TASK}
 */
protected static int attemptToOwnTask(boolean isFirstTime, ZKWatcher zkw, ServerName server, String task, int taskZKVersion) {
  try {
    SplitLogTask owned = new SplitLogTask.Owned(server);
    Stat written = zkw.getRecoverableZooKeeper().setData(task, owned.toByteArray(), taskZKVersion);
    if (written != null) {
      int newVersion = written.getVersion();
      SplitLogCounters.tot_wkr_task_heartbeat.increment();
      return newVersion;
    }
    // No stat came back: treated as a failed attempt, handled by the common exit below.
    LOG.warn("zk.setData() returned null for path " + task);
  } catch (KeeperException ke) {
    // Failures on the very first attempt are not logged; later (heartbeat) failures are.
    if (!isFirstTime) {
      switch (ke.code()) {
        case NONODE:
          LOG.warn("NONODE failed to assert ownership for " + task, ke);
          break;
        case BADVERSION:
          LOG.warn("BADVERSION failed to assert ownership for " + task, ke);
          break;
        default:
          LOG.warn("failed to assert ownership for " + task, ke);
          break;
      }
    }
  } catch (InterruptedException ie) {
    LOG.warn("Interrupted while trying to assert ownership of " + task + " " + StringUtils.stringifyException(ie));
    // Restore the interrupt flag for the caller.
    Thread.currentThread().interrupt();
  }
  // Every path that reaches this point failed to own the task.
  SplitLogCounters.tot_wkr_task_heartbeat_failed.increment();
  return FAILED_TO_OWN_TASK;
}
use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.
the class TestSplitLogManager method testTaskResigned.
/**
 * Puts a submitted task node into the RESIGNED state and checks that the manager resubmits it
 * exactly once, leaving the node unassigned under the master's server name.
 */
@Test
public void testTaskResigned() throws Exception {
  LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state");
  // The resubmit counter is re-checked after every setup step; it must stay at zero until
  // the node is actually switched to RESIGNED.
  assertEquals(0, tot_mgr_resubmit.sum());
  slm = new SplitLogManager(master, conf);
  assertEquals(0, tot_mgr_resubmit.sum());
  TaskBatch taskBatch = new TaskBatch();
  String taskPath = submitTaskAndWait(taskBatch, "foo/1");
  assertEquals(0, tot_mgr_resubmit.sum());
  final ServerName resigningWorker = ServerName.valueOf("worker1,1,1");
  assertEquals(0, tot_mgr_resubmit.sum());
  SplitLogTask resigned = new SplitLogTask.Resigned(resigningWorker);
  assertEquals(0, tot_mgr_resubmit.sum());
  ZKUtil.setData(zkw, taskPath, resigned.toByteArray());
  // Return value intentionally unused.
  // NOTE(review): presumably called for a side effect of checkExists - confirm before removing.
  ZKUtil.checkExists(zkw, taskPath);
  // Could be small race here.
  if (tot_mgr_resubmit.sum() == 0) {
    waitForCounter(tot_mgr_resubmit, 0, 1, to / 2);
  }
  assertEquals(1, tot_mgr_resubmit.sum());
  // Read the node back and verify the state it now carries.
  SplitLogTask current = SplitLogTask.parseFrom(ZKUtil.getData(zkw, taskPath));
  assertTrue(current.isUnassigned(master.getServerName()));
}
use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.
the class TestSplitLogManager method testWorkerCrash.
/**
 * Marks a task as owned by a worker, then reports that worker as offline and checks that the
 * task gets resubmitted.
 */
@Test
public void testWorkerCrash() throws Exception {
  slm = new SplitLogManager(master, conf);
  TaskBatch taskBatch = new TaskBatch();
  String taskPath = submitTaskAndWait(taskBatch, "foo/1");
  final ServerName crashingWorker = ServerName.valueOf("worker1,1,1");

  // Claim the task on behalf of the worker and wait for the first heartbeat to be counted.
  SplitLogTask owned = new SplitLogTask.Owned(crashingWorker);
  ZKUtil.setData(zkw, taskPath, owned.toByteArray());
  boolean heartbeatPending = tot_mgr_heartbeat.sum() == 0;
  if (heartbeatPending) {
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
  }

  // Not yet resubmitted.
  Assert.assertEquals(0, tot_mgr_resubmit.sum());

  // This server becomes dead
  Mockito.when(sm.isServerOnline(crashingWorker)).thenReturn(false);

  // The timeout checker is done every 1000 ms (hardcoded).
  Thread.sleep(1300);

  // It has been resubmitted
  Assert.assertEquals(1, tot_mgr_resubmit.sum());
}
use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.
the class TestSplitLogManager method testTaskDone.
/**
 * Puts a submitted task node into the DONE state and checks that the batch completes and the
 * task node is removed.
 */
@Test
public void testTaskDone() throws Exception {
  LOG.info("TestTaskDone - cleanup task node once in DONE state");
  slm = new SplitLogManager(master, conf);
  TaskBatch taskBatch = new TaskBatch();
  String taskPath = submitTaskAndWait(taskBatch, "foo/1");
  final ServerName finishingWorker = ServerName.valueOf("worker1,1,1");

  SplitLogTask done = new SplitLogTask.Done(finishingWorker);
  ZKUtil.setData(zkw, taskPath, done.toByteArray());

  // Block on the batch's monitor until every installed task is accounted for as done.
  synchronized (taskBatch) {
    while (taskBatch.installed != taskBatch.done) {
      taskBatch.wait();
    }
  }

  waitForCounter(tot_mgr_task_deleted, 0, 1, to / 2);
  // checkExists is expected to report -1 once the node is gone.
  int existsResult = ZKUtil.checkExists(zkw, taskPath);
  assertTrue(existsResult == -1);
}
use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.
the class TestSplitLogManager method testMultipleResubmits.
/**
 * With {@code hbase.splitlog.max.resubmit} set to 2, claims the same task as three different
 * workers in turn and checks that only two non-forced resubmissions take place.
 */
@Test
public void testMultipleResubmits() throws Exception {
  LOG.info("TestMultipleResbmits - no indefinite resubmissions");
  conf.setInt("hbase.splitlog.max.resubmit", 2);
  slm = new SplitLogManager(master, conf);
  TaskBatch taskBatch = new TaskBatch();
  String taskPath = submitTaskAndWait(taskBatch, "foo/1");
  int initialVersion = ZKUtil.checkExists(zkw, taskPath);
  final ServerName firstWorker = ServerName.valueOf("worker1,1,1");
  final ServerName secondWorker = ServerName.valueOf("worker2,1,1");
  final ServerName thirdWorker = ServerName.valueOf("worker3,1,1");

  // Round 1: first worker owns the task; expect one heartbeat and one resubmit.
  SplitLogTask ownedByFirst = new SplitLogTask.Owned(firstWorker);
  ZKUtil.setData(zkw, taskPath, ownedByFirst.toByteArray());
  waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
  waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
  int versionAfterFirst = ZKUtil.checkExists(zkw, taskPath);
  assertTrue(versionAfterFirst > initialVersion);

  // Round 2: second worker owns the task; expect a second heartbeat and a second resubmit.
  SplitLogTask ownedBySecond = new SplitLogTask.Owned(secondWorker);
  ZKUtil.setData(zkw, taskPath, ownedBySecond.toByteArray());
  waitForCounter(tot_mgr_heartbeat, 1, 2, to / 2);
  waitForCounter(tot_mgr_resubmit, 1, 2, to + to / 2);
  int versionAfterSecond = ZKUtil.checkExists(zkw, taskPath);
  assertTrue(versionAfterSecond > versionAfterFirst);

  // Round 3: third worker owns the task; the resubmit threshold counter should move instead.
  SplitLogTask ownedByThird = new SplitLogTask.Owned(thirdWorker);
  ZKUtil.setData(zkw, taskPath, ownedByThird.toByteArray());
  waitForCounter(tot_mgr_heartbeat, 2, 3, to / 2);
  waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + to / 2);

  // Allow extra time to pass, then confirm no further non-forced resubmits were counted.
  Thread.sleep(to + to / 2);
  long nonForcedResubmits = tot_mgr_resubmit.sum() - tot_mgr_resubmit_force.sum();
  assertEquals(2L, nonForcedResubmits);
}
Aggregations