Search in sources :

Example 6 with SplitLogTask

use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.

the class ZkSplitLogWorkerCoordination method attemptToOwnTask.

/**
 * Tries to take (or keep) ownership of a split-log task by writing an OWNED state for
 * {@code server} into the task's zk node, conditional on {@code taskZKVersion}.
 * <p>
 * The same call doubles as the periodic heartbeat: rewriting OWNED over OWNED bumps the node
 * version, which is what gets returned on success.
 * <p>
 * A {@link KeeperException} is logged only when {@code isFirstTime} is false; an
 * {@link InterruptedException} is logged and the thread's interrupt flag is restored. Every
 * failure path increments {@code tot_wkr_task_heartbeat_failed}; the success path increments
 * {@code tot_wkr_task_heartbeat}.
 *
 * @param isFirstTime whether this is the first attempt to own the task
 * @param zkw zk watcher used to reach ZooKeeper
 * @param server name of the server that should own the task
 * @param task zk path of the task to own
 * @param taskZKVersion version of the task node expected in zk
 * @return the node version after the update when the task is owned by {@code server},
 *         otherwise {@code FAILED_TO_OWN_TASK}
 */
protected static int attemptToOwnTask(boolean isFirstTime, ZKWatcher zkw, ServerName server, String task, int taskZKVersion) {
    try {
        SplitLogTask ownedState = new SplitLogTask.Owned(server);
        Stat updated = zkw.getRecoverableZooKeeper().setData(task, ownedState.toByteArray(), taskZKVersion);
        if (updated != null) {
            int ownedVersion = updated.getVersion();
            SplitLogCounters.tot_wkr_task_heartbeat.increment();
            return ownedVersion;
        }
        // No Stat came back: treat as a failed heartbeat and fall through to the common exit.
        LOG.warn("zk.setData() returned null for path " + task);
    } catch (KeeperException e) {
        if (!isFirstTime) {
            switch (e.code()) {
                case NONODE:
                    LOG.warn("NONODE failed to assert ownership for " + task, e);
                    break;
                case BADVERSION:
                    LOG.warn("BADVERSION failed to assert ownership for " + task, e);
                    break;
                default:
                    LOG.warn("failed to assert ownership for " + task, e);
                    break;
            }
        }
    } catch (InterruptedException e1) {
        LOG.warn("Interrupted while trying to assert ownership of " + task + " " + StringUtils.stringifyException(e1));
        // Preserve the interrupt for callers further up the stack.
        Thread.currentThread().interrupt();
    }
    // Single failure exit shared by the null-Stat case and both exception handlers.
    SplitLogCounters.tot_wkr_task_heartbeat_failed.increment();
    return FAILED_TO_OWN_TASK;
}
Also used : Stat(org.apache.zookeeper.data.Stat) SplitLogTask(org.apache.hadoop.hbase.SplitLogTask) KeeperException(org.apache.zookeeper.KeeperException)

Example 7 with SplitLogTask

use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.

the class TestSplitLogManager method testTaskResigned.

/**
 * Puts a RESIGNED state for a worker on a freshly submitted task node and checks that the
 * resubmit counter moves from 0 to 1 and that the node then parses back as unassigned for the
 * master's server name.
 */
@Test
public void testTaskResigned() throws Exception {
    LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state");
    // The resubmit counter is probed after every setup step; it must stay at zero throughout.
    assertEquals(0, tot_mgr_resubmit.sum());
    slm = new SplitLogManager(master, conf);
    assertEquals(0, tot_mgr_resubmit.sum());
    TaskBatch taskBatch = new TaskBatch();
    String taskPath = submitTaskAndWait(taskBatch, "foo/1");
    assertEquals(0, tot_mgr_resubmit.sum());
    final ServerName resigningWorker = ServerName.valueOf("worker1,1,1");
    assertEquals(0, tot_mgr_resubmit.sum());
    SplitLogTask resignedState = new SplitLogTask.Resigned(resigningWorker);
    assertEquals(0, tot_mgr_resubmit.sum());

    ZKUtil.setData(zkw, taskPath, resignedState.toByteArray());
    ZKUtil.checkExists(zkw, taskPath);
    // Could be small race here: only wait if the counter has not already moved.
    if (tot_mgr_resubmit.sum() == 0) {
        waitForCounter(tot_mgr_resubmit, 0, 1, to / 2);
    }
    assertEquals(1, tot_mgr_resubmit.sum());

    // Read the node back and confirm its state.
    SplitLogTask stateAfterResubmit = SplitLogTask.parseFrom(ZKUtil.getData(zkw, taskPath));
    assertTrue(stateAfterResubmit.isUnassigned(master.getServerName()));
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) TaskBatch(org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch) SplitLogTask(org.apache.hadoop.hbase.SplitLogTask) Test(org.junit.Test)

Example 8 with SplitLogTask

use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.

the class TestSplitLogManager method testWorkerCrash.

/**
 * Marks a task as OWNED by a worker, then makes the mocked server manager report that worker
 * as offline and expects exactly one resubmission after a fixed sleep.
 */
@Test
public void testWorkerCrash() throws Exception {
    slm = new SplitLogManager(master, conf);
    TaskBatch taskBatch = new TaskBatch();
    String taskPath = submitTaskAndWait(taskBatch, "foo/1");
    final ServerName crashingWorker = ServerName.valueOf("worker1,1,1");

    // Hand the task to the worker and wait for the heartbeat counter unless it already moved.
    ZKUtil.setData(zkw, taskPath, new SplitLogTask.Owned(crashingWorker).toByteArray());
    if (tot_mgr_heartbeat.sum() == 0) {
        waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    }

    // Not yet resubmitted.
    Assert.assertEquals(0, tot_mgr_resubmit.sum());

    // This server becomes dead
    Mockito.when(sm.isServerOnline(crashingWorker)).thenReturn(false);

    // The timeout checker is done every 1000 ms (hardcoded).
    Thread.sleep(1300);

    // It has been resubmitted
    Assert.assertEquals(1, tot_mgr_resubmit.sum());
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) TaskBatch(org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch) SplitLogTask(org.apache.hadoop.hbase.SplitLogTask) Test(org.junit.Test)

Example 9 with SplitLogTask

use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.

the class TestSplitLogManager method testTaskDone.

/**
 * Writes a DONE state onto a submitted task node, waits until the batch reports as many done
 * tasks as installed ones, and then checks that the task-deleted counter moved from 0 to 1 and
 * that {@code checkExists} returns -1 for the node.
 */
@Test
public void testTaskDone() throws Exception {
    LOG.info("TestTaskDone - cleanup task node once in DONE state");
    slm = new SplitLogManager(master, conf);
    TaskBatch taskBatch = new TaskBatch();
    String taskPath = submitTaskAndWait(taskBatch, "foo/1");
    final ServerName finishingWorker = ServerName.valueOf("worker1,1,1");

    ZKUtil.setData(zkw, taskPath, new SplitLogTask.Done(finishingWorker).toByteArray());

    // Block on the batch monitor until installed and done counts agree.
    synchronized (taskBatch) {
        while (taskBatch.installed != taskBatch.done) {
            taskBatch.wait();
        }
    }

    waitForCounter(tot_mgr_task_deleted, 0, 1, to / 2);
    int nodeVersion = ZKUtil.checkExists(zkw, taskPath);
    assertTrue(nodeVersion == -1);
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) TaskBatch(org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch) SplitLogTask(org.apache.hadoop.hbase.SplitLogTask) Test(org.junit.Test)

Example 10 with SplitLogTask

use of org.apache.hadoop.hbase.SplitLogTask in project hbase by apache.

the class TestSplitLogManager method testMultipleResubmits.

/**
 * With {@code hbase.splitlog.max.resubmit} set to 2, assigns the same task to three workers in
 * turn. The first two assignments are each expected to be followed by a resubmit and a higher
 * node version; the third is expected to hit the resubmit threshold, leaving the count of
 * non-forced resubmits at 2.
 */
@Test
public void testMultipleResubmits() throws Exception {
    LOG.info("TestMultipleResbmits - no indefinite resubmissions");
    conf.setInt("hbase.splitlog.max.resubmit", 2);
    slm = new SplitLogManager(master, conf);
    TaskBatch taskBatch = new TaskBatch();
    String taskPath = submitTaskAndWait(taskBatch, "foo/1");
    int versionAtSubmit = ZKUtil.checkExists(zkw, taskPath);
    final ServerName firstWorker = ServerName.valueOf("worker1,1,1");
    final ServerName secondWorker = ServerName.valueOf("worker2,1,1");
    final ServerName thirdWorker = ServerName.valueOf("worker3,1,1");

    // First owner: heartbeat 0 -> 1, then resubmit 0 -> 1.
    ZKUtil.setData(zkw, taskPath, new SplitLogTask.Owned(firstWorker).toByteArray());
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
    waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
    int versionAfterFirstResubmit = ZKUtil.checkExists(zkw, taskPath);
    assertTrue(versionAfterFirstResubmit > versionAtSubmit);

    // Second owner: heartbeat 1 -> 2, then resubmit 1 -> 2.
    ZKUtil.setData(zkw, taskPath, new SplitLogTask.Owned(secondWorker).toByteArray());
    waitForCounter(tot_mgr_heartbeat, 1, 2, to / 2);
    waitForCounter(tot_mgr_resubmit, 1, 2, to + to / 2);
    int versionAfterSecondResubmit = ZKUtil.checkExists(zkw, taskPath);
    assertTrue(versionAfterSecondResubmit > versionAfterFirstResubmit);

    // Third owner: heartbeat 2 -> 3, then the threshold counter moves instead of resubmit.
    ZKUtil.setData(zkw, taskPath, new SplitLogTask.Owned(thirdWorker).toByteArray());
    waitForCounter(tot_mgr_heartbeat, 2, 3, to / 2);
    waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + to / 2);

    // Sleep a further timeout-and-a-half before the final count is checked.
    Thread.sleep(to + to / 2);
    assertEquals(2L, tot_mgr_resubmit.sum() - tot_mgr_resubmit_force.sum());
}
Also used : ServerName(org.apache.hadoop.hbase.ServerName) TaskBatch(org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch) SplitLogTask(org.apache.hadoop.hbase.SplitLogTask) Test(org.junit.Test)

Aggregations

SplitLogTask (org.apache.hadoop.hbase.SplitLogTask)28 Test (org.junit.Test)19 ServerName (org.apache.hadoop.hbase.ServerName)17 TaskBatch (org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch)9 KeeperException (org.apache.zookeeper.KeeperException)6 DeserializationException (org.apache.hadoop.hbase.exceptions.DeserializationException)4 Stat (org.apache.zookeeper.data.Stat)3 IOException (java.io.IOException)2 InterruptedIOException (java.io.InterruptedIOException)2 Configuration (org.apache.hadoop.conf.Configuration)2 Path (org.apache.hadoop.fs.Path)2 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)2 Task (org.apache.hadoop.hbase.master.SplitLogManager.Task)2 MutableInt (org.apache.commons.lang3.mutable.MutableInt)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 RecoveryMode (org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode)1