Search in sources :

Example 11 with ConnectionLossException

use of org.apache.zookeeper.KeeperException.ConnectionLossException in project helios by spotify.

the class TaskHistoryWriter method run.

/**
 * Drains queued task status events, writing each one to its own history node in ZooKeeper.
 *
 * <p>The method returns when {@code getNext()} yields {@code null}. It also returns after the
 * first ZooKeeper failure other than "node already exists"; in that case the event that could
 * not be written is handed to {@code putBack} before returning.
 */
@Override
public void run() {
    for (TaskStatusEvent event = getNext(); event != null; event = getNext()) {
        final JobId jobId = event.getStatus().getJob().getId();
        final String historyPath = Paths.historyJobHostEventsTimestamp(jobId, hostname, event.getTimestamp());
        try {
            log.debug("writing queued item to zookeeper {} {}", event.getStatus().getJob().getId(), event.getTimestamp());
            client.ensurePath(historyPath, true);
            client.createAndSetData(historyPath, event.getStatus().toJsonBytes());

            // Trim the per-job/per-host history once it grows past the retention limit.
            final List<String> storedEvents = client.getChildren(Paths.historyJobHostEvents(jobId, hostname));
            final boolean overLimit = storedEvents.size() > MAX_NUMBER_STATUS_EVENTS_TO_RETAIN;
            if (overLimit) {
                trimStatusEvents(storedEvents, jobId);
            }
        } catch (NodeExistsException e) {
            // Two generals problem: the node we wanted to create is already present,
            // so there is nothing left to do for this event.
            log.debug("item we wanted in is already there");
        } catch (ConnectionLossException e) {
            log.warn("Connection lost while putting item into zookeeper, will retry");
            putBack(event);
            return;
        } catch (KeeperException e) {
            log.error("Error putting item into zookeeper, will retry", e);
            putBack(event);
            return;
        }
    }
}
Also used : TaskStatusEvent(com.spotify.helios.common.descriptors.TaskStatusEvent) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) JobId(com.spotify.helios.common.descriptors.JobId) KeeperException(org.apache.zookeeper.KeeperException)

Example 12 with ConnectionLossException

use of org.apache.zookeeper.KeeperException.ConnectionLossException in project zookeeper by apache.

the class SimpleSysTest method testSimpleCase.

/**
 * This test checks the following:
 * 1) All clients connect successfully
 * 2) Half of the servers die (assuming odd number) and a write succeeds
 * 3) All servers are restarted and cluster stays alive
 * 4) Clients see a change by the server
 * 5) Clients' ephemeral nodes are cleaned up
 *
 * @throws Exception if any ZooKeeper operation fails unexpectedly or the thread is interrupted
 */
@Test
public void testSimpleCase() throws Exception {
    configureServers(serverCount);
    configureClients(clientCount, SimpleClient.class, getHostPort());
    Stat stat = new Stat();
    startServers();
    LOG.debug("Connecting to " + getHostPort());
    ZooKeeper zk = new ZooKeeper(getHostPort(), 15000, this);
    waitForConnect(zk, 10000);
    zk.create("/simpleCase", "orig".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    startClients();
    // Check that all clients connect properly
    for (int i = 0; i < getClientCount(); i++) {
        for (int j = 0; j < maxTries; j++) {
            try {
                byte[] b = zk.getData("/simpleCase/" + i, false, stat);
                Assert.assertEquals("orig", new String(b));
                // Client i is verified; stop polling instead of re-reading maxTries times.
                break;
            } catch (NoNodeException e) {
                if (j + 1 == maxTries) {
                    Assert.fail("Max tries exceeded on client " + i);
                }
                Thread.sleep(1000);
            }
        }
    }
    // Kill half the servers, make a change, restart the dead
    // servers, and then bounce the other servers one by one
    for (int i = 0; i < getServerCount(); i++) {
        stopServer(i);
        if (i + 1 > getServerCount() / 2) {
            startServer(i);
        } else if (i + 1 == getServerCount() / 2) {
            Assert.assertTrue("Connection didn't recover", waitForConnect(zk, 10000));
            try {
                zk.setData("/simpleCase", "new".getBytes(), -1);
            } catch (ConnectionLossException e) {
                Assert.assertTrue("Connection didn't recover", waitForConnect(zk, 10000));
                zk.setData("/simpleCase", "new".getBytes(), -1);
            }
            // Servers 0..i are all down at this point; bring every one of them back.
            for (int j = 0; j <= i; j++) {
                LOG.info("Starting server " + j);
                startServer(j);
            }
        }
    }
    // wait for things to stabilize
    Thread.sleep(100);
    Assert.assertTrue("Servers didn't bounce", waitForConnect(zk, 15000));
    try {
        zk.getData("/simpleCase", false, stat);
    } catch (ConnectionLossException e) {
        Assert.assertTrue("Servers didn't bounce", waitForConnect(zk, 15000));
    }
    // check that the change has propagated to everyone
    for (int i = 0; i < getClientCount(); i++) {
        for (int j = 0; j < maxTries; j++) {
            byte[] data = zk.getData("/simpleCase/" + i, false, stat);
            if (new String(data).equals("new")) {
                break;
            }
            if (j + 1 == maxTries) {
                Assert.fail("max tries exceeded for " + i);
            }
            Thread.sleep(1000);
        }
    }
    // send out the kill signal
    zk.setData("/simpleCase", "die".getBytes(), -1);
    // watch for everyone to die
    for (int i = 0; i < getClientCount(); i++) {
        try {
            for (int j = 0; j < maxTries; j++) {
                zk.getData("/simpleCase/" + i, false, stat);
                if (j + 1 == maxTries) {
                    Assert.fail("max tries exceeded waiting for child " + i + " to die");
                }
                Thread.sleep(200);
            }
        } catch (NoNodeException expected) {
            // Great this is what we were hoping for!
        }
    }
    stopClients();
    stopServers();
}
Also used : Stat(org.apache.zookeeper.data.Stat) ZooKeeper(org.apache.zookeeper.ZooKeeper) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) Test(org.junit.Test)

Example 13 with ConnectionLossException

use of org.apache.zookeeper.KeeperException.ConnectionLossException in project zookeeper by apache.

the class InstanceManager method getStatus.

/**
 * Reads the data stored at {@code reportsNode + '/' + name}, retrying on connection loss and
 * waiting for the node to appear when it does not exist yet.
 *
 * <p>At most {@code maxTries} attempts are made, and no new attempt starts once
 * {@code timeout} has elapsed according to {@code Time.currentElapsedTime()}.
 *
 * @param name    name of the child node under {@code reportsNode}
 * @param timeout how long to keep trying, in the units of {@code Time.currentElapsedTime()}
 * @return the node data decoded with the platform default charset, or {@code null} when no
 *         data was read (the node holds null data, or no attempt could be made)
 * @throws KeeperException      the last {@code ConnectionLossException} or
 *                              {@code NoNodeException} when the final attempt failed, or any
 *                              other ZooKeeper error as soon as it occurs
 * @throws InterruptedException if interrupted while talking to ZooKeeper or while waiting
 */
public String getStatus(String name, long timeout) throws KeeperException, InterruptedException {
    Stat stat = new Stat();
    byte[] data = null;
    long endTime = Time.currentElapsedTime() + timeout;
    KeeperException lastException = null;
    for (int i = 0; i < maxTries && endTime > Time.currentElapsedTime(); i++) {
        try {
            data = zk.getData(reportsNode + '/' + name, false, stat);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Got Data: " + ((data == null) ? "null" : new String(data)));
            }
            lastException = null;
            break;
        } catch (ConnectionLossException e) {
            lastException = e;
        } catch (NoNodeException e) {
            final Object eventObj = new Object();
            synchronized (eventObj) {
                // wait for the node to appear; the watch is registered while holding the
                // monitor, so the notification cannot fire before we start waiting
                Stat eStat = zk.exists(reportsNode + '/' + name, new Watcher() {

                    @Override
                    public void process(WatchedEvent event) {
                        synchronized (eventObj) {
                            eventObj.notifyAll();
                        }
                    }
                });
                if (eStat == null) {
                    // Object.wait(0) blocks indefinitely and a negative timeout throws
                    // IllegalArgumentException, so only wait while time actually remains.
                    long remaining = endTime - Time.currentElapsedTime();
                    if (remaining > 0) {
                        eventObj.wait(remaining);
                    }
                }
            }
            lastException = e;
        }
    }
    if (lastException != null) {
        throw lastException;
    }
    // data stays null when the node holds no data or the loop never ran; avoid an NPE here.
    return (data == null) ? null : new String(data);
}
Also used : WatchedEvent(org.apache.zookeeper.WatchedEvent) Stat(org.apache.zookeeper.data.Stat) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) Watcher(org.apache.zookeeper.Watcher) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) KeeperException(org.apache.zookeeper.KeeperException)

Example 14 with ConnectionLossException

use of org.apache.zookeeper.KeeperException.ConnectionLossException in project zookeeper by apache.

the class ObserverMasterTest method testObserver.

/**
 * This test ensures two things:
 * 1. That Observers can successfully proxy requests to the ensemble.
 * 2. That Observers don't participate in leader elections.
 * The second is tested by constructing an ensemble where a leader would
 * be elected if and only if an Observer voted.
 *
 * @param testObserverMaster passed through to {@code setUp}; when true the test also checks
 *                           that the observer is connected on {@code OM_PORT}
 * @throws Exception if any ZooKeeper or server operation fails unexpectedly
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testObserver(boolean testObserverMaster) throws Exception {
    // We expect two notifications before we want to continue
    latch = new CountDownLatch(2);
    setUp(-1, testObserverMaster);
    q3.start();
    assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + CLIENT_PORT_OBS, CONNECTION_TIMEOUT), "waiting for server 3 being up");
    validateObserverSyncTimeMetrics();
    if (testObserverMaster) {
        int masterPort = q3.getQuorumPeer().observer.getSocket().getPort();
        LOG.info("port {} {}", masterPort, OM_PORT);
        assertEquals(OM_PORT, masterPort, "observer failed to connect to observer master");
    }
    zk = new ZooKeeper("127.0.0.1:" + CLIENT_PORT_OBS, ClientBase.CONNECTION_TIMEOUT, this);
    zk.create("/obstest", "test".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    // Assert that commands are getting forwarded correctly
    assertEquals("test", new String(zk.getData("/obstest", null, null)));
    // Now check that other commands don't blow everything up
    zk.sync("/", null, null);
    zk.setData("/obstest", "test2".getBytes(), -1);
    zk.getChildren("/", false);
    assertEquals(States.CONNECTED, zk.getState());
    LOG.info("Shutting down server 2");
    // Now kill one of the other real servers
    q2.shutdown();
    assertTrue(ClientBase.waitForServerDown("127.0.0.1:" + CLIENT_PORT_QP2, ClientBase.CONNECTION_TIMEOUT), "Waiting for server 2 to shut down");
    LOG.info("Server 2 down");
    // Now the resulting ensemble shouldn't be quorate
    latch.await();
    assertNotSame(KeeperState.SyncConnected, lastEvent.getState(), "Client is still connected to non-quorate cluster");
    LOG.info("Latch returned");
    try {
        // JUnit 5 order is (unexpected, actual, message); with the message first the
        // assertion compared the message text to "test" and could never fail.
        assertNotEquals("test", new String(zk.getData("/obstest", null, null)), "Shouldn't get a response when cluster not quorate!");
    } catch (ConnectionLossException c) {
        LOG.info("Connection loss exception caught - ensemble not quorate (this is expected)");
    }
    latch = new CountDownLatch(1);
    LOG.info("Restarting server 2");
    // Bring it back
    // q2 = new MainThread(2, CLIENT_PORT_QP2, quorumCfgSection, extraCfgs);
    q2.start();
    LOG.info("Waiting for server 2 to come up");
    assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + CLIENT_PORT_QP2, CONNECTION_TIMEOUT), "waiting for server 2 being up");
    LOG.info("Server 2 started, waiting for latch");
    latch.await();
    // It's possible our session expired - but this is ok, shows we
    // were able to talk to the ensemble
    assertTrue((KeeperState.SyncConnected == lastEvent.getState() || KeeperState.Expired == lastEvent.getState()), "Client didn't reconnect to quorate ensemble (state was" + lastEvent.getState() + ")");
    LOG.info("perform a revalidation test");
    int leaderProxyPort = PortAssignment.unique();
    int obsProxyPort = PortAssignment.unique();
    // Forward to whichever of server 1 / server 2 currently holds the leader role
    int leaderPort = q1.getQuorumPeer().leader == null ? CLIENT_PORT_QP2 : CLIENT_PORT_QP1;
    PortForwarder leaderPF = new PortForwarder(leaderProxyPort, leaderPort);
    latch = new CountDownLatch(1);
    ZooKeeper client = new ZooKeeper(String.format("127.0.0.1:%d,127.0.0.1:%d", leaderProxyPort, obsProxyPort), ClientBase.CONNECTION_TIMEOUT, this);
    latch.await();
    client.create("/revalidtest", "test".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
    assertNotNull(client.exists("/revalidtest", null), "Read-after write failed");
    latch = new CountDownLatch(2);
    PortForwarder obsPF = new PortForwarder(obsProxyPort, CLIENT_PORT_OBS);
    try {
        leaderPF.shutdown();
    } catch (Exception ignored) {
        // Best effort: a failure while tearing down the leader proxy must not fail the test.
    }
    latch.await();
    assertEquals("test", new String(client.getData("/revalidtest", null, null)));
    client.close();
    obsPF.shutdown();
    shutdown();
}
Also used : PortForwarder(org.apache.zookeeper.server.util.PortForwarder) ZooKeeper(org.apache.zookeeper.ZooKeeper) CountDownLatch(java.util.concurrent.CountDownLatch) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) AttributeNotFoundException(javax.management.AttributeNotFoundException) InstanceNotFoundException(javax.management.InstanceNotFoundException) ReflectionException(javax.management.ReflectionException) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) KeeperException(org.apache.zookeeper.KeeperException) RuntimeMBeanException(javax.management.RuntimeMBeanException) IOException(java.io.IOException) MalformedObjectNameException(javax.management.MalformedObjectNameException) InvalidAttributeValueException(javax.management.InvalidAttributeValueException) MBeanException(javax.management.MBeanException) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

ConnectionLossException (org.apache.zookeeper.KeeperException.ConnectionLossException)14 KeeperException (org.apache.zookeeper.KeeperException)8 Test (org.junit.Test)6 ZooKeeper (org.apache.zookeeper.ZooKeeper)5 CountDownLatch (java.util.concurrent.CountDownLatch)4 NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException)4 Stat (org.apache.zookeeper.data.Stat)4 IOException (java.io.IOException)3 TimeoutException (java.util.concurrent.TimeoutException)2 ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException)2 NodeExistsException (org.apache.zookeeper.KeeperException.NodeExistsException)2 InvocationOnMock (org.mockito.invocation.InvocationOnMock)2 JobId (com.spotify.helios.common.descriptors.JobId)1 TaskStatusEvent (com.spotify.helios.common.descriptors.TaskStatusEvent)1 DefaultZooKeeperClient (com.spotify.helios.servicescommon.coordination.DefaultZooKeeperClient)1 ZooKeeperClient (com.spotify.helios.servicescommon.coordination.ZooKeeperClient)1 ZooKeeperConnectionException (com.twitter.common.zookeeper.ZooKeeperClient.ZooKeeperConnectionException)1 BaseZooKeeperTest (com.twitter.common.zookeeper.testing.BaseZooKeeperTest)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 Method (java.lang.reflect.Method)1