Search in sources :

Example 1 with ClusterInfoException

use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.

the class TestUtils method stealShard.

/**
 * Steals the shard at {@code shardPath} from the session that currently holds it.
 *
 * The current slot data is read through {@code originalSession} and cast to a
 * {@link DefaultRouterSlotInfo}, which is used as the template for the replacement entry
 * (its destination is overwritten with a {@code JunkDestination}).
 *
 * A background thread repeatedly tries to create the shard directory as an ephemeral node on a
 * newly created session while the calling thread repeatedly disrupts the original session.
 * This continues until the background thread finishes or the timeout elapses.
 *
 * @param originalSession is the session that will be disrupted in order to grab the shard.
 * It must also implement {@link DisruptibleSession}; otherwise a {@link ClassCastException}
 * is thrown.
 * @param factory is the {@link ClusterInfoSessionFactory} that will be used to create a new
 * session that can be used to grab the slot.
 * @param shardPath is the path all the way to the directory containing the shard that you
 * want stolen.
 * @param timeoutmillis is the maximum time, in milliseconds, to keep disrupting the original
 * session while waiting for the background thread to finish.
 *
 * @return the newly created session whose {@code mkdir} call on {@code shardPath} succeeded.
 * The caller is responsible for stopping it.
 *
 * @throws AssertionError when the slot could not be grabbed within the timeout or the
 * background thread failed with an exception.
 * @throws InterruptedException when the calling thread is interrupted while waiting.
 * @throws ClusterInfoException when reading the current slot data or creating the new
 * session fails.
 */
public static ClusterInfoSession stealShard(final ClusterInfoSession originalSession, final ClusterInfoSessionFactory factory, final String shardPath, final long timeoutmillis) throws InterruptedException, ClusterInfoException {
    // get the current slot data to use as a template
    final DefaultRouterSlotInfo newSlot = (DefaultRouterSlotInfo) originalSession.getData(shardPath, null);
    final AtomicBoolean stillRunning = new AtomicBoolean(true);
    final AtomicBoolean failed = new AtomicBoolean(false);
    final ClusterInfoSession session = factory.createSession();
    Runnable slotGrabber = new Runnable() {

        @Override
        public void run() {
            try {
                // give the grabber the best chance of winning the race for the slot
                Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
                boolean haveSlot = false;
                while (!haveSlot && stillRunning.get()) {
                    newSlot.setDestination(new JunkDestination());
                    // a non-null result from mkdir is treated as "we now own the slot"
                    if (session.mkdir(shardPath, newSlot, DirMode.EPHEMERAL) != null)
                        haveSlot = true;
                    Thread.yield();
                }
            } catch (ClusterInfoException e) {
                // report the cause instead of failing silently, same as the RuntimeException case
                e.printStackTrace();
                failed.set(true);
            } catch (RuntimeException re) {
                re.printStackTrace();
                failed.set(true);
            } finally {
                // signals the disrupting thread that the grabber is done (success or failure)
                stillRunning.set(false);
            }
        }
    };
    boolean succeeded = false;
    try {
        new Thread(slotGrabber).start();
        boolean onStandby = false;
        long startTime = System.currentTimeMillis();
        while (!onStandby && timeoutmillis >= (System.currentTimeMillis() - startTime)) {
            ((DisruptibleSession) originalSession).disrupt();
            Thread.sleep(100);
            if (!stillRunning.get())
                onStandby = true;
        }
        assertTrue(onStandby);
        assertFalse(failed.get());
        succeeded = true;
    } finally {
        // make sure the grabber loop terminates even when we gave up waiting
        stillRunning.set(false);
        // on ANY failure (interrupt, assertion, or unexpected runtime exception) the new
        // session is never handed to the caller, so it must be stopped here to avoid a leak
        if (!succeeded)
            session.stop();
    }
    return session;
}
Also used : ClusterInfoException(com.nokia.dempsy.cluster.ClusterInfoException) DisruptibleSession(com.nokia.dempsy.cluster.DisruptibleSession) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ClusterInfoSession(com.nokia.dempsy.cluster.ClusterInfoSession) DefaultRouterSlotInfo(com.nokia.dempsy.router.DecentralizedRoutingStrategy.DefaultRouterSlotInfo)

Example 2 with ClusterInfoException

use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.

the class TestZookeeperClusterResilience method testNoServerOnStartup.

/**
 * Creates a session while no server is running, checks that accessing the cluster fails with a
 * {@link ClusterInfoException}, then starts the server and checks that the same session begins
 * to work, survives a forced session expiration, and can eventually create an ephemeral node.
 */
@Test
public void testNoServerOnStartup() throws Throwable {
    // create a session factory
    ZookeeperSessionFactory factory = new ZookeeperSessionFactory("127.0.0.1:" + port, 5000);
    // create a session from the session factory
    ZookeeperSession session = (ZookeeperSession) factory.createSession();
    ZookeeperTestServer server = null;
    // everything that uses the session lives inside the try so that the session is stopped
    // even when one of the early, pre-server checks fails
    try {
        ClusterId clusterId = new ClusterId(appname, "testNoServerOnStartup");
        // hook a test watch to make sure that callbacks work correctly
        TestWatcher callback = new TestWatcher(session) {

            @Override
            public void process() {
                called.set(true);
            }
        };
        // now accessing the cluster should get us an error.
        boolean gotCorrectError = false;
        try {
            session.getSubdirs(clusterId.asPath(), callback);
        } catch (ClusterInfoException e) {
            gotCorrectError = true;
        }
        assertTrue(gotCorrectError);
        // now lets startup the server.
        server = new ZookeeperTestServer();
        server.start();
        // create a cluster from the session
        TestUtils.createClusterLevel(clusterId, session);
        // wait until this works.
        assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {

            @Override
            public boolean conditionMet(TestWatcher o) {
                return o.called.get();
            }
        }));
        // reset the callbacker ...
        callback.called.set(false);
        // now see if the cluster works.
        // NOTE(review): the flag was reset just above, so this poll looks like it is satisfied
        // immediately unless the watcher fires again in between -- confirm the intent.
        assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {

            @Override
            public boolean conditionMet(TestWatcher o) {
                return !o.called.get();
            }
        }));
        session.getSubdirs(clusterId.asPath(), callback);
        ZooKeeper origZk = session.zkref.get();
        ZookeeperTestServer.forceSessionExpiration(origZk);
        // wait for the callback
        assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {

            @Override
            public boolean conditionMet(TestWatcher o) {
                return o.called.get();
            }
        }));
        // unfortunately I cannot check the getActiveSlots for failure because there's a race condition I can't fix.
        //  No matter how fast I check it's possible that it's okay again OR that allSlots hasn't been cleared.
        //
        // however, they should eventually recover.
        gotCorrectError = true;
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis() && gotCorrectError; ) {
            Thread.sleep(1);
            try {
                session.getSubdirs(clusterId.asPath(), callback);
                gotCorrectError = false;
            } catch (ClusterInfoException ignored) {
                // expected while the session is still recovering; retry until the deadline
            }
        }
        // this call throws (and so fails the test) if the session has not recovered by now
        session.getSubdirs(clusterId.asPath(), callback);
        // And join should work
        gotCorrectError = true;
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis() && gotCorrectError; ) {
            Thread.sleep(1);
            try {
                session.mkdir(clusterId.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                gotCorrectError = false;
            } catch (ClusterInfoException ignored) {
                // expected while the session is still recovering; retry until the deadline
            }
        }
        assertFalse(gotCorrectError);
    } finally {
        if (server != null)
            server.shutdown();
        if (session != null)
            session.stop();
    }
}
Also used : Condition(com.nokia.dempsy.TestUtils.Condition) ZooKeeper(org.apache.zookeeper.ZooKeeper) ClusterId(com.nokia.dempsy.config.ClusterId) ClusterInfoException(com.nokia.dempsy.cluster.ClusterInfoException) Test(org.junit.Test)

Example 3 with ClusterInfoException

use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.

the class TestZookeeperClusterResilience method testSessionExpiredWithFullApp.

/**
 * Runs the three clusters of {@code FullApplication} against a test ZooKeeper server, forces the
 * expiration of the session shared with the third node, and checks that the application's final
 * message count keeps increasing both before and after the expiration.
 */
@Test
public void testSessionExpiredWithFullApp() throws Throwable {
    ZookeeperTestServer server = null;
    ZookeeperSession session = null;
    final AtomicReference<ZookeeperSession> sessionRef = new AtomicReference<ZookeeperSession>();
    final AtomicLong processCount = new AtomicLong(0);
    Dempsy[] dempsy = new Dempsy[3];
    try {
        // bring up the server first
        server = new ZookeeperTestServer();
        server.start();
        // a session that counts how many watcher proxies it is asked to create
        session = new ZookeeperSession("127.0.0.1:" + port, 5000) {

            @Override
            public WatcherProxy makeWatcherProxy(ClusterInfoWatcher w) {
                processCount.incrementAndGet();
                return super.makeWatcherProxy(w);
            }
        };
        sessionRef.set(session);
        final FullApplication app = new FullApplication();
        ApplicationDefinition topology = app.getTopology();
        // nothing has asked for a watcher proxy yet
        assertEquals(0, processCount.intValue());
        final String appName = FullApplication.class.getSimpleName();
        dempsy[0] = getDempsyFor(new ClusterId(appName, FullApplication.MyAdaptor.class.getSimpleName()), topology);
        dempsy[0].setClusterSessionFactory(new ZookeeperSessionFactory("127.0.0.1:" + port, 5000));
        dempsy[1] = getDempsyFor(new ClusterId(appName, FullApplication.MyMp.class.getSimpleName()), topology);
        dempsy[1].setClusterSessionFactory(new ZookeeperSessionFactory("127.0.0.1:" + port, 5000));
        dempsy[2] = getDempsyFor(new ClusterId(appName, FullApplication.MyRankMp.class.getSimpleName()), topology);
        // the last node is handed the instrumented session created above
        dempsy[2].setClusterSessionFactory(new ClusterInfoSessionFactory() {

            @Override
            public ClusterInfoSession createSession() throws ClusterInfoException {
                return sessionRef.get();
            }
        });
        // start everything in reverse order
        for (int i = dempsy.length - 1; i >= 0; i--) {
            dempsy[i].start();
        }
        // true once the final message count has moved more than 100 past the given baseline
        final Condition<Long> movedPastBaseline = new Condition<Long>() {

            @Override
            public boolean conditionMet(Long baseline) {
                return app.finalMessageCount.get() > (baseline + 100L);
            }
        };
        // make sure the final count is incrementing
        long baselineCount = app.finalMessageCount.get();
        assertTrue(poll(30000, baselineCount, movedPastBaseline));
        logger.trace("Killing zookeeper");
        ZooKeeper expiredZk = session.zkref.get();
        ZookeeperTestServer.forceSessionExpiration(expiredZk);
        logger.trace("Killed zookeeper");
        // wait for the current session to go invalid
        assertTrue(poll(baseTimeoutMillis, expiredZk, new Condition<ZooKeeper>() {

            @Override
            public boolean conditionMet(ZooKeeper zk) {
                return !zk.getState().isAlive();
            }
        }));
        // make sure the final count is STILL incrementing
        baselineCount = app.finalMessageCount.get();
        assertTrue(poll(30000, baselineCount, movedPastBaseline));
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (session != null) {
            session.stop();
        }
        // ask every started node to stop before waiting on any of them
        for (Dempsy node : dempsy) {
            if (node != null) {
                node.stop();
            }
        }
        for (Dempsy node : dempsy) {
            if (node != null) {
                assertTrue(node.waitToBeStopped(baseTimeoutMillis));
            }
        }
    }
}
Also used : ClusterInfoException(com.nokia.dempsy.cluster.ClusterInfoException) ClusterInfoWatcher(com.nokia.dempsy.cluster.ClusterInfoWatcher) ClusterInfoSession(com.nokia.dempsy.cluster.ClusterInfoSession) Condition(com.nokia.dempsy.TestUtils.Condition) ClusterInfoSessionFactory(com.nokia.dempsy.cluster.ClusterInfoSessionFactory) ClusterId(com.nokia.dempsy.config.ClusterId) Dempsy(com.nokia.dempsy.Dempsy) AtomicReference(java.util.concurrent.atomic.AtomicReference) AtomicLong(java.util.concurrent.atomic.AtomicLong) ZooKeeper(org.apache.zookeeper.ZooKeeper) ApplicationDefinition(com.nokia.dempsy.config.ApplicationDefinition) AtomicLong(java.util.concurrent.atomic.AtomicLong) Test(org.junit.Test)

Example 4 with ClusterInfoException

use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.

the class TestZookeeperClusterResilience method testSessionExpired.

/**
 * Registers a watcher on a cluster path, forces the ZooKeeper session to expire, and checks that
 * the watcher is called back, that it can eventually re-register itself, and that the cluster
 * path exists again after being re-created.
 */
@Test
public void testSessionExpired() throws Throwable {
    ZookeeperTestServer server = null;
    ZookeeperSession session = null;
    try {
        // bring up the server first
        server = new ZookeeperTestServer();
        server.start();
        // the createExpireSessionClient actually results in a Disconnected/SyncConnected rotating events.
        // ... so we need to filter those out since it will result in a callback.
        session = new ZookeeperSession("127.0.0.1:" + port, 5000);
        final ClusterId clusterId = new ClusterId(appname, "testSessionExpired");
        createClusterLevel(clusterId, session);
        // every callback records that it happened and then re-registers this watcher
        TestWatcher watcher = new TestWatcher(session) {

            @Override
            public void process() {
                try {
                    called.set(true);
                    logger.trace("process called on TestWatcher.");
                    session.exists(clusterId.asPath(), this);
                    session.getSubdirs(clusterId.asPath(), this);
                } catch (ClusterInfoException e) {
                    throw new RuntimeException(e);
                }
            }
        };
        // calling process() directly registers the watcher with the session ...
        watcher.process();
        // ... and then the flag it set is cleared again
        watcher.called.set(false);
        ZookeeperTestServer.forceSessionExpiration(session.zkref.get());
        // we should see the session expiration in a callback
        Condition<TestWatcher> wasCalled = new Condition<TestWatcher>() {

            @Override
            public boolean conditionMet(TestWatcher w) {
                return w.called.get();
            }
        };
        assertTrue(poll(5000, watcher, wasCalled));
        // and eventually a reconnect: process() stops throwing once the session works again
        Condition<TestWatcher> canProcess = new Condition<TestWatcher>() {

            @Override
            public boolean conditionMet(TestWatcher w) {
                try {
                    w.process();
                } catch (Throwable ignored) {
                    return false;
                }
                return true;
            }
        };
        assertTrue(poll(5000, watcher, canProcess));
        createClusterLevel(clusterId, session);
        assertTrue(session.exists(clusterId.asPath(), watcher));
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (session != null) {
            session.stop();
        }
    }
}
Also used : Condition(com.nokia.dempsy.TestUtils.Condition) ClusterId(com.nokia.dempsy.config.ClusterId) ClusterInfoException(com.nokia.dempsy.cluster.ClusterInfoException) Test(org.junit.Test)

Example 5 with ClusterInfoException

use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.

the class TestZookeeperClusterResilience method testRecoverWithIOException.

/**
 * Forces the session's ZooKeeper client creation to throw {@link IOException} after a forced
 * session expiration, checks that {@code mkdir} fails while that is happening, then lets client
 * creation succeed again and checks that the watcher is called and {@code mkdir} works.
 */
@Test
public void testRecoverWithIOException() throws Throwable {
    ZookeeperTestServer server = null;
    // non-final handle on the session so that the finally block can stop it
    ZookeeperSession sessionToStop = null;
    try {
        // bring up the server first
        server = new ZookeeperTestServer();
        server.start();
        // a session whose client creation can be made to fail on demand
        final ZookeeperSession session = new ZookeeperSession("127.0.0.1:" + port, 5000) {

            @Override
            protected ZooKeeper makeZooKeeperClient(String connectString, int sessionTimeout) throws IOException {
                if (forceIOException.get()) {
                    forceIOExceptionLatch.countDown();
                    throw new IOException("Fake IO Problem.");
                }
                return super.makeZooKeeperClient(connectString, sessionTimeout);
            }
        };
        sessionToStop = session;
        final ClusterId clusterId = new ClusterId(appname, "testRecoverWithIOException");
        TestUtils.createClusterLevel(clusterId, session);
        // the flag is only set when re-registering through getSubdirs did not throw
        TestWatcher watcher = new TestWatcher(session) {

            @Override
            public void process() {
                try {
                    session.getSubdirs(clusterId.asPath(), this);
                    called.set(true);
                } catch (ClusterInfoException e) {
                    throw new RuntimeException(e);
                }
            }
        };
        watcher.process();
        // force the ioexception to happen
        forceIOException.set(true);
        ZookeeperTestServer.forceSessionExpiration(session.zkref.get());
        // now in the background it should be retrying but hosed.
        assertTrue(forceIOExceptionLatch.await(baseTimeoutMillis * 3, TimeUnit.MILLISECONDS));
        // mkdir should now fail since recovery is being prevented by the forced IOExceptions
        Condition<ClusterId> mkdirFails = new Condition<ClusterId>() {

            @Override
            public boolean conditionMet(ClusterId id) throws Throwable {
                boolean rejected = false;
                try {
                    session.mkdir(id.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                } catch (ClusterInfoException e) {
                    rejected = true;
                }
                return rejected;
            }
        };
        assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, mkdirFails));
        // reset the callbacker ...
        watcher.called.set(false);
        // now we should allow the code to proceed.
        forceIOException.set(false);
        // wait for the callback
        Condition<TestWatcher> wasCalled = new Condition<TestWatcher>() {

            @Override
            public boolean conditionMet(TestWatcher w) {
                return w.called.get();
            }
        };
        assertTrue(poll(baseTimeoutMillis, watcher, wasCalled));
        // this should eventually recover.
        Condition<ClusterId> clusterAndJoinCreated = new Condition<ClusterId>() {

            @Override
            public boolean conditionMet(ClusterId id) throws Throwable {
                try {
                    TestUtils.createClusterLevel(id, session);
                    session.mkdir(id.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                } catch (ClusterInfoException e) {
                    return false;
                }
                return true;
            }
        };
        assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, clusterAndJoinCreated));
        session.getSubdirs(clusterId.asPath(), watcher);
        // And join should work
        Condition<ClusterId> joinWorks = new Condition<ClusterId>() {

            @Override
            public boolean conditionMet(ClusterId id) throws Throwable {
                try {
                    session.mkdir(id.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                } catch (ClusterInfoException e) {
                    return false;
                }
                return true;
            }
        };
        assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, joinWorks));
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (sessionToStop != null) {
            sessionToStop.stop();
        }
    }
}
Also used : Condition(com.nokia.dempsy.TestUtils.Condition) ClusterId(com.nokia.dempsy.config.ClusterId) ClusterInfoException(com.nokia.dempsy.cluster.ClusterInfoException) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

ClusterInfoException (com.nokia.dempsy.cluster.ClusterInfoException)13 Condition (com.nokia.dempsy.TestUtils.Condition)8 ClusterId (com.nokia.dempsy.config.ClusterId)8 Test (org.junit.Test)8 ZooKeeper (org.apache.zookeeper.ZooKeeper)5 ClusterInfoSession (com.nokia.dempsy.cluster.ClusterInfoSession)4 Dempsy (com.nokia.dempsy.Dempsy)3 MyMp (com.nokia.dempsy.cluster.zookeeper.FullApplication.MyMp)2 SafeString (com.nokia.dempsy.internal.util.SafeString)2 StatsCollector (com.nokia.dempsy.monitoring.StatsCollector)2 Pair (com.nokia.dempsy.util.Pair)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 ClassPathXmlApplicationContext (org.springframework.context.support.ClassPathXmlApplicationContext)2 ClusterInfoSessionFactory (com.nokia.dempsy.cluster.ClusterInfoSessionFactory)1 ClusterInfoWatcher (com.nokia.dempsy.cluster.ClusterInfoWatcher)1 DisruptibleSession (com.nokia.dempsy.cluster.DisruptibleSession)1 ApplicationDefinition (com.nokia.dempsy.config.ApplicationDefinition)1 DefaultRouterSlotInfo (com.nokia.dempsy.router.DecentralizedRoutingStrategy.DefaultRouterSlotInfo)1 SerializationException (com.nokia.dempsy.serialization.SerializationException)1 IOException (java.io.IOException)1