use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.
the class TestUtils method stealShard.
/**
 * Steals the shard at {@code shardPath} away from the session that currently holds it.
 *
 * <p>The slot data currently stored at {@code shardPath} is read through {@code originalSession}
 * and must be a {@link DefaultRouterSlotInfo}. That instance is given a new
 * {@code JunkDestination} and is used as the data for the re-created slot.</p>
 *
 * <p>A background thread repeatedly tries to create the slot as an ephemeral entry on a brand
 * new session while the calling thread repeatedly disrupts the original session. This goes on
 * until the background thread finishes (because it got the slot or because it failed) or until
 * {@code timeoutmillis} has elapsed.</p>
 *
 * @param originalSession is the session that will be disrupted in order to grab the shard. It
 * is cast to {@code DisruptibleSession}.
 * @param factory is the {@link ClusterInfoSessionFactory} that will be used to create a new
 * session that can be used to grab the slot.
 * @param shardPath is the path all the way to the directory containing the shard that you
 * want stolen.
 * @param timeoutmillis is the maximum time, in milliseconds, to keep disrupting the original
 * session while waiting for the grab attempt to finish.
 *
 * @return the newly created session on which the slot was created. It is left running; the
 * caller is expected to stop it.
 *
 * @throws InterruptedException if the calling thread is interrupted while waiting.
 * @throws ClusterInfoException if reading the slot data or creating the new session fails.
 * @throws AssertionError when the grab attempt did not finish within the timeout or failed.
 */
public static ClusterInfoSession stealShard(final ClusterInfoSession originalSession, final ClusterInfoSessionFactory factory, final String shardPath, final long timeoutmillis) throws InterruptedException, ClusterInfoException {
    // get the current slot data to use as a template
    final DefaultRouterSlotInfo newSlot = (DefaultRouterSlotInfo) originalSession.getData(shardPath, null);
    final AtomicBoolean stillRunning = new AtomicBoolean(true);
    final AtomicBoolean failed = new AtomicBoolean(false);
    final ClusterInfoSession session = factory.createSession();
    Runnable slotGrabber = new Runnable() {
        @Override
        public void run() {
            try {
                Thread.currentThread().setPriority(Thread.MAX_PRIORITY);
                boolean haveSlot = false;
                // keep trying until mkdir reports that the slot was created, or we are told to stop
                while (!haveSlot && stillRunning.get()) {
                    newSlot.setDestination(new JunkDestination());
                    if (session.mkdir(shardPath, newSlot, DirMode.EPHEMERAL) != null) {
                        haveSlot = true;
                    }
                    Thread.yield();
                }
            } catch (ClusterInfoException e) {
                // report the cause instead of failing silently, same as the RuntimeException case
                e.printStackTrace();
                failed.set(true);
            } catch (RuntimeException re) {
                re.printStackTrace();
                failed.set(true);
            } finally {
                // tells the disrupting loop below that the grab attempt is over
                stillRunning.set(false);
            }
        }
    };
    boolean stolen = false;
    try {
        new Thread(slotGrabber).start();
        boolean onStandby = false;
        long startTime = System.currentTimeMillis();
        while (!onStandby && timeoutmillis >= (System.currentTimeMillis() - startTime)) {
            ((DisruptibleSession) originalSession).disrupt();
            Thread.sleep(100);
            if (!stillRunning.get()) {
                onStandby = true;
            }
        }
        assertTrue(onStandby);
        assertFalse(failed.get());
        stolen = true;
    } finally {
        // make sure the grabber thread stops looping no matter how we leave
        stillRunning.set(false);
        // the new session is only handed to the caller on success; on ANY failure
        // (interrupt, assertion failure or unexpected runtime exception) it is stopped here
        if (!stolen) {
            session.stop();
        }
    }
    return session;
}
use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.
the class TestZookeeperClusterResilience method testNoServerOnStartup.
/**
 * Creates a session while no server is running and checks that using it fails with a
 * {@code ClusterInfoException}. Then starts a test server and checks that the registered
 * watcher is called, that the watcher is called again after a forced session expiration,
 * and that {@code getSubdirs} and {@code mkdir} eventually succeed again.
 */
@Test
public void testNoServerOnStartup() throws Throwable {
    // create a session factory
    ZookeeperSessionFactory factory = new ZookeeperSessionFactory("127.0.0.1:" + port, 5000);
    // create a session from the session factory
    ZookeeperSession session = (ZookeeperSession) factory.createSession();
    ZookeeperTestServer server = null;
    // Everything that uses the session runs inside this try so the session is stopped
    // even when the pre-server assertion below fails.
    try {
        ClusterId clusterId = new ClusterId(appname, "testNoServerOnStartup");
        // hook a test watch to make sure that callbacks work correctly
        TestWatcher callback = new TestWatcher(session) {
            @Override
            public void process() {
                called.set(true);
            }
        };
        // now accessing the cluster should get us an error.
        boolean gotCorrectError = false;
        try {
            session.getSubdirs(clusterId.asPath(), callback);
        } catch (ClusterInfoException e) {
            gotCorrectError = true;
        }
        assertTrue(gotCorrectError);
        // now lets startup the server.
        server = new ZookeeperTestServer();
        server.start();
        // create a cluster from the session
        TestUtils.createClusterLevel(clusterId, session);
        // wait until this works.
        assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {
            @Override
            public boolean conditionMet(TestWatcher o) {
                return o.called.get();
            }
        }));
        // reset the callbacker ...
        callback.called.set(false);
        // now see if the cluster works.
        // NOTE(review): the flag was reset on the line above, so this condition presumably
        // passes on the first check - confirm what this poll is meant to verify.
        assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {
            @Override
            public boolean conditionMet(TestWatcher o) {
                return !o.called.get();
            }
        }));
        session.getSubdirs(clusterId.asPath(), callback);
        ZooKeeper origZk = session.zkref.get();
        ZookeeperTestServer.forceSessionExpiration(origZk);
        // wait for the callback
        assertTrue(TestUtils.poll(baseTimeoutMillis, callback, new Condition<TestWatcher>() {
            @Override
            public boolean conditionMet(TestWatcher o) {
                return o.called.get();
            }
        }));
        // unfortunately the failure itself cannot be checked here because of a race condition:
        // no matter how fast the check is, it's possible that the session is okay again.
        //
        // however, it should eventually recover.
        gotCorrectError = true;
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis() && gotCorrectError; ) {
            Thread.sleep(1);
            try {
                session.getSubdirs(clusterId.asPath(), callback);
                gotCorrectError = false;
            } catch (ClusterInfoException ignored) {
                // not recovered yet; keep retrying until the deadline
            }
        }
        // if the loop above timed out without recovering, this call throws and fails the test
        session.getSubdirs(clusterId.asPath(), callback);
        // And join should work
        gotCorrectError = true;
        for (long endTime = System.currentTimeMillis() + baseTimeoutMillis; endTime > System.currentTimeMillis() && gotCorrectError; ) {
            Thread.sleep(1);
            try {
                session.mkdir(clusterId.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                gotCorrectError = false;
            } catch (ClusterInfoException ignored) {
                // join not possible yet; keep retrying until the deadline
            }
        }
        assertFalse(gotCorrectError);
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (session != null) {
            session.stop();
        }
    }
}
use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.
the class TestZookeeperClusterResilience method testSessionExpiredWithFullApp.
/**
 * Starts the three {@code FullApplication} clusters against a test server, forces the
 * session shared with the third cluster to expire, and checks that the application's
 * final message count keeps increasing both before and after the expiration.
 */
@Test
public void testSessionExpiredWithFullApp() throws Throwable {
    final AtomicLong processCount = new AtomicLong(0);
    final AtomicReference<ZookeeperSession> sessionRef = new AtomicReference<ZookeeperSession>();
    final Dempsy[] dempsy = new Dempsy[3];
    ZookeeperTestServer server = null;
    ZookeeperSession session = null;
    try {
        // bring the server up first
        server = new ZookeeperTestServer();
        server.start();

        final String zkAddress = "127.0.0.1:" + port;

        // a session that counts every watcher proxy it is asked to make
        session = new ZookeeperSession(zkAddress, 5000) {
            @Override
            public WatcherProxy makeWatcherProxy(ClusterInfoWatcher w) {
                processCount.incrementAndGet();
                return super.makeWatcherProxy(w);
            }
        };
        sessionRef.set(session);

        final FullApplication app = new FullApplication();
        final ApplicationDefinition topology = app.getTopology();

        // nothing has asked for a watcher proxy yet
        assertEquals(0, processCount.intValue());

        final String fullAppName = FullApplication.class.getSimpleName();

        // the adaptor and the first Mp cluster each get their own session factory
        dempsy[0] = getDempsyFor(new ClusterId(fullAppName, FullApplication.MyAdaptor.class.getSimpleName()), topology);
        dempsy[0].setClusterSessionFactory(new ZookeeperSessionFactory(zkAddress, 5000));
        dempsy[1] = getDempsyFor(new ClusterId(fullAppName, FullApplication.MyMp.class.getSimpleName()), topology);
        dempsy[1].setClusterSessionFactory(new ZookeeperSessionFactory(zkAddress, 5000));

        // the rank Mp cluster is handed the counting session created above
        dempsy[2] = getDempsyFor(new ClusterId(fullAppName, FullApplication.MyRankMp.class.getSimpleName()), topology);
        dempsy[2].setClusterSessionFactory(new ClusterInfoSessionFactory() {
            @Override
            public ClusterInfoSession createSession() throws ClusterInfoException {
                return sessionRef.get();
            }
        });

        // start everything in reverse order
        for (int i = dempsy.length - 1; i >= 0; i--) {
            dempsy[i].start();
        }

        // true once the final message count has moved more than 100 past the given baseline
        final Condition<Long> movedPastBaseline = new Condition<Long>() {
            @Override
            public boolean conditionMet(Long baseline) {
                return app.finalMessageCount.get() > (baseline + 100L);
            }
        };

        // make sure the final count is incrementing
        long curCount = app.finalMessageCount.get();
        assertTrue(poll(30000, curCount, movedPastBaseline));

        logger.trace("Killing zookeeper");
        final ZooKeeper origZk = session.zkref.get();
        ZookeeperTestServer.forceSessionExpiration(origZk);
        logger.trace("Killed zookeeper");

        // wait for the current session to go invalid
        assertTrue(poll(baseTimeoutMillis, origZk, new Condition<ZooKeeper>() {
            @Override
            public boolean conditionMet(ZooKeeper zk) {
                return !zk.getState().isAlive();
            }
        }));

        // make sure the final count is STILL incrementing
        curCount = app.finalMessageCount.get();
        assertTrue(poll(30000, curCount, movedPastBaseline));
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (session != null) {
            session.stop();
        }
        // first ask every started node to stop, then wait for each of them
        for (Dempsy node : dempsy) {
            if (node != null) {
                node.stop();
            }
        }
        for (Dempsy node : dempsy) {
            if (node != null) {
                assertTrue(node.waitToBeStopped(baseTimeoutMillis));
            }
        }
    }
}
use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.
the class TestZookeeperClusterResilience method testSessionExpired.
/**
 * Registers a watcher on a cluster path, forces the session to expire, and checks that the
 * watcher is called back and that the session can eventually be used again.
 */
@Test
public void testSessionExpired() throws Throwable {
    ZookeeperSession session = null;
    ZookeeperTestServer server = null;
    try {
        // bring the server up first
        server = new ZookeeperTestServer();
        server.start();

        session = new ZookeeperSession("127.0.0.1:" + port, 5000);
        final ClusterId clusterId = new ClusterId(appname, "testSessionExpired");
        createClusterLevel(clusterId, session);

        // every call flags itself and re-registers the watcher on the cluster path
        final TestWatcher watcher = new TestWatcher(session) {
            @Override
            public void process() {
                try {
                    called.set(true);
                    logger.trace("process called on TestWatcher.");
                    session.exists(clusterId.asPath(), this);
                    session.getSubdirs(clusterId.asPath(), this);
                } catch (ClusterInfoException cie) {
                    throw new RuntimeException(cie);
                }
            }
        };

        // met as soon as the watcher has been called
        final Condition<TestWatcher> wasCalled = new Condition<TestWatcher>() {
            @Override
            public boolean conditionMet(TestWatcher w) {
                return w.called.get();
            }
        };

        // met as soon as the watcher can run process() without anything being thrown
        final Condition<TestWatcher> processSucceeds = new Condition<TestWatcher>() {
            @Override
            public boolean conditionMet(TestWatcher w) {
                try {
                    w.process();
                } catch (Throwable th) {
                    return false;
                }
                return true;
            }
        };

        // calling process() directly registers the watcher with the session ...
        watcher.process();
        // ... which also set the flag, so clear it again
        watcher.called.set(false);

        ZookeeperTestServer.forceSessionExpiration(session.zkref.get());

        // we should see the session expiration in a callback
        assertTrue(poll(5000, watcher, wasCalled));

        // and eventually a reconnect
        assertTrue(poll(5000, watcher, processSucceeds));

        createClusterLevel(clusterId, session);
        final boolean clusterPathExists = session.exists(clusterId.asPath(), watcher);
        assertTrue(clusterPathExists);
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (session != null) {
            session.stop();
        }
    }
}
use of com.nokia.dempsy.cluster.ClusterInfoException in project Dempsy by Dempsy.
the class TestZookeeperClusterResilience method testRecoverWithIOException.
/**
 * Forces the session to expire while client creation is rigged to throw {@code IOException},
 * checks that joining fails during that time, then lifts the fault and checks that the
 * watcher is called and that creating the cluster level and joining work again.
 */
@Test
public void testRecoverWithIOException() throws Throwable {
    // handle kept outside the try so the finally block can stop the session
    ZookeeperSession sessionToStop = null;
    ZookeeperTestServer server = null;
    try {
        // bring the server up first
        server = new ZookeeperTestServer();
        server.start();

        // a session whose client creation fails for as long as forceIOException is set
        final ZookeeperSession session = new ZookeeperSession("127.0.0.1:" + port, 5000) {
            @Override
            protected ZooKeeper makeZooKeeperClient(String connectString, int sessionTimeout) throws IOException {
                if (!forceIOException.get()) {
                    return super.makeZooKeeperClient(connectString, sessionTimeout);
                }
                forceIOExceptionLatch.countDown();
                throw new IOException("Fake IO Problem.");
            }
        };
        sessionToStop = session;

        final ClusterId clusterId = new ClusterId(appname, "testRecoverWithIOException");
        TestUtils.createClusterLevel(clusterId, session);

        // re-registers itself on the cluster path and only then flags that it was called
        final TestWatcher watcher = new TestWatcher(session) {
            @Override
            public void process() {
                try {
                    session.getSubdirs(clusterId.asPath(), this);
                    called.set(true);
                } catch (ClusterInfoException cie) {
                    throw new RuntimeException(cie);
                }
            }
        };
        watcher.process();

        // force the ioexception to happen
        forceIOException.set(true);
        ZookeeperTestServer.forceSessionExpiration(session.zkref.get());

        // now in the background it should be retrying but hosed.
        final boolean retriesObserved = forceIOExceptionLatch.await(baseTimeoutMillis * 3, TimeUnit.MILLISECONDS);
        assertTrue(retriesObserved);

        // joining should now fail since recovery is being prevented by the IOExceptions
        assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, new Condition<ClusterId>() {
            @Override
            public boolean conditionMet(ClusterId o) throws Throwable {
                try {
                    session.mkdir(o.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                } catch (ClusterInfoException e) {
                    return true;
                }
                return false;
            }
        }));

        // reset the callbacker ...
        watcher.called.set(false);

        // now we should allow the code to proceed.
        forceIOException.set(false);

        // wait for the callback
        assertTrue(poll(baseTimeoutMillis, watcher, new Condition<TestWatcher>() {
            @Override
            public boolean conditionMet(TestWatcher w) {
                return w.called.get();
            }
        }));

        // this should eventually recover.
        assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, new Condition<ClusterId>() {
            @Override
            public boolean conditionMet(ClusterId o) throws Throwable {
                try {
                    TestUtils.createClusterLevel(o, session);
                    session.mkdir(o.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                } catch (ClusterInfoException e) {
                    return false;
                }
                return true;
            }
        }));

        session.getSubdirs(clusterId.asPath(), watcher);

        // And join should work
        assertTrue(TestUtils.poll(baseTimeoutMillis, clusterId, new Condition<ClusterId>() {
            @Override
            public boolean conditionMet(ClusterId o) throws Throwable {
                try {
                    session.mkdir(o.asPath() + "/join-1", null, DirMode.EPHEMERAL);
                } catch (ClusterInfoException e) {
                    return false;
                }
                return true;
            }
        }));
    } finally {
        if (server != null) {
            server.shutdown();
        }
        if (sessionToStop != null) {
            sessionToStop.stop();
        }
    }
}
Aggregations