use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in project sling by apache.
the class AbstractClusterTest method pingConnector.
/**
 * Sends one connector ping from {@code from} to {@code to} and, when the ping
 * succeeds, registers the reply announcement on {@code from}.
 *
 * @param from the instance on whose behalf the announcement is created and
 *             on which the reply gets registered
 * @param to the instance that receives the ping
 * @return {@code true} if the ping went through and the reply was registered,
 *         {@code false} if the ping raised an {@link AssertionError} or an
 *         {@link UndefinedClusterViewException}
 * @throws UndefinedClusterViewException only from the calls outside the guarded
 *         ping (creating the announcement / registering the reply)
 */
private boolean pingConnector(final VirtualInstance from, final VirtualInstance to) throws UndefinedClusterViewException {
    // built outside the try: a failure here is propagated, not turned into 'false'
    final Announcement outgoing = createFromAnnouncement(from);
    final Announcement reply;
    try {
        reply = ping(to, outgoing);
    } catch (AssertionError assertionFailure) {
        logger.warn("pingConnector: ping failed, assertionError: " + assertionFailure);
        return false;
    } catch (UndefinedClusterViewException undefinedView) {
        logger.warn("pingConnector: ping failed, currently the cluster view is undefined: " + undefinedView);
        return false;
    }
    // only reached when the ping itself succeeded
    registerReplyAnnouncement(from, reply);
    return true;
}
use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in project sling by apache.
the class AbstractClusterTest method testConnectorSwitching4139.
/**
 * ok, this test should do the following:
 * <ul>
 * <li>cluster A with instance 1 and instance 2</li>
 * <li>cluster B with instance 3 and instance 4</li>
 * <li>cluster C with instance 5</li>
 * <li>initially, instance3 is pinging instance1, and instance 5 is pinging instance1 as well (MAC hub)</li>
 * <li>that should result in instance3 and 5 to inherit the rest from instance1</li>
 * <li>then simulate load balancer switching from instance1 to instance2 - hence pings go to instance2</li>
 * <li>finally a new instance is built with instance1's slingId on instance2's repository,
 *     instance4's view checker is stopped, and the test waits until instance3 sees
 *     itself alone in its cluster</li>
 * </ul>
 */
//TODO: this takes env 45sec
@Category(Slow.class)
@Test
public void testConnectorSwitching4139() throws Throwable {
    // passed to setMinEventDelay(..) of the five initial instances
    final int MIN_EVENT_DELAY = 1;
    // reset any setup that was done - we start with a different setup than the default one
    tearDown();
    // switch the discovery logger to DEBUG, remembering the previous level in the 'logLevel' field
    // (this method does not restore it - presumably done elsewhere, e.g. in tearDown; TODO confirm)
    final org.apache.log4j.Logger discoveryLogger = RootLogger.getLogger("org.apache.sling.discovery");
    logLevel = discoveryLogger.getLevel();
    discoveryLogger.setLevel(Level.DEBUG);
    // cluster A: instance1 creates the repository, instance2 shares it
    instance1 = newBuilder().setDebugName("instance1").newRepository("/var/discovery/clusterA/", true).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    instance2 = newBuilder().setDebugName("instance2").useRepositoryOf(instance1).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    // now launch the remote instance
    // cluster B: instance3 creates its own repository, instance4 shares it
    instance3 = newBuilder().setDebugName("instance3").newRepository("/var/discovery/clusterB/", false).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    instance4 = newBuilder().setDebugName("instance4").useRepositoryOf(instance3).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    // cluster C: instance5 on its own repository
    instance5 = newBuilder().setDebugName("instance5").newRepository("/var/discovery/clusterC/", false).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    // join the instances to form a cluster by sending out heartbeats
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    Thread.sleep(500);
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    Thread.sleep(500);
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    Thread.sleep(500);
    // no connector pings were sent yet: each cluster is asserted on its own
    assertSameTopology(new SimpleClusterView(instance1, instance2));
    assertSameTopology(new SimpleClusterView(instance3, instance4));
    assertSameTopology(new SimpleClusterView(instance5));
    // create a topology connector from instance3 to instance1
    // -> corresponds to starting to ping
    // (instance5 pings instance1 as well; the ping results are not checked in these two rounds)
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    pingConnector(instance3, instance1);
    pingConnector(instance5, instance1);
    Thread.sleep(500);
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    pingConnector(instance3, instance1);
    pingConnector(instance5, instance1);
    Thread.sleep(500);
    // make asserts on the topology
    logger.info("testConnectorSwitching4139: instance1.slingId=" + instance1.slingId);
    logger.info("testConnectorSwitching4139: instance2.slingId=" + instance2.slingId);
    logger.info("testConnectorSwitching4139: instance3.slingId=" + instance3.slingId);
    logger.info("testConnectorSwitching4139: instance4.slingId=" + instance4.slingId);
    logger.info("testConnectorSwitching4139: instance5.slingId=" + instance5.slingId);
    instance1.dumpRepo();
    // all three clusters are now asserted together as one topology
    assertSameTopology(new SimpleClusterView(instance1, instance2), new SimpleClusterView(instance3, instance4), new SimpleClusterView(instance5));
    // simulate a crash of instance1, resulting in load-balancer to switch the pings
    // (instance1 is left out of the heartbeats from here on; pings now target instance2)
    boolean success = false;
    for (int i = 0; i < 25; i++) {
        // loop for max 25 times, min 20 times
        // (the loop can only break in a round with i >= 20 in which both pings succeed)
        runHeartbeatOnceWith(instance2, instance3, instance4, instance5);
        final boolean ping1 = pingConnector(instance3, instance2);
        final boolean ping2 = pingConnector(instance5, instance2);
        if (ping1 && ping2) {
            // both pings were fine - hence break
            // NOTE: 'success' is not reset if a later round fails again; the
            // asserted pings after the loop re-check both connectors
            success = true;
            logger.info("testConnectorSwitching4139: successfully switched all pings to instance2 after " + i + " rounds.");
            if (i < 20) {
                logger.info("testConnectorSwitching4139: min loop cnt not yet reached: i=" + i);
                // 20x1000ms = 20sec max - (vs 10sec timeout) - should be enough for timing out
                Thread.sleep(1000);
                continue;
            }
            break;
        }
        logger.info("testConnectorSwitching4139: looping cos ping1=" + ping1 + ", ping2=" + ping2);
        // 25x1000ms = 25sec max - (vs 10sec timeout)
        Thread.sleep(1000);
    }
    assertTrue(success);
    // one final heartbeat
    runHeartbeatOnceWith(instance2, instance3, instance4, instance5);
    assertTrue(pingConnector(instance3, instance2));
    assertTrue(pingConnector(instance5, instance2));
    instance2.dumpRepo();
    // instance1 is expected to be gone from cluster A by now
    assertSameTopology(new SimpleClusterView(instance2), new SimpleClusterView(instance3, instance4), new SimpleClusterView(instance5));
    // restart instance1, crash instance4
    // (the "crash" is stopping instance4's view checker and leaving it out of the heartbeats;
    // the "restart" is a new instance on instance2's repository reusing instance1's slingId)
    instance4.stopViewChecker();
    instance1Restarted = newBuilder().setDebugName("instance1").useRepositoryOf(instance2).setConnectorPingTimeout(Integer.MAX_VALUE).setMinEventDelay(1).setSlingId(instance1.getSlingId()).build();
    runHeartbeatOnceWith(instance1Restarted, instance2, instance3, instance5);
    // give these heartbeats/votes some time .. so sleep 2sec (timeout is 10sec, so should be safe)
    Thread.sleep(2000);
    assertTrue(pingConnector(instance3, instance2));
    assertTrue(pingConnector(instance5, instance2));
    // wait (max 40 rounds of 500ms) until instance3 reports a cluster of size 1;
    // success is only recorded in a round with i >= 30
    success = false;
    for (int i = 0; i < 40; i++) {
        runHeartbeatOnceWith(instance1Restarted, instance2, instance3, instance5);
        // NOTE(review): this calls checkView() on the original 'instance1' object,
        // not on 'instance1Restarted' - confirm that this is intended
        instance1.getViewChecker().checkView();
        // since instance3 *can* have an undefined cluster view..
        // (hence the result of this ping is neither asserted nor allowed to fail the test)
        try {
            pingConnector(instance3, instance2);
        } catch (UndefinedClusterViewException ucve) {
            // ignore
        }
        // return value is not checked here either
        pingConnector(instance5, instance2);
        // look up instance3's own InstanceDescription in the topology as seen by instance3
        final TopologyView topology = instance3.getDiscoveryService().getTopology();
        InstanceDescription i3 = null;
        for (Iterator<InstanceDescription> it = topology.getInstances().iterator(); it.hasNext(); ) {
            final InstanceDescription id = it.next();
            if (id.getSlingId().equals(instance3.slingId)) {
                i3 = id;
                break;
            }
        }
        assertNotNull(i3);
        assertEquals(instance3.slingId, i3.getSlingId());
        final ClusterView i3Cluster = i3.getClusterView();
        final int i3ClusterSize = i3Cluster.getInstances().size();
        if (i3ClusterSize == 1) {
            // instance4 is no longer part of instance3's cluster view
            if (i < 30) {
                logger.info("testConnectorSwitching4139: [2] min loop cnt not yet reached: i=" + i);
                // 30x500ms = 15sec max - (vs 10sec-2sec[sleep] timeout) - should be enough for timing out
                Thread.sleep(500);
                continue;
            }
            success = true;
            logger.info("testConnectorSwitching4139: i3ClusterSize: " + i3ClusterSize + ", i=" + i + " (success)");
            break;
        }
        logger.info("testConnectorSwitching4139: i3ClusterSize: " + i3ClusterSize + ", i=" + i);
        Thread.sleep(500);
    }
    logger.info("testConnectorSwitching4139: instance1Restarted.slingId=" + instance1Restarted.slingId);
    logger.info("testConnectorSwitching4139: instance2.slingId=" + instance2.slingId);
    logger.info("testConnectorSwitching4139: instance3.slingId=" + instance3.slingId);
    logger.info("testConnectorSwitching4139: instance4.slingId=" + instance4.slingId);
    logger.info("testConnectorSwitching4139: instance5.slingId=" + instance5.slingId);
    instance1Restarted.dumpRepo();
    assertTrue(success);
    // final expected topology: {instance1Restarted, instance2}, {instance3}, {instance5}
    assertSameTopology(new SimpleClusterView(instance1Restarted, instance2), new SimpleClusterView(instance3), new SimpleClusterView(instance5));
    // only reached when all asserts above passed
    instance1Restarted.stop();
}
use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in project sling by apache.
the class AbstractSingleInstanceTest method testBootstrap.
/**
 * Verifies the bootstrap sequence of a single instance: before any heartbeat
 * the local cluster view is undefined and a newly bound listener receives no
 * events; after two heartbeat/check-view rounds exactly one
 * {@code TOPOLOGY_INIT} event carrying a view has been delivered and the
 * instance reports an established view.
 */
@Test
public void testBootstrap() throws Throwable {
    logger.info("testBootstrap: start");
    // SLING-5030 : isolated mode is gone, replaced with exception
    assertLocalClusterViewUndefined();
    // SLING-3750 : with delaying the init event, we now should NOT get any events
    // before we let the view establish (which happens via heartbeats below)
    AssertingTopologyEventListener ada = new AssertingTopologyEventListener();
    instance.bindTopologyEventListener(ada);
    assertEquals(0, ada.getEvents().size());
    assertEquals(0, ada.getUnexpectedCount());
    // binding a listener must not have established a view either
    assertLocalClusterViewUndefined();
    ada.addExpected(Type.TOPOLOGY_INIT);
    instance.heartbeatsAndCheckView();
    Thread.sleep(1000);
    instance.heartbeatsAndCheckView();
    Thread.sleep(1000);
    logger.info("testBootstrap: dumping repo...");
    instance.dumpRepo();
    logger.info("testBootstrap: dumping listener...");
    ada.dump();
    assertEquals(0, ada.getUnexpectedCount());
    assertEquals(1, ada.getEvents().size());
    // takes the single event out of the listener's event list
    TopologyEvent initEvent = ada.getEvents().remove(0);
    assertNotNull(initEvent);
    assertNotNull(initEvent.getNewView());
    assertNotNull(initEvent.getNewView().getClusterViews());
    // after the view was established though, we expect it to be a normal
    // EstablishedInstanceDescription
    instance.assertEstablishedView();
    logger.info("testBootstrap: end");
}

/**
 * Asserts that asking the cluster view service of {@code instance} for the
 * local cluster view fails with an {@link UndefinedClusterViewException}.
 */
private void assertLocalClusterViewUndefined() throws Exception {
    try {
        instance.getClusterViewService().getLocalClusterView();
        fail("should complain");
    } catch (UndefinedClusterViewException expected) {
        // ok - this is the outcome the caller asserts
    }
}
use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in project sling by apache.
the class OakClusterViewService method asClusterView.
/**
 * Converts a discovery-lite descriptor into a {@link LocalClusterView}.
 * <p>
 * The active cluster node ids of the descriptor are mapped to slingIds, sorted
 * by their leaderElectionId (the first one after sorting becomes the leader)
 * and added to the view as {@link DefaultInstanceDescription}s.
 *
 * @param descriptor the discovery-lite descriptor to convert, must not be null
 * @param resourceResolver resolver used for all repository lookups, must not be null
 * @return the cluster view, guaranteed to contain the local instance
 * @throws IllegalArgumentException if either argument is null
 * @throws UndefinedClusterViewException with {@code NO_ESTABLISHED_VIEW} if the
 *         descriptor is not final, has no active ids, or an active id cannot
 *         (yet) be mapped to a slingId or a leaderElectionId; with
 *         {@code ISOLATED_FROM_TOPOLOGY} if the resulting view does not contain
 *         the local instance
 * @throws Exception declared for the repository helper calls made here
 */
private LocalClusterView asClusterView(DiscoveryLiteDescriptor descriptor, ResourceResolver resourceResolver) throws Exception {
    if (descriptor == null) {
        throw new IllegalArgumentException("descriptor must not be null");
    }
    if (resourceResolver == null) {
        throw new IllegalArgumentException("resourceResolver must not be null");
    }
    logger.trace("asClusterView: start");
    String clusterViewId = descriptor.getViewId();
    if (clusterViewId == null || clusterViewId.length() == 0) {
        logger.trace("asClusterView: no clusterId provided by discovery-lite descriptor - reading from repo.");
        clusterViewId = readOrDefineClusterId(resourceResolver);
    }
    // the sync token consists of the descriptor's sequence number only
    String localClusterSyncTokenId = String.valueOf(descriptor.getSeqNum());
    if (!descriptor.isFinal()) {
        throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "descriptor is not yet final: " + descriptor);
    }
    LocalClusterView cluster = new LocalClusterView(clusterViewId, localClusterSyncTokenId);
    long me = descriptor.getMyId();
    int[] activeIds = descriptor.getActiveIds();
    if (activeIds == null || activeIds.length == 0) {
        throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "Descriptor contained no active ids: " + descriptor.getDescriptorStr());
    }
    // convert int[] to List<Integer>
    //TODO: could use Guava's Ints class here..
    List<Integer> activeIdsList = new LinkedList<Integer>();
    for (Integer integer : activeIds) {
        activeIdsList.add(integer);
    }
    // step 1: resolve every active id exactly once and remember both its
    // slingId and its leaderElectionId, so that sorting and view creation
    // below work on the very same mapping
    final Map<Integer, String> slingIds = new HashMap<Integer, String>();
    final Map<Integer, String> leaderElectionIds = new HashMap<Integer, String>();
    for (Integer id : activeIdsList) {
        String slingId = idMapService.toSlingId(id, resourceResolver);
        if (slingId == null) {
            idMapService.clearCache();
            throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "no slingId mapped for clusterNodeId=" + id);
        }
        String leaderElectionId = getLeaderElectionId(resourceResolver, slingId);
        if (leaderElectionId == null) {
            // without a leaderElectionId the instance cannot be ordered; report
            // this explicitly instead of failing with a NullPointerException
            // inside the comparator below
            throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "no leaderElectionId available yet for slingId=" + slingId);
        }
        slingIds.put(id, slingId);
        leaderElectionIds.put(id, leaderElectionId);
    }
    // step 2: sort activeIds by their leaderElectionId
    // serves two purposes: pos[0] is then leader
    // and the rest are properly sorted within the cluster
    Collections.sort(activeIdsList, new Comparator<Integer>() {
        @Override
        public int compare(Integer arg0, Integer arg1) {
            return leaderElectionIds.get(arg0).compareTo(leaderElectionIds.get(arg1));
        }
    });
    // step 3: create the instance descriptions in sorted order
    // (plain iteration - index access on a LinkedList would be quadratic)
    boolean isLeader = true;
    for (int id : activeIdsList) {
        boolean isOwn = id == me;
        String slingId = slingIds.get(id);
        Map<String, String> properties = readProperties(slingId, resourceResolver);
        // create a new instance (adds itself to the cluster in the constructor)
        new DefaultInstanceDescription(cluster, isLeader, isOwn, slingId, properties);
        // thx to sorting above only the first entry is leader
        isLeader = false;
    }
    logger.trace("asClusterView: returning {}", cluster);
    InstanceDescription local = cluster.getLocalInstance();
    if (local != null) {
        return cluster;
    } else {
        logger.info("getClusterView: the local instance (" + getSlingId() + ") is currently not included in the existing established view! " + "This is normal at startup. At other times is pseudo-network-partitioning is an indicator for repository/network-delays or clocks-out-of-sync (SLING-3432). " + "(increasing the heartbeatTimeout can help as a workaround too) " + "The local instance will stay in TOPOLOGY_CHANGING or pre _INIT mode until a new vote was successful.");
        throw new UndefinedClusterViewException(Reason.ISOLATED_FROM_TOPOLOGY, "established view does not include local instance - isolated");
    }
}
use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in project sling by apache.
the class OakClusterViewService method getLocalClusterView.
/**
 * Reads the current discovery-lite descriptor and converts it into the local
 * cluster view.
 * <p>
 * When the descriptor's sequence number differs from the last one seen, the
 * idmap cache is cleared first. The resource resolver obtained for the lookup
 * is closed before returning, on every path.
 *
 * @return the local cluster view
 * @throws UndefinedClusterViewException if the view is currently undefined, or
 *         (with {@code REPOSITORY_EXCEPTION}) if any other exception occurred
 *         while processing the descriptor
 */
@Override
public LocalClusterView getLocalClusterView() throws UndefinedClusterViewException {
    logger.trace("getLocalClusterView: start");
    ResourceResolver resolver = null;
    try {
        resolver = getResourceResolver();
        final DiscoveryLiteDescriptor liteDescriptor = DiscoveryLiteDescriptor.getDescriptorFrom(resolver);
        final boolean seqNumChanged = lastSeqNum != liteDescriptor.getSeqNum();
        if (seqNumChanged) {
            logger.info("getLocalClusterView: sequence number change detected - clearing idmap cache");
            idMapService.clearCache();
            lastSeqNum = liteDescriptor.getSeqNum();
        }
        return asClusterView(liteDescriptor, resolver);
    } catch (UndefinedClusterViewException undefinedView) {
        // already the right exception type: log briefly and pass it on unchanged
        logger.info("getLocalClusterView: undefined clusterView: " + undefinedView.getReason() + " - " + undefinedView.getMessage());
        throw undefinedView;
    } catch (Exception failure) {
        final String detail = failure.getMessage();
        final boolean descriptorMissing = detail != null && detail.contains("No Descriptor value available");
        final String logLine = "getLocalClusterView: repository exception: " + failure;
        if (descriptorMissing) {
            // a missing descriptor value is logged as a warning, without stack trace
            logger.warn(logLine);
        } else {
            logger.error(logLine, failure);
        }
        throw new UndefinedClusterViewException(Reason.REPOSITORY_EXCEPTION, "Exception while processing descriptor: " + failure);
    } finally {
        logger.trace("getLocalClusterView: end");
        if (resolver != null) {
            resolver.close();
        }
    }
}
Aggregations