Search in sources:

Example 1 with UndefinedClusterViewException

Use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in the Apache Sling project.

Class AbstractClusterTest, method pingConnector.

/**
 * Sends one connector ping from {@code from} to {@code to} and, if a reply
 * was obtained, registers that reply for {@code from}.
 *
 * @param from the instance on whose behalf the announcement is created
 * @param to the instance that gets pinged
 * @return {@code true} if the ping returned and the reply was registered;
 *         {@code false} if the ping itself failed with an
 *         {@link AssertionError} or an {@link UndefinedClusterViewException}
 * @throws UndefinedClusterViewException if raised outside the guarded ping call
 */
private boolean pingConnector(final VirtualInstance from, final VirtualInstance to) throws UndefinedClusterViewException {
    final Announcement outgoing = createFromAnnouncement(from);
    final Announcement reply;
    try {
        reply = ping(to, outgoing);
    } catch (UndefinedClusterViewException undefinedView) {
        // the pinged side has no established view (yet) - report as a failed ping
        logger.warn("pingConnector: ping failed, currently the cluster view is undefined: " + undefinedView);
        return false;
    } catch (AssertionError assertionFailure) {
        // an assertion inside ping() tripped - report as a failed ping instead of failing the test
        logger.warn("pingConnector: ping failed, assertionError: " + assertionFailure);
        return false;
    }
    // only reached when the ping succeeded
    registerReplyAnnouncement(from, reply);
    return true;
}
Also used : Announcement(org.apache.sling.discovery.base.connectors.announcement.Announcement) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException)

Example 2 with UndefinedClusterViewException

Use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in the Apache Sling project.

Class AbstractClusterTest, method testConnectorSwitching4139.

/*
 * This test does the following:
 *  - cluster A with instance 1 and instance 2
 *  - cluster B with instance 3 and instance 4
 *  - cluster C with instance 5
 *
 *  - initially, instance3 is pinging instance1, and instance5 is pinging instance1 as well (MAC hub);
 *    that should result in instance3 and instance5 inheriting the rest from instance1
 *  - then a load balancer switching from instance1 to instance2 is simulated - hence pings go to instance2
 *  - finally instance1 is restarted (same slingId, repository of instance2) while instance4's
 *    view checker is stopped, and the test waits until instance3 sees a cluster of size 1
 */
// TODO: this takes roughly 45sec
@Category(Slow.class)
@Test
public void testConnectorSwitching4139() throws Throwable {
    final int MIN_EVENT_DELAY = 1;
    // reset any setup that was done - we start with a different setup than the default one
    tearDown();
    // remember the current log level in the logLevel field, then switch discovery logging to DEBUG
    final org.apache.log4j.Logger discoveryLogger = RootLogger.getLogger("org.apache.sling.discovery");
    logLevel = discoveryLogger.getLevel();
    discoveryLogger.setLevel(Level.DEBUG);
    // cluster A: instance1 creates the repository, instance2 shares it
    instance1 = newBuilder().setDebugName("instance1").newRepository("/var/discovery/clusterA/", true).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    instance2 = newBuilder().setDebugName("instance2").useRepositoryOf(instance1).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    // now launch the remote instances: cluster B (instance3 + instance4) and cluster C (instance5)
    instance3 = newBuilder().setDebugName("instance3").newRepository("/var/discovery/clusterB/", false).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    instance4 = newBuilder().setDebugName("instance4").useRepositoryOf(instance3).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    instance5 = newBuilder().setDebugName("instance5").newRepository("/var/discovery/clusterC/", false).setConnectorPingTimeout(10).setConnectorPingInterval(999).setMinEventDelay(MIN_EVENT_DELAY).build();
    // join the instances to form a cluster by sending out heartbeats
    // (three rounds, each followed by a 500ms pause)
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    Thread.sleep(500);
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    Thread.sleep(500);
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    Thread.sleep(500);
    // before any connector pings, each cluster is checked on its own
    assertSameTopology(new SimpleClusterView(instance1, instance2));
    assertSameTopology(new SimpleClusterView(instance3, instance4));
    assertSameTopology(new SimpleClusterView(instance5));
    // create a topology connector from instance3 to instance1 (and from instance5 to instance1)
    // -> corresponds to starting to ping
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    pingConnector(instance3, instance1);
    pingConnector(instance5, instance1);
    Thread.sleep(500);
    runHeartbeatOnceWith(instance1, instance2, instance3, instance4, instance5);
    pingConnector(instance3, instance1);
    pingConnector(instance5, instance1);
    Thread.sleep(500);
    // make asserts on the topology
    logger.info("testConnectorSwitching4139: instance1.slingId=" + instance1.slingId);
    logger.info("testConnectorSwitching4139: instance2.slingId=" + instance2.slingId);
    logger.info("testConnectorSwitching4139: instance3.slingId=" + instance3.slingId);
    logger.info("testConnectorSwitching4139: instance4.slingId=" + instance4.slingId);
    logger.info("testConnectorSwitching4139: instance5.slingId=" + instance5.slingId);
    instance1.dumpRepo();
    // all three clusters are now expected in one topology
    assertSameTopology(new SimpleClusterView(instance1, instance2), new SimpleClusterView(instance3, instance4), new SimpleClusterView(instance5));
    // simulate a crash of instance1, resulting in load-balancer to switch the pings:
    // from here on instance1 gets no more heartbeats and the pings target instance2
    boolean success = false;
    for (int i = 0; i < 25; i++) {
        // loop for max 25 times, min 20 times
        runHeartbeatOnceWith(instance2, instance3, instance4, instance5);
        final boolean ping1 = pingConnector(instance3, instance2);
        final boolean ping2 = pingConnector(instance5, instance2);
        if (ping1 && ping2) {
            // both pings were fine - hence break (once the minimum loop count is reached)
            success = true;
            logger.info("testConnectorSwitching4139: successfully switched all pings to instance2 after " + i + " rounds.");
            if (i < 20) {
                logger.info("testConnectorSwitching4139: min loop cnt not yet reached: i=" + i);
                // 20x1000ms = 20sec max - (vs 10sec timeout) - should be enough for timing out
                Thread.sleep(1000);
                continue;
            }
            break;
        }
        logger.info("testConnectorSwitching4139: looping cos ping1=" + ping1 + ", ping2=" + ping2);
        // 25x1000ms = 25sec max - (vs 10sec timeout)
        Thread.sleep(1000);
    }
    assertTrue(success);
    // one final heartbeat
    runHeartbeatOnceWith(instance2, instance3, instance4, instance5);
    assertTrue(pingConnector(instance3, instance2));
    assertTrue(pingConnector(instance5, instance2));
    instance2.dumpRepo();
    // instance1 is expected to be gone from cluster A by now
    assertSameTopology(new SimpleClusterView(instance2), new SimpleClusterView(instance3, instance4), new SimpleClusterView(instance5));
    // restart instance1, crash instance4
    // (the restarted instance reuses instance1's slingId and joins via the repository of instance2)
    instance4.stopViewChecker();
    instance1Restarted = newBuilder().setDebugName("instance1").useRepositoryOf(instance2).setConnectorPingTimeout(Integer.MAX_VALUE).setMinEventDelay(1).setSlingId(instance1.getSlingId()).build();
    runHeartbeatOnceWith(instance1Restarted, instance2, instance3, instance5);
    // give these heartbeats/votes some time .. so sleep 2sec (timeout is 10sec, so should be safe)
    Thread.sleep(2000);
    assertTrue(pingConnector(instance3, instance2));
    assertTrue(pingConnector(instance5, instance2));
    success = false;
    for (int i = 0; i < 40; i++) {
        runHeartbeatOnceWith(instance1Restarted, instance2, instance3, instance5);
        // NOTE(review): this calls checkView() on the original instance1, not on instance1Restarted - confirm this is intended
        instance1.getViewChecker().checkView();
        // since instance3 *can* have an undefined cluster view..
        try {
            pingConnector(instance3, instance2);
        } catch (UndefinedClusterViewException ucve) {
            // ignore: an undefined cluster view on instance3 is tolerated in this phase
        }
        // NOTE(review): unlike the ping above, this one is not guarded against UndefinedClusterViewException
        pingConnector(instance5, instance2);
        // look up instance3's own description within the topology as seen by instance3
        final TopologyView topology = instance3.getDiscoveryService().getTopology();
        InstanceDescription i3 = null;
        for (Iterator<InstanceDescription> it = topology.getInstances().iterator(); it.hasNext(); ) {
            final InstanceDescription id = it.next();
            if (id.getSlingId().equals(instance3.slingId)) {
                i3 = id;
                break;
            }
        }
        assertNotNull(i3);
        assertEquals(instance3.slingId, i3.getSlingId());
        final ClusterView i3Cluster = i3.getClusterView();
        final int i3ClusterSize = i3Cluster.getInstances().size();
        // success means: instance3's cluster has shrunk to a single instance (instance4 is gone)
        if (i3ClusterSize == 1) {
            if (i < 30) {
                logger.info("testConnectorSwitching4139: [2] min loop cnt not yet reached: i=" + i);
                // 30x500ms = 15sec max - (vs 10sec-2sec[sleep] timeout) - should be enough for timing out
                Thread.sleep(500);
                continue;
            }
            success = true;
            logger.info("testConnectorSwitching4139: i3ClusterSize: " + i3ClusterSize + ", i=" + i + " (success)");
            break;
        }
        logger.info("testConnectorSwitching4139: i3ClusterSize: " + i3ClusterSize + ", i=" + i);
        Thread.sleep(500);
    }
    logger.info("testConnectorSwitching4139: instance1Restarted.slingId=" + instance1Restarted.slingId);
    logger.info("testConnectorSwitching4139: instance2.slingId=" + instance2.slingId);
    logger.info("testConnectorSwitching4139: instance3.slingId=" + instance3.slingId);
    logger.info("testConnectorSwitching4139: instance4.slingId=" + instance4.slingId);
    logger.info("testConnectorSwitching4139: instance5.slingId=" + instance5.slingId);
    instance1Restarted.dumpRepo();
    assertTrue(success);
    // final expectation: cluster A is complete again, cluster B consists of instance3 only
    assertSameTopology(new SimpleClusterView(instance1Restarted, instance2), new SimpleClusterView(instance3), new SimpleClusterView(instance5));
    instance1Restarted.stop();
}
Also used : ClusterView(org.apache.sling.discovery.ClusterView) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException) InstanceDescription(org.apache.sling.discovery.InstanceDescription) TopologyView(org.apache.sling.discovery.TopologyView) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with UndefinedClusterViewException

Use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in the Apache Sling project.

Class AbstractSingleInstanceTest, method testBootstrap.

/**
 * Verifies the bootstrap behaviour of a single instance: before the view is
 * established, asking for the local cluster view must fail and a freshly
 * bound listener must not have received any event; after two heartbeat
 * rounds exactly one event carrying a new view must have been delivered and
 * the instance must report an established view.
 *
 * @throws Throwable if any step of the test fails unexpectedly
 */
@Test
public void testBootstrap() throws Throwable {
    logger.info("testBootstrap: start");
    // SLING-5030 : isolated mode is gone, replaced with exception
    assertLocalClusterViewUndefined();
    // SLING-3750 : with delaying the init event, we now should NOT get any events
    // before we let the view establish (which happens via heartbeats below)
    AssertingTopologyEventListener ada = new AssertingTopologyEventListener();
    instance.bindTopologyEventListener(ada);
    assertEquals(0, ada.getEvents().size());
    assertEquals(0, ada.getUnexpectedCount());
    // binding a listener must not have established a view either
    assertLocalClusterViewUndefined();
    // from here on exactly one TOPOLOGY_INIT is expected
    ada.addExpected(Type.TOPOLOGY_INIT);
    instance.heartbeatsAndCheckView();
    Thread.sleep(1000);
    instance.heartbeatsAndCheckView();
    Thread.sleep(1000);
    // log prefix previously read "testBoostrap", which did not match the test name
    logger.info("testBootstrap: dumping repo...");
    instance.dumpRepo();
    logger.info("testBootstrap: dumping listener...");
    ada.dump();
    assertEquals(0, ada.getUnexpectedCount());
    assertEquals(1, ada.getEvents().size());
    TopologyEvent initEvent = ada.getEvents().remove(0);
    assertNotNull(initEvent);
    assertNotNull(initEvent.getNewView());
    assertNotNull(initEvent.getNewView().getClusterViews());
    // after the view was established though, we expect it to be a normal
    // EstablishedInstanceDescription
    instance.assertEstablishedView();
    logger.info("testBootstrap: end");
}

/**
 * Asserts that requesting the local cluster view currently fails with an
 * {@link UndefinedClusterViewException}; fails the test otherwise.
 *
 * @throws Throwable if the lookup fails with anything other than the expected exception
 */
private void assertLocalClusterViewUndefined() throws Throwable {
    try {
        instance.getClusterViewService().getLocalClusterView();
        fail("should complain");
    } catch (UndefinedClusterViewException expected) {
        // ok - no view has been established yet
    }
}
Also used : TopologyEvent(org.apache.sling.discovery.TopologyEvent) AssertingTopologyEventListener(org.apache.sling.discovery.base.its.setup.mock.AssertingTopologyEventListener) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException) Test(org.junit.Test)

Example 4 with UndefinedClusterViewException

Use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in the Apache Sling project.

Class OakClusterViewService, method asClusterView.

/**
 * Builds a {@link LocalClusterView} from a discovery-lite descriptor.
 * <p>
 * The descriptor's active ids are sorted by the leaderElectionId of the
 * slingId each one maps to; the first id after sorting becomes the leader.
 * One {@link DefaultInstanceDescription} is created per active id.
 *
 * @param descriptor the discovery-lite descriptor to convert, must not be null
 * @param resourceResolver resolver used for all repository lookups, must not be null
 * @return the cluster view, which is guaranteed to contain the local instance
 * @throws IllegalArgumentException if {@code descriptor} or {@code resourceResolver} is null
 * @throws UndefinedClusterViewException if the descriptor is not final, lists no
 *         active ids, contains an id without a slingId mapping, or if the
 *         resulting view does not include the local instance
 * @throws Exception if an id can no longer be resolved to a slingId while the
 *         instances are created, or if a lookup helper fails
 */
private LocalClusterView asClusterView(DiscoveryLiteDescriptor descriptor, ResourceResolver resourceResolver) throws Exception {
    if (descriptor == null) {
        throw new IllegalArgumentException("descriptor must not be null");
    }
    if (resourceResolver == null) {
        throw new IllegalArgumentException("resourceResolver must not be null");
    }
    logger.trace("asClusterView: start");
    String clusterViewId = descriptor.getViewId();
    if (clusterViewId == null || clusterViewId.length() == 0) {
        logger.trace("asClusterView: no clusterId provided by discovery-lite descriptor - reading from repo.");
        clusterViewId = readOrDefineClusterId(resourceResolver);
    }
    // the sync token is derived from the descriptor's sequence number only
    String localClusterSyncTokenId = String.valueOf(descriptor.getSeqNum());
    if (!descriptor.isFinal()) {
        throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "descriptor is not yet final: " + descriptor);
    }
    LocalClusterView cluster = new LocalClusterView(clusterViewId, localClusterSyncTokenId);
    long me = descriptor.getMyId();
    int[] activeIds = descriptor.getActiveIds();
    if (activeIds == null || activeIds.length == 0) {
        throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "Descriptor contained no active ids: " + descriptor.getDescriptorStr());
    }
    // convert int[] to List<Integer>
    //TODO: could use Guava's Ints class here..
    List<Integer> activeIdsList = new LinkedList<Integer>();
    for (int activeId : activeIds) {
        activeIdsList.add(activeId);
    }
    // step 1: sort activeIds by their leaderElectionId
    //   serves two purposes: pos[0] is then leader
    //   and the rest are properly sorted within the cluster
    final Map<Integer, String> leaderElectionIds = new HashMap<Integer, String>();
    for (Integer id : activeIdsList) {
        String slingId = idMapService.toSlingId(id, resourceResolver);
        if (slingId == null) {
            // drop the cached mappings so that a later attempt re-reads them
            idMapService.clearCache();
            throw new UndefinedClusterViewException(Reason.NO_ESTABLISHED_VIEW, "no slingId mapped for clusterNodeId=" + id);
        }
        String leaderElectionId = getLeaderElectionId(resourceResolver, slingId);
        leaderElectionIds.put(id, leaderElectionId);
    }
    Collections.sort(activeIdsList, new Comparator<Integer>() {

        @Override
        public int compare(Integer arg0, Integer arg1) {
            return leaderElectionIds.get(arg0).compareTo(leaderElectionIds.get(arg1));
        }
    });
    // step 2: create one instance description per active id, in sorted order.
    // Iterate instead of calling get(i): positional access on a LinkedList
    // walks the list on every call, which made this loop quadratic.
    // thx to sorting above the first element is leader indeed
    boolean isLeader = true;
    for (int id : activeIdsList) {
        boolean isOwn = id == me;
        String slingId = idMapService.toSlingId(id, resourceResolver);
        if (slingId == null) {
            idMapService.clearCache();
            logger.info("asClusterView: cannot resolve oak-clusterNodeId {} to a slingId", id);
            throw new Exception("Cannot resolve oak-clusterNodeId " + id + " to a slingId");
        }
        Map<String, String> properties = readProperties(slingId, resourceResolver);
        // create a new instance (adds itself to the cluster in the constructor)
        new DefaultInstanceDescription(cluster, isLeader, isOwn, slingId, properties);
        // only the first (sorted) entry is the leader
        isLeader = false;
    }
    logger.trace("asClusterView: returning {}", cluster);
    InstanceDescription local = cluster.getLocalInstance();
    if (local != null) {
        return cluster;
    } else {
        logger.info("getClusterView: the local instance (" + getSlingId() + ") is currently not included in the existing established view! " + "This is normal at startup. At other times is pseudo-network-partitioning is an indicator for repository/network-delays or clocks-out-of-sync (SLING-3432). " + "(increasing the heartbeatTimeout can help as a workaround too) " + "The local instance will stay in TOPOLOGY_CHANGING or pre _INIT mode until a new vote was successful.");
        throw new UndefinedClusterViewException(Reason.ISOLATED_FROM_TOPOLOGY, "established view does not include local instance - isolated");
    }
}
Also used : HashMap(java.util.HashMap) DefaultInstanceDescription(org.apache.sling.discovery.commons.providers.DefaultInstanceDescription) LinkedList(java.util.LinkedList) PersistenceException(org.apache.sling.api.resource.PersistenceException) LoginException(org.apache.sling.api.resource.LoginException) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException) LocalClusterView(org.apache.sling.discovery.commons.providers.spi.LocalClusterView) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException) DefaultInstanceDescription(org.apache.sling.discovery.commons.providers.DefaultInstanceDescription) InstanceDescription(org.apache.sling.discovery.InstanceDescription)

Example 5 with UndefinedClusterViewException

Use of org.apache.sling.discovery.base.commons.UndefinedClusterViewException in the Apache Sling project.

Class OakClusterViewService, method getLocalClusterView.

/**
 * Reads the current discovery-lite descriptor and converts it into the
 * local cluster view. When the descriptor's sequence number differs from
 * the last one seen, the id-map cache is cleared first.
 *
 * @return the local cluster view built from the current descriptor
 * @throws UndefinedClusterViewException if the view is undefined, or - with
 *         reason {@code REPOSITORY_EXCEPTION} - if any other exception occurs
 *         while obtaining or processing the descriptor
 */
@Override
public LocalClusterView getLocalClusterView() throws UndefinedClusterViewException {
    logger.trace("getLocalClusterView: start");
    ResourceResolver resolver = null;
    try {
        resolver = getResourceResolver();
        final DiscoveryLiteDescriptor liteDescriptor = DiscoveryLiteDescriptor.getDescriptorFrom(resolver);
        final boolean seqNumChanged = lastSeqNum != liteDescriptor.getSeqNum();
        if (seqNumChanged) {
            logger.info("getLocalClusterView: sequence number change detected - clearing idmap cache");
            idMapService.clearCache();
            lastSeqNum = liteDescriptor.getSeqNum();
        }
        return asClusterView(liteDescriptor, resolver);
    } catch (UndefinedClusterViewException undefinedView) {
        // already the right exception type: log it and pass it on unchanged
        logger.info("getLocalClusterView: undefined clusterView: " + undefinedView.getReason() + " - " + undefinedView.getMessage());
        throw undefinedView;
    } catch (Exception failure) {
        final String failureMessage = failure.getMessage();
        final boolean descriptorMissing = failureMessage != null && failureMessage.contains("No Descriptor value available");
        if (!descriptorMissing) {
            logger.error("getLocalClusterView: repository exception: " + failure, failure);
        } else {
            // a missing descriptor value is logged at warn level, without stack trace
            logger.warn("getLocalClusterView: repository exception: " + failure);
        }
        throw new UndefinedClusterViewException(Reason.REPOSITORY_EXCEPTION, "Exception while processing descriptor: " + failure);
    } finally {
        logger.trace("getLocalClusterView: end");
        // the resolver was opened by this method, so it is closed here on every path
        if (resolver != null) {
            resolver.close();
        }
    }
}
Also used : ResourceResolver(org.apache.sling.api.resource.ResourceResolver) DiscoveryLiteDescriptor(org.apache.sling.discovery.commons.providers.spi.base.DiscoveryLiteDescriptor) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException) PersistenceException(org.apache.sling.api.resource.PersistenceException) LoginException(org.apache.sling.api.resource.LoginException) UndefinedClusterViewException(org.apache.sling.discovery.base.commons.UndefinedClusterViewException)

Aggregations

UndefinedClusterViewException (org.apache.sling.discovery.base.commons.UndefinedClusterViewException)12 InstanceDescription (org.apache.sling.discovery.InstanceDescription)5 Test (org.junit.Test)5 ClusterView (org.apache.sling.discovery.ClusterView)4 LoginException (org.apache.sling.api.resource.LoginException)3 Announcement (org.apache.sling.discovery.base.connectors.announcement.Announcement)3 LocalClusterView (org.apache.sling.discovery.commons.providers.spi.LocalClusterView)3 GZIPOutputStream (java.util.zip.GZIPOutputStream)2 JsonException (javax.json.JsonException)2 PersistenceException (org.apache.sling.api.resource.PersistenceException)2 ResourceResolver (org.apache.sling.api.resource.ResourceResolver)2 AnnouncementFilter (org.apache.sling.discovery.base.connectors.announcement.AnnouncementFilter)2 AssertingTopologyEventListener (org.apache.sling.discovery.base.its.setup.mock.AssertingTopologyEventListener)2 Category (org.junit.experimental.categories.Category)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1 Date (java.util.Date)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1