
Example 16 with SolrZkClient

Use of org.apache.solr.common.cloud.SolrZkClient in project lucene-solr by apache.

From the class OverseerTest, method testShardLeaderChange.

@Test
public void testShardLeaderChange() throws Exception {
    String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
    final ZkTestServer server = new ZkTestServer(zkDir);
    SolrZkClient controllerClient = null;
    ZkStateReader reader = null;
    MockZKController mockController = null;
    MockZKController mockController2 = null;
    OverseerRestarter killer = null;
    Thread killerThread = null;
    try {
        server.run();
        controllerClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
        AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
        AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
        ZkController.createClusterZkNodes(controllerClient);
        killer = new OverseerRestarter(server.getZkAddress());
        killerThread = new Thread(killer);
        killerThread.start();
        reader = new ZkStateReader(controllerClient);
        reader.createClusterStateWatchersAndUpdate();
        for (int i = 0; i < atLeast(4); i++) {
            //for each round allow 1 kill
            killCounter.incrementAndGet();
            mockController = new MockZKController(server.getZkAddress(), "node1");
            mockController.publishState(COLLECTION, "core1", "node1", Replica.State.ACTIVE, 1);
            if (mockController2 != null) {
                mockController2.close();
                mockController2 = null;
            }
            mockController.publishState(COLLECTION, "core1", "node1", Replica.State.RECOVERING, 1);
            mockController2 = new MockZKController(server.getZkAddress(), "node2");
            mockController.publishState(COLLECTION, "core1", "node1", Replica.State.ACTIVE, 1);
            verifyShardLeader(reader, COLLECTION, "shard1", "core1");
            mockController2.publishState(COLLECTION, "core4", "node2", Replica.State.ACTIVE, 1);
            mockController.close();
            mockController = null;
            verifyShardLeader(reader, COLLECTION, "shard1", "core4");
        }
    } finally {
        if (killer != null) {
            killer.run = false;
            if (killerThread != null) {
                killerThread.join();
            }
        }
        close(mockController);
        close(mockController2);
        close(controllerClient);
        close(reader);
        server.shutdown();
    }
}
Also used: ZkStateReader (org.apache.solr.common.cloud.ZkStateReader), SolrZkClient (org.apache.solr.common.cloud.SolrZkClient), Test (org.junit.Test)
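
The leader hand-off above is asserted through a verifyShardLeader helper that OverseerTest defines elsewhere and that is not shown in this excerpt. A minimal sketch of such a polling check, using only APIs that already appear in these examples (getClusterState, getSlicesMap, Slice.getLeader), might look like the following; the real helper may differ in timeout and error handling.

// Hypothetical sketch of a polling leader assertion; not the verbatim OverseerTest helper.
private void verifyShardLeader(ZkStateReader reader, String collection, String shard, String expectedCore)
        throws Exception {
    for (int i = 0; i < 150; i++) {
        ClusterState state = reader.getClusterState();
        Map<String, Slice> slices = state.getSlicesMap(collection);
        if (slices != null && slices.containsKey(shard)) {
            Replica leader = slices.get(shard).getLeader();
            if (leader != null && expectedCore.equals(leader.getStr(ZkStateReader.CORE_NAME_PROP))) {
                // the expected core has become the shard leader
                return;
            }
        }
        // leader not elected (or not yet visible) - wait and re-check
        Thread.sleep(200);
    }
    fail("Expected core " + expectedCore + " to become leader of " + shard);
}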

Example 17 with SolrZkClient

Use of org.apache.solr.common.cloud.SolrZkClient in project lucene-solr by apache.

From the class OverseerTest, method testShardAssignmentBigger.

@Test
public void testShardAssignmentBigger() throws Exception {
    String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
    //how many simulated nodes (num of threads)
    final int nodeCount = random().nextInt(TEST_NIGHTLY ? 50 : 10) + (TEST_NIGHTLY ? 50 : 10) + 1;
    //how many cores to register
    final int coreCount = random().nextInt(TEST_NIGHTLY ? 100 : 11) + (TEST_NIGHTLY ? 100 : 11) + 1;
    //how many slices
    final int sliceCount = random().nextInt(TEST_NIGHTLY ? 20 : 5) + 1;
    ZkTestServer server = new ZkTestServer(zkDir);
    SolrZkClient zkClient = null;
    ZkStateReader reader = null;
    SolrZkClient overseerClient = null;
    final MockZKController[] controllers = new MockZKController[nodeCount];
    final ExecutorService[] nodeExecutors = new ExecutorService[nodeCount];
    try {
        server.run();
        AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
        AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
        zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
        ZkController.createClusterZkNodes(zkClient);
        overseerClient = electNewOverseer(server.getZkAddress());
        reader = new ZkStateReader(zkClient);
        reader.createClusterStateWatchersAndUpdate();
        for (int i = 0; i < nodeCount; i++) {
            controllers[i] = new MockZKController(server.getZkAddress(), "node" + i);
        }
        for (int i = 0; i < nodeCount; i++) {
            nodeExecutors[i] = ExecutorUtil.newMDCAwareFixedThreadPool(1, new DefaultSolrThreadFactory("testShardAssignment"));
        }
        final String[] ids = new String[coreCount];
        //register total of coreCount cores
        for (int i = 0; i < coreCount; i++) {
            final int slot = i;
            nodeExecutors[i % nodeCount].submit((Runnable) () -> {
                final String coreName = "core" + slot;
                try {
                    ids[slot] = controllers[slot % nodeCount].publishState(COLLECTION, coreName, "node" + slot, Replica.State.ACTIVE, sliceCount);
                } catch (Throwable e) {
                    e.printStackTrace();
                    fail("register threw exception:" + e.getClass());
                }
            });
        }
        for (int i = 0; i < nodeCount; i++) {
            nodeExecutors[i].shutdown();
        }
        for (int i = 0; i < nodeCount; i++) {
            while (!nodeExecutors[i].awaitTermination(100, TimeUnit.MILLISECONDS)) ;
        }
        // make sure all cores have been assigned an id in cloudstate
        int cloudStateSliceCount = 0;
        for (int i = 0; i < 40; i++) {
            cloudStateSliceCount = 0;
            ClusterState state = reader.getClusterState();
            final Map<String, Slice> slices = state.getSlicesMap(COLLECTION);
            if (slices != null) {
                for (String name : slices.keySet()) {
                    cloudStateSliceCount += slices.get(name).getReplicasMap().size();
                }
                if (coreCount == cloudStateSliceCount)
                    break;
            }
            Thread.sleep(200);
        }
        assertEquals("Unable to verify all cores have been assigned an id in cloudstate", coreCount, cloudStateSliceCount);
        // make sure all cores have been returned an id
        int assignedCount = 0;
        for (int i = 0; i < 240; i++) {
            assignedCount = 0;
            for (int j = 0; j < coreCount; j++) {
                if (ids[j] != null) {
                    assignedCount++;
                }
            }
            if (coreCount == assignedCount) {
                break;
            }
            Thread.sleep(1000);
        }
        assertEquals("Unable to verify all cores have been returned an id", coreCount, assignedCount);
        final HashMap<String, AtomicInteger> counters = new HashMap<>();
        for (int i = 1; i < sliceCount + 1; i++) {
            counters.put("shard" + i, new AtomicInteger());
        }
        for (int i = 0; i < coreCount; i++) {
            final AtomicInteger ai = counters.get(ids[i]);
            assertNotNull("could not find counter for shard:" + ids[i], ai);
            ai.incrementAndGet();
        }
        for (String counter : counters.keySet()) {
            int count = counters.get(counter).intValue();
            int expectedCount = coreCount / sliceCount;
            int min = expectedCount - 1;
            int max = expectedCount + 1;
            if (count < min || count > max) {
                fail("Unevenly assigned shard ids, " + counter + " had " + count + ", expected: " + min + "-" + max);
            }
        }
        //make sure leaders are in cloud state
        for (int i = 0; i < sliceCount; i++) {
            assertNotNull(reader.getLeaderUrl(COLLECTION, "shard" + (i + 1), 15000));
        }
    } finally {
        close(zkClient);
        close(overseerClient);
        close(reader);
        for (int i = 0; i < controllers.length; i++) if (controllers[i] != null) {
            controllers[i].close();
        }
        server.shutdown();
        for (int i = 0; i < nodeCount; i++) {
            if (nodeExecutors[i] != null) {
                nodeExecutors[i].shutdownNow();
            }
        }
    }
}
Also used: ClusterState (org.apache.solr.common.cloud.ClusterState), HashMap (java.util.HashMap), DefaultSolrThreadFactory (org.apache.solr.util.DefaultSolrThreadFactory), SolrZkClient (org.apache.solr.common.cloud.SolrZkClient), ZkStateReader (org.apache.solr.common.cloud.ZkStateReader), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), Slice (org.apache.solr.common.cloud.Slice), ExecutorService (java.util.concurrent.ExecutorService), Test (org.junit.Test)
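
This test obtains its Overseer through an electNewOverseer(zkAddress) helper that the excerpt does not show. Example 19 below constructs the same machinery inline (LeaderElector, Overseer, OverseerElectionContext), so a plausible reconstruction of the helper, assuming it returns the SolrZkClient backing the elected Overseer, is:

// Hypothetical reconstruction of electNewOverseer, modeled on the inline setup in
// testOverseerStatsReset below; the actual helper in OverseerTest may differ in detail.
private SolrZkClient electNewOverseer(String address) throws Exception {
    SolrZkClient zkClient = new SolrZkClient(address, TIMEOUT);
    ZkStateReader reader = new ZkStateReader(zkClient);
    reader.createClusterStateWatchersAndUpdate();
    LeaderElector overseerElector = new LeaderElector(zkClient);
    UpdateShardHandler updateShardHandler = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);
    HttpShardHandlerFactory shardHandlerFactory = new HttpShardHandlerFactory();
    Overseer overseer = new Overseer(shardHandlerFactory.getShardHandler(), updateShardHandler,
            "/admin/cores", reader, null,
            new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "").build());
    ElectionContext ec = new OverseerElectionContext(zkClient, overseer, address.replaceAll("/", "_"));
    overseerElector.setup(ec);
    overseerElector.joinElection(ec, false);
    return zkClient;   // callers close this client in their finally blocks
}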

Example 18 with SolrZkClient

Use of org.apache.solr.common.cloud.SolrZkClient in project lucene-solr by apache.

From the class OverseerTest, method testReplay.

@Test
public void testReplay() throws Exception {
    String zkDir = createTempDir().toFile().getAbsolutePath() + File.separator + "zookeeper/server1/data";
    ZkTestServer server = new ZkTestServer(zkDir);
    SolrZkClient zkClient = null;
    SolrZkClient overseerClient = null;
    ZkStateReader reader = null;
    try {
        server.run();
        zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
        AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
        AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
        ZkController.createClusterZkNodes(zkClient);
        reader = new ZkStateReader(zkClient);
        reader.createClusterStateWatchersAndUpdate();
        //prepopulate work queue with some items to emulate previous overseer died before persisting state
        DistributedQueue queue = Overseer.getInternalWorkQueue(zkClient, new Overseer.Stats());
        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.SHARD_ID_PROP, "s1", ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core1", ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
        queue.offer(Utils.toJSON(m));
        m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "state", ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.SHARD_ID_PROP, "s1", ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core2", ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
        queue.offer(Utils.toJSON(m));
        overseerClient = electNewOverseer(server.getZkAddress());
        //submit to proper queue
        queue = Overseer.getStateUpdateQueue(zkClient);
        m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.SHARD_ID_PROP, "s1", ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core3", ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
        queue.offer(Utils.toJSON(m));
        for (int i = 0; i < 100; i++) {
            Slice s = reader.getClusterState().getSlice(COLLECTION, "s1");
            if (s != null && s.getReplicasMap().size() == 3)
                break;
            Thread.sleep(100);
        }
        assertNotNull(reader.getClusterState().getSlice(COLLECTION, "s1"));
        assertEquals(3, reader.getClusterState().getSlice(COLLECTION, "s1").getReplicasMap().size());
    } finally {
        close(overseerClient);
        close(zkClient);
        close(reader);
        server.shutdown();
    }
}
Also used: ZkStateReader (org.apache.solr.common.cloud.ZkStateReader), Slice (org.apache.solr.common.cloud.Slice), ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps), SolrZkClient (org.apache.solr.common.cloud.SolrZkClient), Test (org.junit.Test)
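
The three ZkNodeProps messages in this test differ only in the core name and the queue they are offered to. Purely as an illustration (this helper is not part of OverseerTest), the repetition could be folded into a small factory method:

// Illustrative helper: builds the recovery-state message offered to the Overseer queues
// above, varying only the core name; all other properties match the messages in the test.
private static ZkNodeProps recoveringState(String coreName) {
    return new ZkNodeProps(
            Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
            ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr",
            ZkStateReader.NODE_NAME_PROP, "node1",
            ZkStateReader.SHARD_ID_PROP, "s1",
            ZkStateReader.COLLECTION_PROP, COLLECTION,
            ZkStateReader.CORE_NAME_PROP, coreName,
            ZkStateReader.ROLES_PROP, "",
            ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
}

// Usage: queue.offer(Utils.toJSON(recoveringState("core1")));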

Example 19 with SolrZkClient

Use of org.apache.solr.common.cloud.SolrZkClient in project lucene-solr by apache.

From the class OverseerTest, method testOverseerStatsReset.

@Test
public void testOverseerStatsReset() throws Exception {
    String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
    ZkTestServer server = new ZkTestServer(zkDir);
    ZkStateReader reader = null;
    MockZKController mockController = null;
    SolrZkClient zkClient = null;
    try {
        server.run();
        AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
        AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
        zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
        ZkController.createClusterZkNodes(zkClient);
        reader = new ZkStateReader(zkClient);
        reader.createClusterStateWatchersAndUpdate();
        mockController = new MockZKController(server.getZkAddress(), "node1");
        LeaderElector overseerElector = new LeaderElector(zkClient);
        if (overseers.size() > 0) {
            overseers.get(overseers.size() - 1).close();
            overseers.get(overseers.size() - 1).getZkStateReader().getZkClient().close();
        }
        UpdateShardHandler updateShardHandler = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);
        updateShardHandlers.add(updateShardHandler);
        HttpShardHandlerFactory httpShardHandlerFactory = new HttpShardHandlerFactory();
        httpShardHandlerFactorys.add(httpShardHandlerFactory);
        Overseer overseer = new Overseer(httpShardHandlerFactory.getShardHandler(), updateShardHandler, "/admin/cores", reader, null, new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "").build());
        overseers.add(overseer);
        ElectionContext ec = new OverseerElectionContext(zkClient, overseer, server.getZkAddress().replaceAll("/", "_"));
        overseerElector.setup(ec);
        overseerElector.joinElection(ec, false);
        mockController.publishState(COLLECTION, "core1", "core_node1", Replica.State.ACTIVE, 1);
        assertNotNull(overseer.getStats());
        assertTrue((overseer.getStats().getSuccessCount(OverseerAction.STATE.toLower())) > 0);
        // shut it down
        overseer.close();
        ec.cancelElection();
        // start it again
        overseerElector.setup(ec);
        overseerElector.joinElection(ec, false);
        assertNotNull(overseer.getStats());
        assertEquals(0, (overseer.getStats().getSuccessCount(OverseerAction.STATE.toLower())));
    } finally {
        close(mockController);
        close(zkClient);
        close(reader);
        server.shutdown();
    }
}
Also used: ZkStateReader (org.apache.solr.common.cloud.ZkStateReader), SolrZkClient (org.apache.solr.common.cloud.SolrZkClient), UpdateShardHandler (org.apache.solr.update.UpdateShardHandler), HttpShardHandlerFactory (org.apache.solr.handler.component.HttpShardHandlerFactory), Test (org.junit.Test)
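
Each finally block in these examples calls a close(...) helper rather than closing the resources directly, so a teardown failure does not mask the primary assertion. The helper is not shown in this excerpt; a minimal sketch, assuming the resources implement Closeable as SolrZkClient and ZkStateReader do, could be:

// Hypothetical quiet-close helper; the real OverseerTest may define per-type overloads instead.
private void close(java.io.Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (Exception e) {
        // log and continue so cleanup of the remaining resources still runs
        e.printStackTrace();
    }
}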

Example 20 with SolrZkClient

Use of org.apache.solr.common.cloud.SolrZkClient in project lucene-solr by apache.

From the class OverseerTest, method testStateChange.

@Test
public void testStateChange() throws Exception {
    String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
    ZkTestServer server = new ZkTestServer(zkDir);
    SolrZkClient zkClient = null;
    ZkStateReader reader = null;
    SolrZkClient overseerClient = null;
    try {
        server.run();
        zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
        AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
        AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
        ZkController.createClusterZkNodes(zkClient);
        reader = new ZkStateReader(zkClient);
        reader.createClusterStateWatchersAndUpdate();
        overseerClient = electNewOverseer(server.getZkAddress());
        DistributedQueue q = Overseer.getStateUpdateQueue(zkClient);
        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core1", ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.RECOVERING.toString());
        q.offer(Utils.toJSON(m));
        waitForCollections(reader, COLLECTION);
        assertSame(reader.getClusterState().toString(), Replica.State.RECOVERING, reader.getClusterState().getSlice(COLLECTION, "shard1").getReplica("core_node1").getState());
        //publish node state (active)
        m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr", ZkStateReader.NODE_NAME_PROP, "node1", ZkStateReader.COLLECTION_PROP, COLLECTION, ZkStateReader.CORE_NAME_PROP, "core1", ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
        q.offer(Utils.toJSON(m));
        verifyReplicaStatus(reader, "collection1", "shard1", "core_node1", Replica.State.ACTIVE);
    } finally {
        close(zkClient);
        close(overseerClient);
        close(reader);
        server.shutdown();
    }
}
Also used: ZkStateReader (org.apache.solr.common.cloud.ZkStateReader), ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps), SolrZkClient (org.apache.solr.common.cloud.SolrZkClient), Test (org.junit.Test)
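
verifyReplicaStatus is another polling helper defined outside this excerpt; it waits until the named replica reports the expected state, reading getSlice(...).getReplica(...).getState() exactly as the assertion on RECOVERING does above. A short hedged sketch of that shape:

// Hypothetical sketch of a replica-state poll; the real helper may differ.
private void verifyReplicaStatus(ZkStateReader reader, String collection, String shard,
        String coreNodeName, Replica.State expected) throws Exception {
    for (int i = 0; i < 150; i++) {
        Slice slice = reader.getClusterState().getSlice(collection, shard);
        if (slice != null && slice.getReplica(coreNodeName) != null
                && slice.getReplica(coreNodeName).getState() == expected) {
            return;   // replica reached the expected state
        }
        Thread.sleep(200);
    }
    fail(coreNodeName + " never reached state " + expected);
}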

Aggregations

SolrZkClient (org.apache.solr.common.cloud.SolrZkClient): 130 usages
Test (org.junit.Test): 46 usages
ZkStateReader (org.apache.solr.common.cloud.ZkStateReader): 34 usages
HashMap (java.util.HashMap): 21 usages
KeeperException (org.apache.zookeeper.KeeperException): 18 usages
SolrException (org.apache.solr.common.SolrException): 15 usages
ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps): 14 usages
IOException (java.io.IOException): 13 usages
ClusterState (org.apache.solr.common.cloud.ClusterState): 13 usages
DocCollection (org.apache.solr.common.cloud.DocCollection): 12 usages
Map (java.util.Map): 11 usages
Slice (org.apache.solr.common.cloud.Slice): 11 usages
Replica (org.apache.solr.common.cloud.Replica): 10 usages
ArrayList (java.util.ArrayList): 9 usages
CloudSolrClient (org.apache.solr.client.solrj.impl.CloudSolrClient): 8 usages
HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 8 usages
Overseer (org.apache.solr.cloud.Overseer): 8 usages
ZkTestServer (org.apache.solr.cloud.ZkTestServer): 8 usages
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 7 usages
NamedList (org.apache.solr.common.util.NamedList): 7 usages
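
Taken together, the five examples share the same scaffolding: start an embedded ZkTestServer, connect a SolrZkClient, build a ZkStateReader, and tear everything down in a finally block. Stripped of the test-specific publishing and assertions, that shared skeleton (reusing the constants and close(...) helper from the examples above) is roughly:

// Distilled from the examples above: the minimal SolrZkClient test scaffolding.
String zkDir = createTempDir("zkData").toFile().getAbsolutePath();
ZkTestServer server = new ZkTestServer(zkDir);
SolrZkClient zkClient = null;
ZkStateReader reader = null;
try {
    server.run();                                               // start the embedded ZooKeeper
    AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());  // reset any leftover /solr node
    AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
    zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
    ZkController.createClusterZkNodes(zkClient);                // create the standard cluster paths
    reader = new ZkStateReader(zkClient);
    reader.createClusterStateWatchersAndUpdate();
    // ... test-specific state publishing and assertions go here ...
} finally {
    close(zkClient);
    close(reader);
    server.shutdown();
}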