Search in sources:

Example 61 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class TopicStream method constructStreams.

/**
 * Builds one {@link SolrStream} per slice of {@code this.collection} and appends it to
 * {@code solrStreams}.
 *
 * <p>Every stream is created with {@code DISTRIB=false}, sorted by {@code _version_ asc}, with
 * {@code _version_} appended to the {@code fl} list when it is not already present. For each
 * slice a replica is picked at random among those that are ACTIVE and hosted on a live node, and
 * the stream is seeded with the slice's entry from {@code checkpoints}.
 *
 * <p>NOTE(review): assumes the {@code fl} parameter is always set and that {@code checkpoints}
 * has an entry for every slice; a missing value surfaces as an {@link IOException} wrapping a
 * {@link NullPointerException} — confirm against the callers.
 *
 * @throws IOException if a slice has no ACTIVE replica on a live node, or if any other failure
 *     occurs while reading cluster state or building the streams
 */
protected void constructStreams() throws IOException {
    try {
        ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
        Collection<Slice> slices = CloudSolrStream.getSlices(this.collection, zkStateReader, false);
        ModifiableSolrParams mParams = new ModifiableSolrParams(params);
        // We are the aggregator.
        mParams.set(DISTRIB, "false");
        String fl = mParams.get("fl");
        mParams.set(SORT, "_version_ asc");
        if (!fl.contains(VERSION_FIELD)) {
            fl += ",_version_";
        }
        mParams.set("fl", fl);
        Random random = new Random();
        ClusterState clusterState = zkStateReader.getClusterState();
        Set<String> liveNodes = clusterState.getLiveNodes();
        for (Slice slice : slices) {
            ModifiableSolrParams localParams = new ModifiableSolrParams(mParams);
            long checkpoint = checkpoints.get(slice.getName());
            Collection<Replica> replicas = slice.getReplicas();
            List<Replica> shuffler = new ArrayList<>();
            for (Replica replica : replicas) {
                if (replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) {
                    shuffler.add(replica);
                }
            }
            // Random.nextInt(0) throws IllegalArgumentException; fail with a message that names
            // the slice instead of leaking that opaque error.
            if (shuffler.isEmpty()) {
                throw new IOException("No active replicas on live nodes for slice " + slice.getName() + " of collection " + this.collection);
            }
            Replica rep = shuffler.get(random.nextInt(shuffler.size()));
            ZkCoreNodeProps zkProps = new ZkCoreNodeProps(rep);
            String url = zkProps.getCoreUrl();
            SolrStream solrStream = new SolrStream(url, localParams);
            solrStream.setSlice(slice.getName());
            solrStream.setCheckpoint(checkpoint);
            solrStream.setTrace(true);
            if (streamContext != null) {
                solrStream.setStreamContext(streamContext);
            }
            solrStreams.add(solrStream);
        }
    } catch (IOException e) {
        // Already the declared exception type; do not wrap it a second time.
        throw e;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Replica(org.apache.solr.common.cloud.Replica) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Random(java.util.Random) Slice(org.apache.solr.common.cloud.Slice)

Example 62 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class TopicStream method getPersistedCheckpoints.

/**
 * Loads previously stored checkpoints into {@code this.checkpoints}.
 *
 * <p>Walks the slices of {@code checkpointCollection} and asks the first replica that is ACTIVE
 * and on a live node for the document with id {@code id}. When that document exists, each value
 * of its {@code checkpoint_ss} field is split on {@code "~"} into a key and a long value. Only
 * that first usable replica is consulted, whether or not it returned a document.
 *
 * @throws IOException wrapping any exception raised while fetching or parsing the document
 */
private void getPersistedCheckpoints() throws IOException {
    ZkStateReader stateReader = cloudSolrClient.getZkStateReader();
    Collection<Slice> checkpointSlices = CloudSolrStream.getSlices(checkpointCollection, stateReader, false);
    Set<String> liveNodes = stateReader.getClusterState().getLiveNodes();
    for (Slice slice : checkpointSlices) {
        for (Replica replica : slice.getReplicas()) {
            boolean usable = replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName());
            if (!usable) {
                continue;
            }
            HttpSolrClient httpClient = streamContext.getSolrClientCache().getHttpSolrClient(replica.getCoreUrl());
            try {
                SolrDocument doc = httpClient.getById(id);
                if (doc != null) {
                    List<String> persisted = (List<String>) doc.getFieldValue("checkpoint_ss");
                    for (String entry : persisted) {
                        // Each entry has the form <key>~<long value>.
                        String[] pair = entry.split("~");
                        this.checkpoints.put(pair[0], Long.parseLong(pair[1]));
                    }
                }
            } catch (Exception e) {
                throw new IOException(e);
            }
            // The first usable replica settles the lookup; nothing follows the loops.
            return;
        }
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) IOException(java.io.IOException) Replica(org.apache.solr.common.cloud.Replica) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrDocument(org.apache.solr.common.SolrDocument) Slice(org.apache.solr.common.cloud.Slice) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List)

Example 63 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class ChaosMonkeyNothingIsSafeWithPullReplicasTest method test.

/**
 * Runs indexing, search and (usually) commit threads against the default collection while the
 * chaos monkey is active, then stops everything and checks that the cluster recovered: every
 * shard has a leader, at least one node is live, the non-full-throttle indexing threads stayed
 * within {@code FAIL_TOLERANCE}, replicas caught up, shards are consistent, and a new collection
 * can still be created.
 *
 * <p>On failure the replica replication info and the ZooKeeper layout are printed.
 */
@Test
public void test() throws Exception {
    cloudClient.setSoTimeout(clientSoTimeout);
    DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION);
    assertEquals(this.sliceCount, docCollection.getSlices().size());
    Slice s = docCollection.getSlice("shard1");
    assertNotNull(s);
    assertEquals("Unexpected number of replicas. Collection: " + docCollection, numRealtimeOrTlogReplicas + numPullReplicas, s.getReplicas().size());
    assertEquals("Unexpected number of pull replicas. Collection: " + docCollection, numPullReplicas, s.getReplicas(EnumSet.of(Replica.Type.PULL)).size());
    assertEquals(useTlogReplicas() ? 0 : numRealtimeOrTlogReplicas, s.getReplicas(EnumSet.of(Replica.Type.NRT)).size());
    assertEquals(useTlogReplicas() ? numRealtimeOrTlogReplicas : 0, s.getReplicas(EnumSet.of(Replica.Type.TLOG)).size());
    boolean testSuccessful = false;
    try {
        handle.clear();
        handle.put("timestamp", SKIPVAL);
        ZkStateReader zkStateReader = cloudClient.getZkStateReader();
        // make sure we have leaders for each shard; shard names are 1-based
        // (shard1..shard<sliceCount>), so the upper bound must be inclusive
        for (int j = 1; j <= sliceCount; j++) {
            zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
        }
        // wait for recoveries to finish before we start indexing
        waitForRecoveriesToFinish(false);
        // we cannot do delete by query
        // as it's not supported for recovery
        del("*:*");
        List<StoppableThread> threads = new ArrayList<>();
        List<StoppableIndexingThread> indexThreads = new ArrayList<>();
        int indexThreadCount = TEST_NIGHTLY ? 3 : 1;
        for (int i = 0; i < indexThreadCount; i++) {
            StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
            threads.add(indexThread);
            indexThreads.add(indexThread);
            indexThread.start();
        }
        int searchThreadCount = 1;
        for (int i = 0; i < searchThreadCount; i++) {
            StoppableSearchThread searchThread = new StoppableSearchThread(cloudClient);
            threads.add(searchThread);
            searchThread.start();
        }
        if (usually()) {
            StoppableCommitThread commitThread = new StoppableCommitThread(cloudClient, 1000, false);
            threads.add(commitThread);
            commitThread.start();
        }
        // TODO: we only do this sometimes so that we can sometimes compare against control,
        // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
        boolean runFullThrottle = random().nextBoolean();
        if (runFullThrottle) {
            FullThrottleStoppableIndexingThread ftIndexThread = new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
            threads.add(ftIndexThread);
            ftIndexThread.start();
        }
        chaosMonkey.startTheMonkey(true, 10000);
        try {
            long runLength;
            if (RUN_LENGTH != -1) {
                runLength = RUN_LENGTH;
            } else {
                int[] runTimes;
                if (TEST_NIGHTLY) {
                    runTimes = new int[] { 5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000 };
                } else {
                    runTimes = new int[] { 5000, 7000, 15000 };
                }
                // nextInt's bound is exclusive, so pass the full length or the
                // last run time could never be selected
                runLength = runTimes[random().nextInt(runTimes.length)];
            }
            ChaosMonkey.wait(runLength, DEFAULT_COLLECTION, zkStateReader);
        } finally {
            chaosMonkey.stopTheMonkey();
        }
        // ideally this should go into chaosMonkey
        restartZk(1000 * (5 + random().nextInt(4)));
        for (StoppableThread thread : threads) {
            thread.safeStop();
        }
        // wait for stop...
        for (StoppableThread thread : threads) {
            thread.join();
        }
        // try and wait for any replications and what not to finish...
        ChaosMonkey.wait(2000, DEFAULT_COLLECTION, zkStateReader);
        // wait until there are no recoveries...
        //Math.round((runLength / 1000.0f / 3.0f)));
        waitForThingsToLevelOut(Integer.MAX_VALUE);
        // make sure we again have leaders for each shard (inclusive bound, see above)
        for (int j = 1; j <= sliceCount; j++) {
            zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
        }
        commit();
        // TODO: assert we didnt kill everyone
        zkStateReader.updateLiveNodes();
        assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);
        // we expect full throttle fails, but cloud client should not easily fail
        for (StoppableThread thread : threads) {
            if (thread instanceof StoppableIndexingThread && !(thread instanceof FullThrottleStoppableIndexingThread)) {
                int failCount = ((StoppableIndexingThread) thread).getFailCount();
                assertFalse("There were too many update fails (" + failCount + " > " + FAIL_TOLERANCE + ") - we expect it can happen, but shouldn't easily", failCount > FAIL_TOLERANCE);
            }
        }
        waitForReplicationFromReplicas(DEFAULT_COLLECTION, zkStateReader, new TimeOut(30, TimeUnit.SECONDS));
        //      waitForAllWarmingSearchers();
        Set<String> addFails = getAddFails(indexThreads);
        Set<String> deleteFails = getDeleteFails(indexThreads);
        // full throttle thread can
        // have request fails
        checkShardConsistency(!runFullThrottle, true, addFails, deleteFails);
        long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
        // ensure we have added more than 0 docs
        long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
        // report the count that is actually being asserted on, next to the control count
        assertTrue("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", cloudClientDocs > 0);
        if (VERBOSE) {
            System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
        }
        // sometimes we restart zookeeper as well
        if (random().nextBoolean()) {
            restartZk(1000 * (5 + random().nextInt(4)));
        }
        try (CloudSolrClient client = createCloudClient("collection1")) {
            // We don't really know how many live nodes we have at this point, so "maxShardsPerNode" needs to be > 1
            createCollection(null, "testcollection", 1, 1, 10, client, null, "conf1");
        }
        List<Integer> numShardsNumReplicas = new ArrayList<>(2);
        numShardsNumReplicas.add(1);
        numShardsNumReplicas.add(1 + getPullReplicaCount());
        checkForCollection("testcollection", numShardsNumReplicas, null);
        testSuccessful = true;
    } finally {
        if (!testSuccessful) {
            logReplicaTypesReplicationInfo(DEFAULT_COLLECTION, cloudClient.getZkStateReader());
            printLayout();
        }
    }
}
Also used : TimeOut(org.apache.solr.util.TimeOut) ArrayList(java.util.ArrayList) SolrQuery(org.apache.solr.client.solrj.SolrQuery) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) Test(org.junit.Test)

Example 64 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class ChaosMonkeyShardSplitTest method electNewOverseer.

/**
 * Opens a new ZooKeeper client against {@code address} and joins the Overseer election with a
 * freshly constructed {@link Overseer}.
 *
 * <p>NOTE(review): the Overseer is closed right after construction and the state reader is
 * closed before returning, although both were handed to the election context — the original
 * carries a "TODO: close Overseer" remark here; confirm this ordering is intended.
 *
 * @param address ZooKeeper address to connect to; with every "/" replaced by "_" it is also
 *     passed as the last argument of the {@link OverseerElectionContext}
 * @return the {@link SolrZkClient} created for the election, still open
 */
private SolrZkClient electNewOverseer(String address) throws KeeperException, InterruptedException, IOException {
    final SolrZkClient client = new SolrZkClient(address, TIMEOUT);
    final ZkStateReader stateReader = new ZkStateReader(client);
    final LeaderElector elector = new LeaderElector(client);
    final UpdateShardHandler shardUpdater = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);
    // TODO: close Overseer
    final Overseer newOverseer = new Overseer(
        new HttpShardHandlerFactory().getShardHandler(),
        shardUpdater,
        "/admin/cores",
        stateReader,
        null,
        new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build());
    newOverseer.close();
    final String electionNodeName = address.replaceAll("/", "_");
    final ElectionContext context = new OverseerElectionContext(client, newOverseer, electionNodeName);
    elector.setup(context);
    elector.joinElection(context, false);
    stateReader.close();
    return client;
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) UpdateShardHandler(org.apache.solr.update.UpdateShardHandler) HttpShardHandlerFactory(org.apache.solr.handler.component.HttpShardHandlerFactory)

Example 65 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class ChaosMonkeyNothingIsSafeTest method test.

/**
 * Runs indexing and search threads against the default collection while the chaos monkey is
 * active, then stops everything and checks that the cluster recovered: every shard has a
 * leader, at least one node is live, the non-full-throttle indexing threads stayed within
 * {@code FAIL_TOLERANCE}, shards are consistent, and a new collection can still be created.
 *
 * <p>On failure the ZooKeeper layout is printed.
 */
@Test
public void test() throws Exception {
    cloudClient.setSoTimeout(clientSoTimeout);
    boolean testSuccessful = false;
    try {
        handle.clear();
        handle.put("timestamp", SKIPVAL);
        ZkStateReader zkStateReader = cloudClient.getZkStateReader();
        // make sure we have leaders for each shard; names start at shard1, so the
        // upper bound must be inclusive to reach shard<sliceCount>
        // (assumes the default shardN naming — TODO confirm)
        for (int j = 1; j <= sliceCount; j++) {
            zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
        }
        // wait for recoveries to finish before we start indexing
        waitForRecoveriesToFinish(false);
        // we cannot do delete by query
        // as it's not supported for recovery
        del("*:*");
        List<StoppableThread> threads = new ArrayList<>();
        List<StoppableIndexingThread> indexThreads = new ArrayList<>();
        int indexThreadCount = TEST_NIGHTLY ? 3 : 1;
        for (int i = 0; i < indexThreadCount; i++) {
            StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
            threads.add(indexThread);
            indexThreads.add(indexThread);
            indexThread.start();
        }
        int searchThreadCount = 1;
        for (int i = 0; i < searchThreadCount; i++) {
            StoppableSearchThread searchThread = new StoppableSearchThread(cloudClient);
            threads.add(searchThread);
            searchThread.start();
        }
        // TODO: we only do this sometimes so that we can sometimes compare against control,
        // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
        boolean runFullThrottle = random().nextBoolean();
        if (runFullThrottle) {
            FullThrottleStoppableIndexingThread ftIndexThread = new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
            threads.add(ftIndexThread);
            ftIndexThread.start();
        }
        chaosMonkey.startTheMonkey(true, 10000);
        try {
            long runLength;
            if (RUN_LENGTH != -1) {
                runLength = RUN_LENGTH;
            } else {
                int[] runTimes;
                if (TEST_NIGHTLY) {
                    runTimes = new int[] { 5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000 };
                } else {
                    runTimes = new int[] { 5000, 7000, 15000 };
                }
                // nextInt's bound is exclusive, so pass the full length or the
                // last run time could never be selected
                runLength = runTimes[random().nextInt(runTimes.length)];
            }
            Thread.sleep(runLength);
        } finally {
            chaosMonkey.stopTheMonkey();
        }
        // ideally this should go into chaosMonkey
        restartZk(1000 * (5 + random().nextInt(4)));
        for (StoppableThread thread : threads) {
            thread.safeStop();
        }
        // wait for stop...
        for (StoppableThread thread : threads) {
            thread.join();
        }
        // try and wait for any replications and what not to finish...
        Thread.sleep(2000);
        // wait until there are no recoveries...
        //Math.round((runLength / 1000.0f / 3.0f)));
        waitForThingsToLevelOut(Integer.MAX_VALUE);
        // make sure we again have leaders for each shard (inclusive bound, see above)
        for (int j = 1; j <= sliceCount; j++) {
            zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
        }
        commit();
        // TODO: assert we didnt kill everyone
        zkStateReader.updateLiveNodes();
        assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);
        // we expect full throttle fails, but cloud client should not easily fail
        for (StoppableThread thread : threads) {
            if (thread instanceof StoppableIndexingThread && !(thread instanceof FullThrottleStoppableIndexingThread)) {
                int failCount = ((StoppableIndexingThread) thread).getFailCount();
                assertFalse("There were too many update fails (" + failCount + " > " + FAIL_TOLERANCE + ") - we expect it can happen, but shouldn't easily", failCount > FAIL_TOLERANCE);
            }
        }
        Set<String> addFails = getAddFails(indexThreads);
        Set<String> deleteFails = getDeleteFails(indexThreads);
        // full throttle thread can
        // have request fails
        checkShardConsistency(!runFullThrottle, true, addFails, deleteFails);
        long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
        // ensure we have added more than 0 docs
        long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
        // report the count that is actually being asserted on, next to the control count
        assertTrue("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", cloudClientDocs > 0);
        if (VERBOSE) {
            System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
        }
        // sometimes we restart zookeeper as well
        if (random().nextBoolean()) {
            restartZk(1000 * (5 + random().nextInt(4)));
        }
        try (CloudSolrClient client = createCloudClient("collection1")) {
            createCollection(null, "testcollection", 1, 1, 1, client, null, "conf1");
        }
        List<Integer> numShardsNumReplicas = new ArrayList<>(2);
        numShardsNumReplicas.add(1);
        numShardsNumReplicas.add(1);
        checkForCollection("testcollection", numShardsNumReplicas, null);
        testSuccessful = true;
    } finally {
        if (!testSuccessful) {
            printLayout();
        }
    }
}
Also used : ArrayList(java.util.ArrayList) SolrQuery(org.apache.solr.client.solrj.SolrQuery) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Test(org.junit.Test)

Aggregations

ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)133 ClusterState (org.apache.solr.common.cloud.ClusterState)58 Replica (org.apache.solr.common.cloud.Replica)48 Slice (org.apache.solr.common.cloud.Slice)48 HashMap (java.util.HashMap)34 SolrZkClient (org.apache.solr.common.cloud.SolrZkClient)33 ArrayList (java.util.ArrayList)32 DocCollection (org.apache.solr.common.cloud.DocCollection)31 Test (org.junit.Test)26 SolrException (org.apache.solr.common.SolrException)25 CloudSolrClient (org.apache.solr.client.solrj.impl.CloudSolrClient)22 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)20 IOException (java.io.IOException)19 Map (java.util.Map)19 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)18 KeeperException (org.apache.zookeeper.KeeperException)16 SolrQuery (org.apache.solr.client.solrj.SolrQuery)15 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)15 SolrServerException (org.apache.solr.client.solrj.SolrServerException)13 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)12