Search in sources :

Example 1 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class ForceLeaderTest method unsetLeader.

/**
 * Offers a {@code LEADER} message that names only the collection and shard (no replica) to the
 * Overseer state-update queue, then waits for the shard to report no leader.
 *
 * <p>The cluster state is checked up to 10 times, sleeping one second after every check that
 * still finds a leader.
 *
 * @param collection name of the collection that owns the shard
 * @param slice name of the shard whose leader should disappear
 * @throws SolrException with {@code SERVER_ERROR} when a leader is still present after all checks
 * @throws Exception if queueing the message or reading the cluster state fails
 */
protected void unsetLeader(String collection, String slice) throws Exception {
    DistributedQueue stateUpdateQueue = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
    ZkStateReader reader = cloudClient.getZkStateReader();
    ZkNodeProps leaderMessage = new ZkNodeProps(
        Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(),
        ZkStateReader.SHARD_ID_PROP, slice,
        ZkStateReader.COLLECTION_PROP, collection);
    stateUpdateQueue.offer(Utils.toJSON(leaderMessage));

    boolean leaderUnset = false;
    int remainingChecks = 10;
    while (!leaderUnset && remainingChecks > 0) {
        Replica currentLeader = reader.getClusterState().getSlice(collection, slice).getLeader();
        if (currentLeader == null) {
            leaderUnset = true;
        } else {
            // Leader still registered: wait before looking at the cluster state again.
            Thread.sleep(1000);
            remainingChecks--;
        }
    }
    if (leaderUnset) {
        return;
    }
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not unset replica leader" + ". Cluster state: " + printClusterStateInfo(collection));
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) ClusterState(org.apache.solr.common.cloud.ClusterState) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException)

Example 2 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class LeaderElectionTest method testCancelElection.

/**
 * Two candidates join the election for the same shard; the first must become and stay leader
 * until its election context is cancelled, after which the second must be registered as leader.
 *
 * <p>Each step is followed by a one-second sleep before the registered leader URL is checked.
 */
@Test
public void testCancelElection() throws Exception {
    // Phase 1: the first candidate joins and is expected to be registered as leader.
    LeaderElector firstElector = new LeaderElector(zkClient);
    ZkNodeProps firstProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr/", ZkStateReader.CORE_NAME_PROP, "1");
    ElectionContext firstContext = new ShardLeaderElectionContextBase(firstElector, "slice1", "collection2", "dummynode1", firstProps, zkStateReader);
    firstElector.setup(firstContext);
    firstElector.joinElection(firstContext, false);
    Thread.sleep(1000);
    String leaderAfterFirstJoin = getLeaderUrl("collection2", "slice1");
    assertEquals("original leader was not registered", "http://127.0.0.1/solr/1/", leaderAfterFirstJoin);

    // Phase 2: a second candidate joins; the first one must remain leader.
    LeaderElector secondElector = new LeaderElector(zkClient);
    ZkNodeProps secondProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr/", ZkStateReader.CORE_NAME_PROP, "2");
    ElectionContext secondContext = new ShardLeaderElectionContextBase(secondElector, "slice1", "collection2", "dummynode2", secondProps, zkStateReader);
    secondElector.setup(secondContext);
    secondElector.joinElection(secondContext, false);
    Thread.sleep(1000);
    String leaderAfterSecondJoin = getLeaderUrl("collection2", "slice1");
    assertEquals("original leader should have stayed leader", "http://127.0.0.1/solr/1/", leaderAfterSecondJoin);

    // Phase 3: cancelling the first context must hand leadership to the second candidate.
    firstContext.cancelElection();
    Thread.sleep(1000);
    String leaderAfterCancel = getLeaderUrl("collection2", "slice1");
    assertEquals("new leader was not registered", "http://127.0.0.1/solr/2/", leaderAfterCancel);
}
Also used : ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) Test(org.junit.Test)

Example 3 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class TestConfigsApi method testCommands.

/**
 * Registers the APIs of a {@code ConfigSetsHandler} whose {@code sendToZk} is overridden to put
 * the would-be message into the response, then compares the output of a DELETE and a POST
 * request against the expected message contents.
 */
public void testCommands() throws Exception {
    ConfigSetsHandler configSetsHandler = new ConfigSetsHandler(null) {

        @Override
        protected void sendToZk(SolrQueryResponse rsp, ConfigSetOperation operation, Map<String, Object> result) throws KeeperException, InterruptedException {
            // Instead of sending anything, expose the message in the response under the
            // ZkNodeProps class name.
            result.put(QUEUE_OPERATION, operation.action.toLower());
            ZkNodeProps message = new ZkNodeProps(result);
            rsp.add(ZkNodeProps.class.getName(), message);
        }
    };

    ApiBag registeredApis = new ApiBag(false);
    for (Api handlerApi : configSetsHandler.getApis()) {
        registeredApis.register(handlerApi, EMPTY_MAP);
    }

    compareOutput(registeredApis, "/cluster/configs/sample", DELETE, null, null, "{name :sample, operation:delete}");
    compareOutput(registeredApis, "/cluster/configs", POST, "{create:{name : newconf, baseConfigSet: sample }}", null, "{operation:create, name :newconf,  baseConfigSet: sample, immutable: false }");
}
Also used : SolrQueryResponse(org.apache.solr.response.SolrQueryResponse) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ApiBag(org.apache.solr.api.ApiBag) Api(org.apache.solr.api.Api) Map(java.util.Map)

Example 4 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class TestRandomRequestDistribution method testQueryAgainstDownReplica.

/**
 * Asserts that requests against a collection are only served by an 'active' local replica.
 *
 * <p>Steps:
 * <ol>
 *   <li>Create collection 'football' (1 shard, 2 replicas) on the first two node names.</li>
 *   <li>Offer a state message to the Overseer state-update queue that marks the non-leader
 *       replica as {@code DOWN}, and verify the replica status.</li>
 *   <li>Query the node hosting the down replica until the leader core's
 *       {@code QUERY.standard} request counter reads 1.</li>
 *   <li>Send a few more queries and assert each one increments that same counter.</li>
 * </ol>
 */
private void testQueryAgainstDownReplica() throws Exception {
    log.info("Creating collection 'football' with 1 shard and 2 replicas");
    CollectionAdminRequest.createCollection("football", 1, 2).setCreateNodeSet(nodeNames.get(0) + ',' + nodeNames.get(1)).process(cloudClient);
    waitForRecoveriesToFinish("football", true);
    cloudClient.getZkStateReader().forceUpdateCollection("football");
    Replica leader = null;
    Replica notLeader = null;
    Collection<Replica> replicas = cloudClient.getZkStateReader().getClusterState().getSlice("football", "shard1").getReplicas();
    for (Replica replica : replicas) {
        if (replica.getStr(ZkStateReader.LEADER_PROP) != null) {
            leader = replica;
        } else {
            notLeader = replica;
        }
    }
    // Both replicas are dereferenced below; fail with a clear message here rather than with a
    // NullPointerException later if the scan above did not find one of them.
    assertNotNull("No leader replica found for football/shard1", leader);
    assertNotNull("No non-leader replica found for football/shard1", notLeader);
    //Simulate a replica being in down state.
    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(), ZkStateReader.BASE_URL_PROP, notLeader.getStr(ZkStateReader.BASE_URL_PROP), ZkStateReader.NODE_NAME_PROP, notLeader.getStr(ZkStateReader.NODE_NAME_PROP), ZkStateReader.COLLECTION_PROP, "football", ZkStateReader.SHARD_ID_PROP, "shard1", ZkStateReader.CORE_NAME_PROP, notLeader.getStr(ZkStateReader.CORE_NAME_PROP), ZkStateReader.ROLES_PROP, "", ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
    log.info("Forcing {} to go into 'down' state", notLeader.getStr(ZkStateReader.CORE_NAME_PROP));
    DistributedQueue q = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
    q.offer(Utils.toJSON(m));
    verifyReplicaStatus(cloudClient.getZkStateReader(), "football", "shard1", notLeader.getName(), Replica.State.DOWN);
    //Query against the node which hosts the down replica
    String baseUrl = notLeader.getStr(ZkStateReader.BASE_URL_PROP);
    if (!baseUrl.endsWith("/")) {
        baseUrl += "/";
    }
    String path = baseUrl + "football";
    log.info("Firing queries against path={}", path);
    try (HttpSolrClient client = getHttpSolrClient(path)) {
        client.setSoTimeout(5000);
        client.setConnectionTimeout(2000);
        // Locate the SolrCore whose name matches the leader replica's core name.
        SolrCore leaderCore = null;
        for (JettySolrRunner jetty : jettys) {
            CoreContainer container = jetty.getCoreContainer();
            for (SolrCore core : container.getCores()) {
                if (core.getName().equals(leader.getStr(ZkStateReader.CORE_NAME_PROP))) {
                    leaderCore = core;
                    break;
                }
            }
        }
        assertNotNull(leaderCore);
        SolrMetricManager leaderMetricManager = leaderCore.getCoreContainer().getMetricManager();
        String leaderRegistry = leaderCore.getCoreMetricManager().getRegistryName();
        Counter cnt = leaderMetricManager.counter(null, leaderRegistry, "requests", "QUERY.standard");
        // All queries should be served by the active replica
        // To make sure that's true we keep querying the down replica
        // If queries are getting processed by the down replica then the cluster state hasn't updated for that replica
        // locally
        // So we keep trying till it has updated and then verify if ALL queries go to the active replica
        long count = 0;
        while (true) {
            count++;
            client.query(new SolrQuery("*:*"));
            long c = cnt.getCount();
            if (c == 1) {
                // cluster state has got update locally
                break;
            } else {
                Thread.sleep(100);
            }
            if (count > 10000) {
                fail("After 10k queries we still see all requests being processed by the down replica");
            }
        }
        // Now we fire a few additional queries and make sure ALL of them
        // are served by the active replica
        int moreQueries = TestUtil.nextInt(random(), 4, 10);
        // Since 1 query has already hit the leader
        count = 1;
        for (int i = 0; i < moreQueries; i++) {
            client.query(new SolrQuery("*:*"));
            count++;
            long c = cnt.getCount();
            assertEquals("Query wasn't served by leader", count, c);
        }
    }
}
Also used : SolrCore(org.apache.solr.core.SolrCore) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) Replica(org.apache.solr.common.cloud.Replica) SolrQuery(org.apache.solr.client.solrj.SolrQuery) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) Counter(com.codahale.metrics.Counter) CoreContainer(org.apache.solr.core.CoreContainer) SolrMetricManager(org.apache.solr.metrics.SolrMetricManager)

Example 5 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class ChaosMonkey method getRandomJetty.

/**
 * Chooses a jetty of the given shard to be killed.
 *
 * <p>Returns {@code null} (after logging the reason where one is logged) when fewer than two
 * nodes are active or outside the dead pool, when no suitable pull replica is found within the
 * attempt limit, when the leader cannot be looked up, when the chosen jetty has no core
 * container or core, when the choice is a leader and leaders must not be killed, or when the
 * chosen jetty reports local port {@code -1}.
 *
 * @param slice shard to pick a jetty from
 * @param aggressivelyKillLeaders when {@code true}, leaders may be chosen and are targeted
 *        directly for a random draw of 5 or less out of {@code nextInt(10)}, provided indexer
 *        nodes may be killed
 * @return the chosen jetty, or {@code null} if nothing should be killed
 */
public CloudJettyRunner getRandomJetty(String slice, boolean aggressivelyKillLeaders) throws KeeperException, InterruptedException {
    int activeCount = checkIfKillIsLegal(slice, 0);
    // TODO: stale state makes this a tough call
    if (activeCount < 2) {
        // we cannot kill anyone
        monkeyLog("only one active node in shard - monkey cannot kill :(");
        return null;
    }

    // let's check the deadpool count
    int runningCount = 0;
    for (CloudJettyRunner shardJetty : shardToJetty.get(slice)) {
        runningCount += deadPool.contains(shardJetty) ? 0 : 1;
    }
    if (runningCount < 2) {
        // we cannot kill anyone
        monkeyLog("only one active node in shard - monkey cannot kill :(");
        return null;
    }

    boolean indexerKillable = canKillIndexer(slice);
    if (!indexerKillable) {
        monkeyLog("Number of indexer nodes (nrt or tlog replicas) is not enough to kill one of them, Will only choose a pull replica to kill");
    }

    int chance = chaosRandom.nextInt(10);
    CloudJettyRunner victim = null;
    if (aggressivelyKillLeaders && indexerKillable && chance <= 5) {
        // if killLeader, really aggressively go after leaders
        victim = shardToLeaderJetty.get(slice);
    } else {
        List<CloudJettyRunner> shardJetties = shardToJetty.get(slice);
        // get random node; when indexers are off limits keep drawing until a pull replica shows up
        for (int attempt = 1; ; attempt++) {
            victim = shardJetties.get(chaosRandom.nextInt(shardJetties.size()));
            if (indexerKillable || getTypeForJetty(slice, victim) == Replica.Type.PULL) {
                break;
            }
            if (attempt > 20) {
                monkeyLog("Can't kill indexer nodes (nrt or tlog replicas) and couldn't find a random pull node after 20 attempts - monkey cannot kill :(");
                return null;
            }
        }

        ZkNodeProps leaderProps;
        try {
            leaderProps = zkStateReader.getLeaderRetry(collection, slice);
        } catch (Throwable t) {
            log.error("Could not get leader", t);
            return null;
        }

        // cluster state can be stale - also go by our 'near real-time' is leader prop
        CoreContainer container = victim.jetty.getCoreContainer();
        if (container == null) {
            return null;
        }
        boolean coreReportsLeader;
        try (SolrCore core = container.getCore(leaderProps.getStr(ZkStateReader.CORE_NAME_PROP))) {
            if (core == null) {
                monkeyLog("selected jetty not running correctly - skip");
                return null;
            }
            coreReportsLeader = core.getCoreDescriptor().getCloudDescriptor().isLeader();
        }

        boolean victimIsLeader = leaderProps.getStr(ZkStateReader.NODE_NAME_PROP).equals(victim.nodeName) || coreReportsLeader;
        if (victimIsLeader && !aggressivelyKillLeaders) {
            // we don't kill leaders...
            monkeyLog("abort! I don't kill leaders");
            return null;
        }
    }

    if (victim.jetty.getLocalPort() == -1) {
        // we can't kill the dead
        monkeyLog("abort! This guy is already dead");
        return null;
    }
    monkeyLog("chose a victim! " + victim.jetty.getLocalPort());
    return victim;
}
Also used : CoreContainer(org.apache.solr.core.CoreContainer) SolrCore(org.apache.solr.core.SolrCore) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) CloudJettyRunner(org.apache.solr.cloud.AbstractFullDistribZkTestBase.CloudJettyRunner)

Aggregations

ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps): 91; SolrException (org.apache.solr.common.SolrException): 35; HashMap (java.util.HashMap): 28; Replica (org.apache.solr.common.cloud.Replica): 22; ZkStateReader (org.apache.solr.common.cloud.ZkStateReader): 20; ArrayList (java.util.ArrayList): 19; Slice (org.apache.solr.common.cloud.Slice): 19; KeeperException (org.apache.zookeeper.KeeperException): 19; ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 16; Test (org.junit.Test): 16; DocCollection (org.apache.solr.common.cloud.DocCollection): 15; SolrZkClient (org.apache.solr.common.cloud.SolrZkClient): 14; Map (java.util.Map): 13; ClusterState (org.apache.solr.common.cloud.ClusterState): 13; IOException (java.io.IOException): 10; ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps): 10; ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException): 10; NamedList (org.apache.solr.common.util.NamedList): 10; HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 9; SolrCore (org.apache.solr.core.SolrCore): 8