Search in sources :

Example 1 with ZkCoreNodeProps

use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.

the class BasicDistributedZkTest method getLeaderUrlFromZk.

/**
 * Looks up the leader of a slice in the cluster state exposed by the common
 * cloud client's ZooKeeper state reader.
 *
 * @param collection name of the collection to look in
 * @param slice name of the slice whose leader is wanted
 * @return the leader's node properties wrapped in a {@link ZkCoreNodeProps}
 * @throws RuntimeException if the cluster state reports no leader for the slice
 */
protected ZkCoreNodeProps getLeaderUrlFromZk(String collection, String slice) {
    ZkNodeProps leaderProps = getCommonCloudSolrClient()
            .getZkStateReader()
            .getClusterState()
            .getLeader(collection, slice);
    if (leaderProps != null) {
        return new ZkCoreNodeProps(leaderProps);
    }
    throw new RuntimeException("Could not find leader:" + collection + " " + slice);
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps)

Example 2 with ZkCoreNodeProps

use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.

the class BasicDistributedZkTest method testANewCollectionInOneInstanceWithManualShardAssignement.

/**
 * Creates four cores of {@code oneInstanceCollection2} against a single base URL
 * (two assigned to "slice1", two to "slice2"), indexes three documents, checks the
 * distributed document count and the replica "roles" property, and finally unloads
 * the current "slice1" leader core and waits for a different leader URL to appear.
 *
 * @throws Exception if any Solr request, ZooKeeper lookup or wait fails
 */
private void testANewCollectionInOneInstanceWithManualShardAssignement() throws Exception {
    log.info("### STARTING testANewCollectionInOneInstanceWithManualShardAssignement");
    System.clearProperty("numShards");
    List<SolrClient> collectionClients = new ArrayList<>();
    SolrClient client = clients.get(0);
    // Strip the trailing DEFAULT_COLLECTION name plus one more character from the
    // client's base URL (presumably the "/" separator — confirm).
    final String baseUrl = ((HttpSolrClient) client).getBaseURL().substring(0, ((HttpSolrClient) client).getBaseURL().length() - DEFAULT_COLLECTION.length() - 1);
    // Cores 1 and 4 are assigned to slice1, cores 2 and 3 to slice2.
    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
    createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
    // Drain the completion service until no pending futures remain
    // (presumably the asynchronous core creations started above — confirm).
    while (pending != null && pending.size() > 0) {
        Future<Object> future = completionService.take();
        pending.remove(future);
    }
    SolrClient client1 = collectionClients.get(0);
    SolrClient client2 = collectionClients.get(1);
    SolrClient client3 = collectionClients.get(2);
    SolrClient client4 = collectionClients.get(3);
    // no one should be recovering
    waitForRecoveriesToFinish(oneInstanceCollection2, getCommonCloudSolrClient().getZkStateReader(), false, true);
    assertAllActive(oneInstanceCollection2, getCommonCloudSolrClient().getZkStateReader());
    //printLayout();
    // TODO: enable when we don't falsely get slice1...
    // solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice1", 30000);
    // solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice2", 30000);
    // Index one document through each of three different clients, then commit once.
    client2.add(getDoc(id, "1"));
    client3.add(getDoc(id, "2"));
    client4.add(getDoc(id, "3"));
    client1.commit();
    SolrQuery query = new SolrQuery("*:*");
    // Non-distributed queries against each core; the counts are only consumed by the
    // commented-out diagnostics below, but the queries themselves are still executed.
    query.set("distrib", false);
    long oneDocs = client1.query(query).getResults().getNumFound();
    long twoDocs = client2.query(query).getResults().getNumFound();
    long threeDocs = client3.query(query).getResults().getNumFound();
    long fourDocs = client4.query(query).getResults().getNumFound();
    // Distributed query across the whole collection via the common cloud client.
    query.set("collection", oneInstanceCollection2);
    query.set("distrib", true);
    long allDocs = getCommonCloudSolrClient().query(query).getResults().getNumFound();
    //    System.out.println("1:" + oneDocs);
    //    System.out.println("2:" + twoDocs);
    //    System.out.println("3:" + threeDocs);
    //    System.out.println("4:" + fourDocs);
    //    System.out.println("All Docs:" + allDocs);
    //    assertEquals(oneDocs, threeDocs);
    //    assertEquals(twoDocs, fourDocs);
    //    assertNotSame(oneDocs, twoDocs);
    assertEquals(3, allDocs);
    // we added a role of none on these creates - check for it
    ZkStateReader zkStateReader = getCommonCloudSolrClient().getZkStateReader();
    zkStateReader.forceUpdateCollection(oneInstanceCollection2);
    Map<String, Slice> slices = zkStateReader.getClusterState().getSlicesMap(oneInstanceCollection2);
    assertNotNull(slices);
    // Only the first replica returned by slice1's replica map is inspected.
    String roles = slices.get("slice1").getReplicasMap().values().iterator().next().getStr(ZkStateReader.ROLES_PROP);
    assertEquals("none", roles);
    // Capture the current slice1 leader so it can be unloaded below.
    ZkCoreNodeProps props = new ZkCoreNodeProps(getCommonCloudSolrClient().getZkStateReader().getClusterState().getLeader(oneInstanceCollection2, "slice1"));
    // now test that unloading a core gets us a new leader
    try (HttpSolrClient unloadClient = getHttpSolrClient(baseUrl)) {
        unloadClient.setConnectionTimeout(15000);
        unloadClient.setSoTimeout(60000);
        Unload unloadCmd = new Unload(true);
        unloadCmd.setCoreName(props.getCoreName());
        String leader = props.getCoreUrl();
        unloadClient.request(unloadCmd);
        // Poll every 100 ms while the reported leader URL is still the unloaded core's
        // URL; the test fails once the retry budget of 50 is used up.
        int tries = 50;
        while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "slice1", 10000))) {
            Thread.sleep(100);
            if (tries-- == 0) {
                fail("Leader never changed");
            }
        }
    }
    // NOTE(review): the clients are closed only when the test reaches this line; an
    // earlier assertion failure or exception skips it — consider try/finally.
    IOUtils.close(collectionClients);
}
Also used : ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ArrayList(java.util.ArrayList) SolrQuery(org.apache.solr.client.solrj.SolrQuery) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Unload(org.apache.solr.client.solrj.request.CoreAdminRequest.Unload) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrClient(org.apache.solr.client.solrj.SolrClient) Slice(org.apache.solr.common.cloud.Slice)

Example 3 with ZkCoreNodeProps

use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.

the class OverseerElectionContext method startLeaderInitiatedRecoveryOnReplicas.

/**
 * After the given core has become leader, walks the children of the shard's
 * leader-initiated-recovery znode and, for every other replica whose recorded
 * state is {@code DOWN} or {@code RECOVERY_FAILED}, asks the {@code ZkController}
 * to ensure that replica is in leader-initiated recovery.
 *
 * <p>Replicas listed under the znode that cannot be matched to an entry returned by
 * {@code getReplicaProps} are skipped with a warning instead of being passed on as
 * {@code null}.
 *
 * @param coreName name of the local core that was elected leader
 * @throws Exception if a ZooKeeper or controller call fails with anything other
 *         than a missing recovery znode
 */
private void startLeaderInitiatedRecoveryOnReplicas(String coreName) throws Exception {
    try (SolrCore core = cc.getCore(coreName)) {
        CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
        String coll = cloudDesc.getCollectionName();
        String shardId = cloudDesc.getShardId();
        String coreNodeName = cloudDesc.getCoreNodeName();
        if (coll == null || shardId == null) {
            log.error("Cannot start leader-initiated recovery on new leader (core=" + coreName + ",coreNodeName=" + coreNodeName + ") because collection and/or shard is null!");
            return;
        }
        String znodePath = zkController.getLeaderInitiatedRecoveryZnodePath(coll, shardId);
        List<String> replicas = null;
        try {
            replicas = zkClient.getChildren(znodePath, null, false);
        } catch (NoNodeException ignored) {
            // this can be ignored: no recovery znode means there is nothing to process
        }
        if (replicas == null || replicas.isEmpty()) {
            return;
        }
        for (String replicaCoreNodeName : replicas) {
            if (coreNodeName.equals(replicaCoreNodeName)) {
                // added safe-guard so we don't mark this core as down
                continue;
            }
            final Replica.State lirState = zkController.getLeaderInitiatedRecoveryState(coll, shardId, replicaCoreNodeName);
            if (lirState != Replica.State.DOWN && lirState != Replica.State.RECOVERY_FAILED) {
                continue;
            }
            log.info("After core={} coreNodeName={} was elected leader, a replica coreNodeName={} was found in state: {} and needing recovery.", coreName, coreNodeName, replicaCoreNodeName, lirState.toString());
            // NOTE(review): `collection` is not declared in this method (presumably a
            // field of the enclosing class) — confirm it always equals `coll`.
            List<ZkCoreNodeProps> replicaProps = zkController.getZkStateReader().getReplicaProps(collection, shardId, coreNodeName);
            if (replicaProps == null || replicaProps.isEmpty()) {
                continue;
            }
            ZkCoreNodeProps coreNodeProps = null;
            for (ZkCoreNodeProps p : replicaProps) {
                if (((Replica) p.getNodeProps()).getName().equals(replicaCoreNodeName)) {
                    coreNodeProps = p;
                    break;
                }
            }
            if (coreNodeProps == null) {
                // The znode lists a replica that the state reader did not return; there
                // are no props to hand over, so skip it rather than pass null along and
                // risk aborting the loop for the remaining replicas.
                log.warn("Replica coreNodeName={} is in state {} but was not found among the replicas of shard {}; skipping leader-initiated recovery for it.", replicaCoreNodeName, lirState, shardId);
                continue;
            }
            zkController.ensureReplicaInLeaderInitiatedRecovery(cc, collection, shardId, coreNodeProps, core.getCoreDescriptor(), false);
        }
    }
// core gets closed automagically    
}
Also used : NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) SolrCore(org.apache.solr.core.SolrCore) Replica(org.apache.solr.common.cloud.Replica)

Example 4 with ZkCoreNodeProps

use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.

the class SyncStrategy method syncToMe.

/**
 * Asks every other replica of the shard to sync against this leader, then drains the
 * shard handler's responses and queues a recovery request for each replica whose
 * sync did not succeed (unless this strategy has been closed).
 *
 * @param zkController controller used to reach the ZooKeeper state reader
 * @param collection collection the shard belongs to
 * @param shardId shard whose replicas should sync
 * @param leaderProps node properties of the leader the replicas sync to
 * @param cd descriptor of the local core; its core node name is passed to the
 *        replica lookup
 * @param nUpdates number of updates forwarded to each sync request
 */
private void syncToMe(ZkController zkController, String collection, String shardId, ZkNodeProps leaderProps, CoreDescriptor cd, int nUpdates) {
    // sync everyone else
    // TODO: we should do this in parallel at least
    List<ZkCoreNodeProps> replicas = zkController
            .getZkStateReader()
            .getReplicaProps(collection, shardId, cd.getCloudDescriptor().getCoreNodeName());
    if (replicas == null) {
        log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + " has no replicas");
        return;
    }

    ZkCoreNodeProps leader = new ZkCoreNodeProps(leaderProps);
    for (ZkCoreNodeProps replica : replicas) {
        try {
            log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + ": try and ask " + replica.getCoreUrl() + " to sync");
            requestSync(replica.getBaseUrl(), replica.getCoreUrl(), leader.getCoreUrl(), replica.getCoreName(), nUpdates);
        } catch (Exception syncFailure) {
            // A failure to reach one replica must not stop the others from being asked.
            SolrException.log(log, "Error syncing replica to leader", syncFailure);
        }
    }

    // Drain responses until the handler reports there are none left.
    ShardResponse response;
    while ((response = shardHandler.takeCompletedOrError()) != null) {
        boolean synced = handleResponse(response);
        if (response.getException() != null) {
            SolrException.log(log, "Sync request error: " + response.getException());
        }

        if (synced) {
            log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + ": " + " sync completed with " + response.getShardAddress());
            continue;
        }

        log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Sync failed - we will ask replica (" + response.getShardAddress() + ") to recover.");
        if (isClosed) {
            log.info("We have been closed, don't request that a replica recover");
            continue;
        }

        ShardCoreRequest failedRequest = (ShardCoreRequest) response.getShardRequest();
        RecoveryRequest recovery = new RecoveryRequest();
        recovery.leaderProps = leaderProps;
        recovery.baseUrl = failedRequest.baseUrl;
        recovery.coreName = failedRequest.coreName;
        recoveryRequests.add(recovery);
    }
}
Also used : ShardResponse(org.apache.solr.handler.component.ShardResponse) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) IOException(java.io.IOException)

Example 5 with ZkCoreNodeProps

use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.

the class ZkController method getLeaderProps.

/**
   * Get leader props directly from zk nodes.
   *
   * <p>The shard-leader znode is read up to {@code timeoutms / 1000} times (integer
   * division, so a value below 1000 results in no attempt at all), sleeping one
   * second after each failed attempt.
   *
   * @param collection collection the slice belongs to
   * @param slice slice whose leader props are wanted
   * @param timeoutms overall budget in milliseconds, consumed in one-second steps
   * @param failImmediatelyOnExpiration if {@code true}, a ZooKeeper session expiry
   *        aborts at once with a {@link RuntimeException} instead of being retried
   * @return leader props
   * @throws InterruptedException if the thread is interrupted while reading or sleeping
   * @throws RuntimeException if the session expired and
   *         {@code failImmediatelyOnExpiration} is set; the expiry is the cause
   * @throws SolrException with {@code SERVICE_UNAVAILABLE} if the core container is
   *         shut down or no attempt succeeded; the last failure, if any, is the cause
   */
public ZkCoreNodeProps getLeaderProps(final String collection, final String slice, int timeoutms, boolean failImmediatelyOnExpiration) throws InterruptedException {
    int iterCount = timeoutms / 1000;
    Exception exp = null;
    while (iterCount-- > 0) {
        try {
            byte[] data = zkClient.getData(ZkStateReader.getShardLeadersPath(collection, slice), null, null, true);
            ZkCoreNodeProps leaderProps = new ZkCoreNodeProps(ZkNodeProps.load(data));
            return leaderProps;
        } catch (InterruptedException e) {
            throw e;
        } catch (SessionExpiredException e) {
            if (failImmediatelyOnExpiration) {
                // Chain the expiry that was just caught; `exp` only holds the failure of
                // an earlier iteration and is still null on the first attempt.
                throw new RuntimeException("Session has expired - could not get leader props", e);
            }
            exp = e;
            Thread.sleep(1000);
        } catch (Exception e) {
            // Remember the most recent failure so the final exception can report it.
            exp = e;
            Thread.sleep(1000);
        }
        // Stop retrying as soon as the container is going away.
        if (cc.isShutDown()) {
            throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "CoreContainer is closed");
        }
    }
    throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Could not get leader props", exp);
}
Also used : ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) SessionExpiredException(org.apache.zookeeper.KeeperException.SessionExpiredException) TimeoutException(java.util.concurrent.TimeoutException) SolrException(org.apache.solr.common.SolrException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) SessionExpiredException(org.apache.zookeeper.KeeperException.SessionExpiredException) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) SolrCoreInitializationException(org.apache.solr.core.SolrCoreInitializationException) SolrException(org.apache.solr.common.SolrException)

Aggregations

ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps)47 Replica (org.apache.solr.common.cloud.Replica)24 ArrayList (java.util.ArrayList)22 Slice (org.apache.solr.common.cloud.Slice)20 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)16 SolrException (org.apache.solr.common.SolrException)13 ClusterState (org.apache.solr.common.cloud.ClusterState)13 IOException (java.io.IOException)12 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)12 RetryNode (org.apache.solr.update.SolrCmdDistributor.RetryNode)12 StdNode (org.apache.solr.update.SolrCmdDistributor.StdNode)12 Node (org.apache.solr.update.SolrCmdDistributor.Node)11 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)10 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)10 SolrQuery (org.apache.solr.client.solrj.SolrQuery)9 ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException)8 KeeperException (org.apache.zookeeper.KeeperException)8 SolrServerException (org.apache.solr.client.solrj.SolrServerException)7 Random (java.util.Random)6 NamedList (org.apache.solr.common.util.NamedList)6