Search in sources :

Example 16 with ZkController

use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

the class CoreSorterTest method getMockContainer.

/**
 * Creates a Mockito mock of {@code CoreContainer} wired for the tests in this class.
 * <p>
 * The returned container reports itself as ZooKeeper aware and hands out a mocked
 * {@code ZkController} whose mocked {@code ClusterState} answers {@code getLiveNodes()}
 * with the {@code liveNodes} value visible to this method.
 * </p>
 *
 * @return the stubbed container mock
 */
private CoreContainer getMockContainer() {
    // Innermost collaborator first: the cluster state that exposes the live nodes.
    ClusterState clusterState = mock(ClusterState.class);
    when(clusterState.getLiveNodes()).thenReturn(liveNodes);

    // The ZK controller only needs to hand out that cluster state.
    ZkController zkController = mock(ZkController.class);
    when(zkController.getClusterState()).thenReturn(clusterState);

    // Finally the container itself, which must look like it is running in cloud mode.
    CoreContainer container = mock(CoreContainer.class);
    when(container.getZkController()).thenReturn(zkController);
    when(container.isZooKeeperAware()).thenReturn(true);
    return container;
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) MockCoreContainer(org.apache.solr.util.MockCoreContainer) ZkController(org.apache.solr.cloud.ZkController)

Example 17 with ZkController

use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

the class TestInPlaceUpdatesDistrib method delayedReorderingFetchesMissingUpdateFromLeaderTest.

/**
 * Sends a full-document update followed by two dependent in-place "inc" updates while delaying
 * requests on one replica's jetty, then verifies that:
 * <ul>
 *   <li>all three replicas of SHARD1 stay ACTIVE,</li>
 *   <li>every client sees the final float value (initial new value + 2) and the new title.</li>
 * </ul>
 * A second round repeats the same updates with a trailing delete request and checks that no
 * leader-initiated-recovery znode exists, that all replicas stay ACTIVE, and that the document
 * is gone from the leader and both non-leaders.
 *
 * @throws Exception if any of the indexing, ZooKeeper or assertion helpers fail
 */
private void delayedReorderingFetchesMissingUpdateFromLeaderTest() throws Exception {
    // Start from an empty, committed index containing only doc id=1.
    clearIndex();
    commit();
    float inplace_updatable_float = 1F;
    buildRandomIndex(inplace_updatable_float, Collections.singletonList(1));
    float newinplace_updatable_float = 100F;
    // One regular (full document) update followed by two in-place increments that depend on it.
    List<UpdateRequest> updates = new ArrayList<>();
    updates.add(regularUpdateRequest("id", 1, "title_s", "title1_new", "id_i", 1, "inplace_updatable_float", newinplace_updatable_float));
    updates.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
    updates.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
    // The next request to replica2 will be delayed by 6 secs (timeout is 5s)
    shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 6000);
    ExecutorService threadpool = ExecutorUtil.newMDCAwareFixedThreadPool(updates.size() + 1, new DefaultSolrThreadFactory(getTestName()));
    for (UpdateRequest update : updates) {
        AsyncUpdateWithRandomCommit task = new AsyncUpdateWithRandomCommit(update, cloudClient, random().nextLong());
        threadpool.submit(task);
        // We can't guarantee/trust what order the updates are executed in, since multiple threads
        // are involved, but we try to bias the thread scheduling to run them in the order submitted.
        Thread.sleep(100);
    }
    // No more tasks will be submitted; wait for the in-flight updates to finish.
    threadpool.shutdown();
    assertTrue("Thread pool didn't terminate within 15 secs", threadpool.awaitTermination(15, TimeUnit.SECONDS));
    commit();
    // Check every 10ms, 100 times, for a replica to go down (& assert that it doesn't)
    for (int i = 0; i < 100; i++) {
        Thread.sleep(10);
        cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
        ClusterState state = cloudClient.getZkStateReader().getClusterState();
        int numActiveReplicas = 0;
        for (Replica rep : state.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) if (rep.getState().equals(Replica.State.ACTIVE))
            numActiveReplicas++;
        assertEquals("The replica receiving reordered updates must not have gone down", 3, numActiveReplicas);
    }
    // Every client must see both increments applied on top of the new value, plus the new title.
    for (SolrClient client : clients) {
        log.info("Testing client (Fetch missing test): " + ((HttpSolrClient) client).getBaseURL());
        log.info("Version at " + ((HttpSolrClient) client).getBaseURL() + " is: " + getReplicaValue(client, 1, "_version_"));
        assertReplicaValue(client, 1, "inplace_updatable_float", (newinplace_updatable_float + 2.0f), "inplace_updatable_float didn't match for replica at client: " + ((HttpSolrClient) client).getBaseURL());
        assertReplicaValue(client, 1, "title_s", "title1_new", "Title didn't match for replica at client: " + ((HttpSolrClient) client).getBaseURL());
    }
    // Try another round of these updates, this time with a delete request at the end.
    // This is to ensure that fetching the missing update from the leader doesn't bomb out if the
    // document has been deleted on the leader later on.
    {
        clearIndex();
        commit();
        // Drop the delay configured for the first round before installing the new ones.
        shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().unsetDelay();
        updates.add(regularDeleteRequest(1));
        // NOTE(review): the numeric arguments presumably select which request is delayed and for
        // how many ms -- confirm against the debug filter's addDelay signature.
        // the first update
        shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 5999);
        // the delete update
        shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 4, 5998);
        // A fresh pool is needed because the previous one has already been shut down.
        threadpool = ExecutorUtil.newMDCAwareFixedThreadPool(updates.size() + 1, new DefaultSolrThreadFactory(getTestName()));
        for (UpdateRequest update : updates) {
            AsyncUpdateWithRandomCommit task = new AsyncUpdateWithRandomCommit(update, cloudClient, random().nextLong());
            threadpool.submit(task);
            // We can't guarantee/trust what order the updates are executed in, since multiple threads
            // are involved, but we try to bias the thread scheduling to run them in the order submitted.
            Thread.sleep(100);
        }
        threadpool.shutdown();
        assertTrue("Thread pool didn't terminate within 15 secs", threadpool.awaitTermination(15, TimeUnit.SECONDS));
        commit();
        // TODO: Could try checking ZK for LIR flags to ensure LIR has not kicked in
        // Assert that the leader-initiated-recovery znode path for this shard does not exist.
        ZkController zkController = shardToLeaderJetty.get(SHARD1).jetty.getCoreContainer().getZkController();
        String lirPath = zkController.getLeaderInitiatedRecoveryZnodePath(DEFAULT_TEST_COLLECTION_NAME, SHARD1);
        assertFalse(zkController.getZkClient().exists(lirPath, true));
        // Check every 10ms, 100 times, for a replica to go down (& assert that it doesn't)
        for (int i = 0; i < 100; i++) {
            Thread.sleep(10);
            cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
            ClusterState state = cloudClient.getZkStateReader().getClusterState();
            int numActiveReplicas = 0;
            for (Replica rep : state.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) if (rep.getState().equals(Replica.State.ACTIVE))
                numActiveReplicas++;
            assertEquals("The replica receiving reordered updates must not have gone down", 3, numActiveReplicas);
        }
        // Query each replica directly (distrib=false) so every copy is checked for the delete.
        for (SolrClient client : new SolrClient[] { LEADER, NONLEADERS.get(0), NONLEADERS.get(1) }) {
            // nonleader 0 re-ordered replica, nonleader 1 well-ordered replica
            SolrDocument doc = client.getById(String.valueOf(1), params("distrib", "false"));
            assertNull("This doc was supposed to have been deleted, but was: " + doc, doc);
        }
    }
    log.info("delayedReorderingFetchesMissingUpdateFromLeaderTest: This test passed fine...");
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) Replica(org.apache.solr.common.cloud.Replica) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrDocument(org.apache.solr.common.SolrDocument) SolrClient(org.apache.solr.client.solrj.SolrClient) ZkController(org.apache.solr.cloud.ZkController) ExecutorService(java.util.concurrent.ExecutorService)

Example 18 with ZkController

use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

the class CdcrRequestHandler method handleCollectionCheckpointAction.

/**
   * This action is generally executed on the target cluster in order to retrieve the latest update checkpoint.
   * This checkpoint is used on the source cluster to setup the
   * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader} of a shard leader. <br>
   * This method will execute in parallel one
   * {@link org.apache.solr.handler.CdcrParams.CdcrAction#SHARDCHECKPOINT} request per shard leader. It will
   * then pick the lowest version number as checkpoint. Picking the lowest amongst all shards will ensure that we do not
   * pick a checkpoint that is ahead of the source cluster. This can occur when other shard leaders are sending new
   * updates to the target cluster while we are currently instantiating the
   * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader}.
   * This solution only works in scenarios where the topology of the source and target clusters are identical.
   * <p>
   * The cluster state refresh at the start is best effort: a failure is logged and the method carries on with
   * whatever state is available. If that failure was an interruption, the thread's interrupt status is restored
   * so that it is not silently lost. If there are no active slices to query, {@link Long#MAX_VALUE} is reported.
   * </p>
   *
   * @param req the incoming request (not read by this method)
   * @param rsp the response to which the checkpoint is added under {@code CdcrParams.CHECKPOINT}
   * @throws SolrException with {@code SERVER_ERROR} if collecting the per-shard checkpoints is interrupted or fails
   */
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
    ZkController zkController = core.getCoreContainer().getZkController();
    try {
        zkController.getZkStateReader().forceUpdateCollection(collection);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Do not swallow the interruption: keep the flag set for the code that follows and for callers.
            Thread.currentThread().interrupt();
        }
        log.warn("Error when updating cluster state", e);
    }
    ClusterState cstate = zkController.getClusterState();
    Collection<Slice> shards = cstate.getActiveSlices(collection);
    ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
    long checkpoint = Long.MAX_VALUE;
    try {
        // One checkpoint request per shard leader, all executed in parallel.
        List<Callable<Long>> callables = new ArrayList<>();
        for (Slice shard : shards) {
            ZkNodeProps leaderProps = zkController.getZkStateReader().getLeaderRetry(collection, shard.getName());
            ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
            callables.add(new SliceCheckpointCallable(nodeProps.getCoreUrl(), path));
        }
        for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
            long version = future.get();
            if (version < checkpoint) {
                // we must take the lowest checkpoint from all the shards
                checkpoint = version;
            }
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
    } catch (ExecutionException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
    } finally {
        // Always release the executor, whether the requests succeeded or not.
        parallelExecutor.shutdown();
    }
    rsp.add(CdcrParams.CHECKPOINT, checkpoint);
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) CancellationException(java.util.concurrent.CancellationException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Callable(java.util.concurrent.Callable) ZkController(org.apache.solr.cloud.ZkController) Slice(org.apache.solr.common.cloud.Slice) ExecutorService(java.util.concurrent.ExecutorService)

Example 19 with ZkController

use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

the class DocExpirationUpdateProcessorFactory method iAmInChargeOfPeriodicDeletes.

/**
   * <p>
   * Decides whether the Runnable managed by this factory should currently perform
   * the periodic deletes.
   * </p>
   * <p>
   * When the core container has no ZkController (e.g. a simple standalone installation)
   * the answer is always true. Otherwise the answer is true if and only if this core's
   * core node name matches the name of the leader of the active slice that sorts first
   * according to <code>COMPARE_SLICES_BY_NAME</code>.
   * </p>
   * <p>
   * A false result may be accompanied by a log message explaining why no periodic
   * deletion is attempted; the "not in charge" message is only emitted on the transition
   * from being in charge to not being in charge, to avoid flooding the logs.
   * </p>
   *
   * @return true if this core should run the periodic deletes right now
   */
private boolean iAmInChargeOfPeriodicDeletes() {
    final ZkController zkController = core.getCoreContainer().getZkController();
    if (zkController == null) {
        // No ZkController available: nobody else to coordinate with.
        return true;
    }

    // Re-using the existing per-shard leaders is much simpler than running a dedicated
    // "leader" election across every replica of every shard, because:
    //   a) each shard already has a leader
    //   b) shard names are unique
    //   c) ZkController already "watches" the ClusterState, so no extra zk hits are needed
    //   d) several instances of this factory (in several chains) may exist per collection,
    //      which would make choosing an ephemeral node name for our own election tricky
    final CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
    final String collectionName = cloudDesc.getCollectionName();
    final List<Slice> sortedSlices = new ArrayList<>(zkController.getClusterState().getActiveSlices(collectionName));
    Collections.sort(sortedSlices, COMPARE_SLICES_BY_NAME);

    if (sortedSlices.isEmpty()) {
        log.error("Collection {} has no active Slices?", collectionName);
        return false;
    }

    final Replica leaderOfFirstSlice = sortedSlices.get(0).getLeader();
    if (leaderOfFirstSlice == null) {
        log.warn("Slice in charge of periodic deletes for {} does not currently have a leader", collectionName);
        return false;
    }

    final boolean inChargeNow = leaderOfFirstSlice.getName().equals(cloudDesc.getCoreNodeName());
    if (!inChargeNow && previouslyInChargeOfDeletes) {
        // Only log on the transition (we were in charge, and no longer are) rather than
        // on every invocation, so the logs are not spammed.
        log.info("Not currently in charge of periodic deletes for this collection, "
            + "will not trigger delete or log again until this changes");
    }
    previouslyInChargeOfDeletes = inChargeNow;
    return inChargeNow;
}
Also used : ZkController(org.apache.solr.cloud.ZkController) Slice(org.apache.solr.common.cloud.Slice) ArrayList(java.util.ArrayList) Replica(org.apache.solr.common.cloud.Replica) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor)

Example 20 with ZkController

use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

the class PeerSync method msg.

// Builds the common prefix of PeerSync related debug messages. The prefix carries the
// core name (and the ZkController base URL, when a ZkController exists) for correlation.
private String msg() {
    final ZkController zk = uhandler.core.getCoreContainer().getZkController();
    // Without a ZkController the url part of the prefix stays empty.
    final String baseUrl = (zk == null) ? "" : zk.getBaseUrl();
    // TODO: core name turns up blank in many tests - find URL if cloud enabled?
    return "PeerSync: core=" + uhandler.core.getName() + " url=" + baseUrl + " ";
}
Also used : ZkController(org.apache.solr.cloud.ZkController)

Aggregations

ZkController (org.apache.solr.cloud.ZkController): 26, SolrException (org.apache.solr.common.SolrException): 12, ArrayList (java.util.ArrayList): 7, SolrParams (org.apache.solr.common.params.SolrParams): 7, HashMap (java.util.HashMap): 6, IOException (java.io.IOException): 5, CloudDescriptor (org.apache.solr.cloud.CloudDescriptor): 5, ClusterState (org.apache.solr.common.cloud.ClusterState): 5, Replica (org.apache.solr.common.cloud.Replica): 4, Slice (org.apache.solr.common.cloud.Slice): 4, SolrCore (org.apache.solr.core.SolrCore): 4, List (java.util.List): 3, ExecutorService (java.util.concurrent.ExecutorService): 3, ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 3, NamedList (org.apache.solr.common.util.NamedList): 3, URI (java.net.URI): 2, Map (java.util.Map): 2, ExecutionException (java.util.concurrent.ExecutionException): 2, FilterConfig (javax.servlet.FilterConfig): 2, ServletContext (javax.servlet.ServletContext): 2