
Example 21 with ZkController

Use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

The class QueryElevationComponent, method getElevationMap.

// Get the elevation map, loading the config from the configset (SolrCloud) or the data dir (standalone)
Map<String, ElevationObj> getElevationMap(IndexReader reader, SolrCore core) throws Exception {
    synchronized (elevationCache) {
        // A null key holds a map loaded from the conf dir at init time; it applies to every reader
        Map<String, ElevationObj> map = elevationCache.get(null);
        if (map != null)
            return map;
        map = elevationCache.get(reader);
        if (map == null) {
            String f = initArgs.get(CONFIG_FILE);
            if (f == null) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "QueryElevationComponent must specify argument: " + CONFIG_FILE);
            }
            log.info("Loading QueryElevation from data dir: " + f);
            Config cfg;
            ZkController zkController = core.getCoreContainer().getZkController();
            if (zkController != null) {
                // SolrCloud mode: the core's resource loader is ZK-aware, so the config is read from the configset
                cfg = new Config(core.getResourceLoader(), f, null, null);
            } else {
                // Standalone mode: read the most recent versioned copy of the file from the data dir
                InputStream is = VersionedFile.getLatestFile(core.getDataDir(), f);
                cfg = new Config(core.getResourceLoader(), f, new InputSource(is), null);
            }
            map = loadElevationMap(cfg);
            elevationCache.put(reader, map);
        }
        return map;
    }
}
Also used: InputSource(org.xml.sax.InputSource) Config(org.apache.solr.core.Config) ZkController(org.apache.solr.cloud.ZkController) InputStream(java.io.InputStream) SolrException(org.apache.solr.common.SolrException)
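
The null check on getZkController() above is how these examples distinguish SolrCloud from standalone mode. As a minimal illustrative sketch of that check in isolation (CloudModeCheck and isCloudMode are hypothetical names, not part of Solr; coreContainer is assumed to be an initialized CoreContainer):

import org.apache.solr.cloud.ZkController;
import org.apache.solr.core.CoreContainer;

public class CloudModeCheck {

    // Returns true when this node runs in SolrCloud mode, i.e. a ZkController exists
    static boolean isCloudMode(CoreContainer coreContainer) {
        ZkController zkController = coreContainer.getZkController();
        // In standalone mode no ZooKeeper connection is set up, so this is null
        return zkController != null;
    }
}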

Example 22 with ZkController

Use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

The class TestInPlaceUpdatesDistrib, method delayedReorderingFetchesMissingUpdateFromLeaderTest.

private void delayedReorderingFetchesMissingUpdateFromLeaderTest() throws Exception {
    clearIndex();
    commit();
    float inplace_updatable_float = 1F;
    buildRandomIndex(inplace_updatable_float, Collections.singletonList(1));
    float newinplace_updatable_float = 100F;
    List<UpdateRequest> updates = new ArrayList<>();
    updates.add(regularUpdateRequest("id", 1, "title_s", "title1_new", "id_i", 1, "inplace_updatable_float", newinplace_updatable_float));
    updates.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
    updates.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
    // The next request to replica2 will be delayed by 6 secs (timeout is 5s)
    shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 6000);
    ExecutorService threadpool = ExecutorUtil.newMDCAwareFixedThreadPool(updates.size() + 1, new DefaultSolrThreadFactory(getTestName()));
    for (UpdateRequest update : updates) {
        AsyncUpdateWithRandomCommit task = new AsyncUpdateWithRandomCommit(update, cloudClient, random().nextLong());
        threadpool.submit(task);
        // We can't guarantee what order the updates execute in, since multiple threads
        // are involved, but a short sleep biases the scheduling toward submission order
        Thread.sleep(100);
    }
    threadpool.shutdown();
    assertTrue("Thread pool didn't terminate within 15 secs", threadpool.awaitTermination(15, TimeUnit.SECONDS));
    commit();
    // Check every 10ms, 100 times, for a replica to go down (& assert that it doesn't)
    for (int i = 0; i < 100; i++) {
        Thread.sleep(10);
        cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
        ClusterState state = cloudClient.getZkStateReader().getClusterState();
        int numActiveReplicas = 0;
        for (Replica rep : state.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) {
            if (rep.getState().equals(Replica.State.ACTIVE)) {
                numActiveReplicas++;
            }
        }
        assertEquals("The replica receiving reordered updates must not have gone down", 3, numActiveReplicas);
    }
    for (SolrClient client : clients) {
        log.info("Testing client (Fetch missing test): " + ((HttpSolrClient) client).getBaseURL());
        log.info("Version at " + ((HttpSolrClient) client).getBaseURL() + " is: " + getReplicaValue(client, 1, "_version_"));
        assertReplicaValue(client, 1, "inplace_updatable_float", (newinplace_updatable_float + 2.0f), "inplace_updatable_float didn't match for replica at client: " + ((HttpSolrClient) client).getBaseURL());
        assertReplicaValue(client, 1, "title_s", "title1_new", "Title didn't match for replica at client: " + ((HttpSolrClient) client).getBaseURL());
    }
    // Try another round of these updates, this time with a delete request at the end.
    // This ensures that fetching a missing update from the leader doesn't bomb out if
    // the document has since been deleted on the leader
    {
        clearIndex();
        commit();
        shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().unsetDelay();
        updates.add(regularDeleteRequest(1));
        // Delay the first update request
        shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 5999);
        // Delay the delete request (the fourth request in the updates list)
        shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 4, 5998);
        threadpool = ExecutorUtil.newMDCAwareFixedThreadPool(updates.size() + 1, new DefaultSolrThreadFactory(getTestName()));
        for (UpdateRequest update : updates) {
            AsyncUpdateWithRandomCommit task = new AsyncUpdateWithRandomCommit(update, cloudClient, random().nextLong());
            threadpool.submit(task);
            // We can't guarantee what order the updates execute in, since multiple threads
            // are involved, but a short sleep biases the scheduling toward submission order
            Thread.sleep(100);
        }
        threadpool.shutdown();
        assertTrue("Thread pool didn't terminate within 15 secs", threadpool.awaitTermination(15, TimeUnit.SECONDS));
        commit();
        // TODO: Could try checking ZK for LIR flags to ensure LIR has not kicked in
        // Check every 10ms, 100 times, for a replica to go down (& assert that it doesn't)
        ZkController zkController = shardToLeaderJetty.get(SHARD1).jetty.getCoreContainer().getZkController();
        String lirPath = zkController.getLeaderInitiatedRecoveryZnodePath(DEFAULT_TEST_COLLECTION_NAME, SHARD1);
        assertFalse(zkController.getZkClient().exists(lirPath, true));
        for (int i = 0; i < 100; i++) {
            Thread.sleep(10);
            cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
            ClusterState state = cloudClient.getZkStateReader().getClusterState();
            int numActiveReplicas = 0;
            for (Replica rep : state.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) {
                if (rep.getState().equals(Replica.State.ACTIVE)) {
                    numActiveReplicas++;
                }
            }
            assertEquals("The replica receiving reordered updates must not have gone down", 3, numActiveReplicas);
        }
        for (SolrClient client : new SolrClient[] { LEADER, NONLEADERS.get(0), NONLEADERS.get(1) }) {
            // nonleader 0 is the re-ordered replica, nonleader 1 the well-ordered replica
            SolrDocument doc = client.getById(String.valueOf(1), params("distrib", "false"));
            assertNull("This doc was supposed to have been deleted, but was: " + doc, doc);
        }
    }
    log.info("delayedReorderingFetchesMissingUpdateFromLeaderTest: This test passed fine...");
}
Also used: ClusterState(org.apache.solr.common.cloud.ClusterState) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) Replica(org.apache.solr.common.cloud.Replica) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrDocument(org.apache.solr.common.SolrDocument) SolrClient(org.apache.solr.client.solrj.SolrClient) ZkController(org.apache.solr.cloud.ZkController) ExecutorService(java.util.concurrent.ExecutorService)
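
AsyncUpdateWithRandomCommit is defined elsewhere in TestInPlaceUpdatesDistrib and is not shown here. A plausible minimal sketch, inferred from the call site above (the constructor signature matches the call; the random-commit behavior is an assumption based on the name):

import java.util.Random;
import java.util.concurrent.Callable;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;

// Hypothetical sketch; the real class lives inside the test and may differ
class AsyncUpdateWithRandomCommit implements Callable<UpdateResponse> {

    private final UpdateRequest update;
    private final SolrClient client;
    private final Random random;

    AsyncUpdateWithRandomCommit(UpdateRequest update, SolrClient client, long seed) {
        this.update = update;
        this.client = client;
        this.random = new Random(seed);
    }

    @Override
    public UpdateResponse call() throws Exception {
        // Send the update, then occasionally commit so that updates land in
        // different searcher states across runs
        UpdateResponse response = update.process(client);
        if (random.nextInt(3) == 0) {
            client.commit();
        }
        return response;
    }
}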

Example 23 with ZkController

Use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

The class ManagedIndexSchema, method persistManagedSchemaToZooKeeper.

/**
   * Persists the managed schema to ZooKeeper using optimistic concurrency.
   * <p>
   * If createOnly is true, success means the schema znode was created, or already existed.
   * <p>
   * If createOnly is false, success means the schema was persisted - this will only happen
   * if schemaZkVersion matches the version in ZooKeeper.
   *
   * @return true on success
   */
boolean persistManagedSchemaToZooKeeper(boolean createOnly) {
    final ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader) loader;
    final ZkController zkController = zkLoader.getZkController();
    final SolrZkClient zkClient = zkController.getZkClient();
    final String managedSchemaPath = zkLoader.getConfigSetZkPath() + "/" + managedSchemaResourceName;
    boolean success = true;
    boolean schemaChangedInZk = false;
    try {
        // Persist the managed schema
        StringWriter writer = new StringWriter();
        persist(writer);
        final byte[] data = writer.toString().getBytes(StandardCharsets.UTF_8);
        if (createOnly) {
            try {
                zkClient.create(managedSchemaPath, data, CreateMode.PERSISTENT, true);
                schemaZkVersion = 0;
                log.info("Created and persisted managed schema znode at " + managedSchemaPath);
            } catch (KeeperException.NodeExistsException e) {
                // This is okay - do nothing and fall through
                log.info("Managed schema znode at " + managedSchemaPath + " already exists - no need to create it");
            }
        } else {
            try {
                // Assumption: the path exists
                Stat stat = zkClient.setData(managedSchemaPath, data, schemaZkVersion, true);
                schemaZkVersion = stat.getVersion();
                log.info("Persisted managed schema version " + schemaZkVersion + " at " + managedSchemaPath);
            } catch (KeeperException.BadVersionException e) {
                log.error("Bad version when trying to persist schema using " + schemaZkVersion + " due to: " + e);
                success = false;
                schemaChangedInZk = true;
            }
        }
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
        }
        final String msg = "Error persisting managed schema at " + managedSchemaPath;
        log.error(msg, e);
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }
    if (schemaChangedInZk) {
        String msg = "Failed to persist managed schema at " + managedSchemaPath + " - version mismatch";
        log.info(msg);
        throw new SchemaChangedInZkException(ErrorCode.CONFLICT, msg + ", retry.");
    }
    return success;
}
Also used: SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Stat(org.apache.zookeeper.data.Stat) StringWriter(java.io.StringWriter) ZkController(org.apache.solr.cloud.ZkController) ZkSolrResourceLoader(org.apache.solr.cloud.ZkSolrResourceLoader)
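
Callers are expected to treat SchemaChangedInZkException as a signal to reload the schema and retry, which is what makes the concurrency optimistic. A minimal sketch of that retry loop, assuming same-package access (the method is package-private) and a hypothetical reloadSchemaFromZk() helper that re-reads the latest schema and re-applies the pending changes:

// Hedged sketch of the optimistic-concurrency retry pattern the javadoc describes
ManagedIndexSchema persistWithRetry(ManagedIndexSchema schema) {
    while (true) {
        try {
            schema.persistManagedSchemaToZooKeeper(false);
            return schema;
        } catch (ManagedIndexSchema.SchemaChangedInZkException e) {
            // Another node won the race: reload the latest schema, re-apply our
            // changes, and try again against the fresh ZK version
            schema = reloadSchemaFromZk(); // hypothetical helper
        }
    }
}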

Example 24 with ZkController

Use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

The class CdcrRequestHandler, method handleCollectionCheckpointAction.

/**
   * This action is generally executed on the target cluster in order to retrieve the latest update checkpoint.
   * This checkpoint is used on the source cluster to set up the
   * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader} of a shard leader. <br/>
   * This method executes one
   * {@link org.apache.solr.handler.CdcrParams.CdcrAction#SHARDCHECKPOINT} request per shard leader, in parallel,
   * then picks the lowest version number as the checkpoint. Picking the lowest amongst all shards ensures that we do not
   * pick a checkpoint that is ahead of the source cluster. This can occur when other shard leaders are sending new
   * updates to the target cluster while we are instantiating the
   * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader}.
   * This solution only works in scenarios where the topologies of the source and target clusters are identical.
   */
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
    ZkController zkController = core.getCoreContainer().getZkController();
    try {
        zkController.getZkStateReader().forceUpdateCollection(collection);
    } catch (Exception e) {
        log.warn("Error when updating cluster state", e);
    }
    ClusterState cstate = zkController.getClusterState();
    Collection<Slice> shards = cstate.getActiveSlices(collection);
    ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
    long checkpoint = Long.MAX_VALUE;
    try {
        List<Callable<Long>> callables = new ArrayList<>();
        for (Slice shard : shards) {
            ZkNodeProps leaderProps = zkController.getZkStateReader().getLeaderRetry(collection, shard.getName());
            ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
            callables.add(new SliceCheckpointCallable(nodeProps.getCoreUrl(), path));
        }
        for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
            long version = future.get();
            if (version < checkpoint) {
                // we must take the lowest checkpoint from all the shards
                checkpoint = version;
            }
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
    } catch (ExecutionException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
    } finally {
        parallelExecutor.shutdown();
    }
    rsp.add(CdcrParams.CHECKPOINT, checkpoint);
}
Also used: ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) CancellationException(java.util.concurrent.CancellationException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Callable(java.util.concurrent.Callable) ZkController(org.apache.solr.cloud.ZkController) Slice(org.apache.solr.common.cloud.Slice) ExecutorService(java.util.concurrent.ExecutorService)
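
SliceCheckpointCallable is a private helper of CdcrRequestHandler and is not shown here. Based on the javadoc above, it issues a SHARDCHECKPOINT request to each shard leader's core URL and returns the reported checkpoint; a plausible sketch (details such as client configuration are assumptions):

import java.util.concurrent.Callable;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.CdcrParams;

// Hedged sketch of a per-shard checkpoint request; the real class may differ in detail
class SliceCheckpointCallable implements Callable<Long> {

    private final String coreUrl;
    private final String cdcrPath;

    SliceCheckpointCallable(String coreUrl, String cdcrPath) {
        this.coreUrl = coreUrl;
        this.cdcrPath = cdcrPath;
    }

    @Override
    public Long call() throws Exception {
        try (HttpSolrClient client = new HttpSolrClient.Builder(coreUrl).build()) {
            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set(CommonParams.ACTION, CdcrParams.CdcrAction.SHARDCHECKPOINT.toString());
            QueryRequest request = new QueryRequest(params);
            request.setPath(cdcrPath);
            NamedList<Object> response = client.request(request);
            // The handler reports the shard leader's latest update version under CHECKPOINT
            return (Long) response.get(CdcrParams.CHECKPOINT);
        }
    }
}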

Example 25 with ZkController

Use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.

The class DocExpirationUpdateProcessorFactory, method iAmInChargeOfPeriodicDeletes.

/**
   * <p>
   * Helper method that returns true if the Runnable managed by this factory
   * should be responsible for doing the periodic deletes.
   * </p>
   * <p>
   * In simple standalone installations this method always returns true,
   * but in cloud mode it will be true if and only if we are currently the leader
   * of the (active) slice with the first name (lexicographically).
   * </p>
   * <p>
   * If this method returns false, it may also have logged a message letting the user
   * know why we aren't attempting periodic deletion (but it will try not to log
   * this excessively)
   * </p>
   */
private boolean iAmInChargeOfPeriodicDeletes() {
    ZkController zk = core.getCoreContainer().getZkController();
    if (null == zk)
        return true;
    // This is a lot simpler than doing our own "leader" election across all replicas
    // of all shards since:
    //   a) we already have a per shard leader
    //   b) shard names must be unique
    //   c) ClusterState is already being "watched" by ZkController, no additional zk hits
    //   d) there might be multiple instances of this factory (in multiple chains) per 
    //      collection, so picking an ephemeral node name for our election would be tricky
    CloudDescriptor desc = core.getCoreDescriptor().getCloudDescriptor();
    String col = desc.getCollectionName();
    List<Slice> slices = new ArrayList<Slice>(zk.getClusterState().getActiveSlices(col));
    Collections.sort(slices, COMPARE_SLICES_BY_NAME);
    if (slices.isEmpty()) {
        log.error("Collection {} has no active Slices?", col);
        return false;
    }
    Replica firstSliceLeader = slices.get(0).getLeader();
    if (null == firstSliceLeader) {
        log.warn("Slice in charge of periodic deletes for {} does not currently have a leader", col);
        return false;
    }
    String leaderInCharge = firstSliceLeader.getName();
    String myCoreNodeName = desc.getCoreNodeName();
    boolean inChargeOfDeletesRightNow = leaderInCharge.equals(myCoreNodeName);
    if (previouslyInChargeOfDeletes && !inChargeOfDeletesRightNow) {
        // don't spam the logs constantly; just log when we know we're not in charge
        // (the first time -- or anytime we were, but no longer are)
        log.info("Not currently in charge of periodic deletes for this collection, " + "will not trigger delete or log again until this changes");
    }
    previouslyInChargeOfDeletes = inChargeOfDeletesRightNow;
    return inChargeOfDeletesRightNow;
}
Also used: ZkController(org.apache.solr.cloud.ZkController) Slice(org.apache.solr.common.cloud.Slice) ArrayList(java.util.ArrayList) Replica(org.apache.solr.common.cloud.Replica) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor)
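
COMPARE_SLICES_BY_NAME is a constant defined elsewhere in the factory; given how it is used above, it is presumably a comparator ordering slices lexicographically by shard name, along the lines of this sketch:

import java.util.Comparator;
import org.apache.solr.common.cloud.Slice;

// Presumed definition: sort slices by name so that slices.get(0) is the
// lexicographically-first slice, whose leader takes charge of periodic deletes
private static final Comparator<Slice> COMPARE_SLICES_BY_NAME =
    Comparator.comparing(Slice::getName);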

Aggregations

ZkController (org.apache.solr.cloud.ZkController): 26 usages
SolrException (org.apache.solr.common.SolrException): 12 usages
ArrayList (java.util.ArrayList): 7 usages
SolrParams (org.apache.solr.common.params.SolrParams): 7 usages
HashMap (java.util.HashMap): 6 usages
IOException (java.io.IOException): 5 usages
CloudDescriptor (org.apache.solr.cloud.CloudDescriptor): 5 usages
ClusterState (org.apache.solr.common.cloud.ClusterState): 5 usages
Replica (org.apache.solr.common.cloud.Replica): 4 usages
Slice (org.apache.solr.common.cloud.Slice): 4 usages
SolrCore (org.apache.solr.core.SolrCore): 4 usages
List (java.util.List): 3 usages
ExecutorService (java.util.concurrent.ExecutorService): 3 usages
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 3 usages
NamedList (org.apache.solr.common.util.NamedList): 3 usages
URI (java.net.URI): 2 usages
Map (java.util.Map): 2 usages
ExecutionException (java.util.concurrent.ExecutionException): 2 usages
FilterConfig (javax.servlet.FilterConfig): 2 usages
ServletContext (javax.servlet.ServletContext): 2 usages