
Example 11 with ZooKeeperException

Use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.

Class RecoveryStrategy, method run:

@Override
public final void run() {
    // set request info for logging
    try (SolrCore core = cc.getCore(coreName)) {
        if (core == null) {
            SolrException.log(LOG, "SolrCore not found - cannot recover:" + coreName);
            return;
        }
        MDCLoggingContext.setCore(core);
        LOG.info("Starting recovery process. recoveringAfterStartup=" + recoveringAfterStartup);
        try {
            doRecovery(core);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            SolrException.log(LOG, "", e);
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
        } catch (Exception e) {
            LOG.error("", e);
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
        }
    } finally {
        MDCLoggingContext.clear();
    }
}
Also used: ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) SolrCore(org.apache.solr.core.SolrCore) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) SocketTimeoutException(java.net.SocketTimeoutException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)
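
Both catch blocks above follow the same idiom: log the failure, restore the interrupt flag when it is an InterruptedException, and rethrow as a ZooKeeperException carrying ErrorCode.SERVER_ERROR. The sketch below distills that idiom into a standalone helper; the names ZkCallUtil, ZkCall and runOrWrap are hypothetical and not part of Solr.

import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.zookeeper.KeeperException;

public final class ZkCallUtil {

    @FunctionalInterface
    public interface ZkCall {
        void run() throws KeeperException, InterruptedException;
    }

    /** Runs a ZooKeeper interaction and rethrows failures as ZooKeeperException (SERVER_ERROR). */
    public static void runOrWrap(ZkCall call, String msg) {
        try {
            call.run();
        } catch (InterruptedException e) {
            // restore the interrupt flag before wrapping, so callers still observe the interruption
            Thread.currentThread().interrupt();
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, msg, e);
        } catch (KeeperException e) {
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, msg, e);
        }
    }
}

The ordering matters: interrupting the current thread before throwing preserves the interruption for any caller that later checks Thread.interrupted(), which is exactly what the RecoveryStrategy code above does.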

Example 12 with ZooKeeperException

Use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.

Class ZkIndexSchemaReader, method createSchemaWatcher:

public void createSchemaWatcher() {
    log.info("Creating ZooKeeper watch for the managed schema at " + managedSchemaPath);
    try {
        zkClient.exists(managedSchemaPath, new Watcher() {

            @Override
            public void process(WatchedEvent event) {
                if (ZkIndexSchemaReader.this.isRemoved) {
                    // the core for this reader has already been removed, don't process this event
                    return;
                }
                // session events are not change events, and do not remove the watcher
                if (Event.EventType.None.equals(event.getType())) {
                    return;
                }
                log.info("A schema change: {}, has occurred - updating schema from ZooKeeper ...", event);
                try {
                    updateSchema(this, -1);
                } catch (KeeperException e) {
                    if (e.code() == KeeperException.Code.SESSIONEXPIRED || e.code() == KeeperException.Code.CONNECTIONLOSS) {
                        log.warn("ZooKeeper watch triggered, but Solr cannot talk to ZK");
                        return;
                    }
                    log.error("", e);
                    throw new ZooKeeperException(ErrorCode.SERVER_ERROR, "", e);
                } catch (InterruptedException e) {
                    // Restore the interrupted status
                    Thread.currentThread().interrupt();
                    log.warn("", e);
                }
            }
        }, true);
    } catch (KeeperException e) {
        final String msg = "Error creating ZooKeeper watch for the managed schema";
        log.error(msg, e);
        throw new ZooKeeperException(ErrorCode.SERVER_ERROR, msg, e);
    } catch (InterruptedException e) {
        // Restore the interrupted status
        Thread.currentThread().interrupt();
        log.warn("", e);
    }
}
Also used: WatchedEvent(org.apache.zookeeper.WatchedEvent) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) Watcher(org.apache.zookeeper.Watcher) KeeperException(org.apache.zookeeper.KeeperException)
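
ZooKeeper watches are one-shot: the Watcher registered through zkClient.exists(...) fires once and must be set again, which is why process(...) above hands this back into updateSchema(this, -1) so the watch is re-established after the schema is re-read. The sketch below shows the bare re-registration pattern against the raw ZooKeeper client; SelfRenewingWatcher is a hypothetical class, not Solr code.

import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

public class SelfRenewingWatcher implements Watcher {

    private final ZooKeeper zk;
    private final String path;

    public SelfRenewingWatcher(ZooKeeper zk, String path) {
        this.zk = zk;
        this.path = path;
    }

    /** Registers (or re-registers) this watcher on the node. */
    public void watch() throws KeeperException, InterruptedException {
        // exists() sets a one-shot watch even if the node does not exist yet
        zk.exists(path, this);
    }

    @Override
    public void process(WatchedEvent event) {
        // connection/session events are not data changes and do not consume the watch
        if (Event.EventType.None.equals(event.getType())) {
            return;
        }
        try {
            // ... react to the change, e.g. re-read the node ...
            watch(); // re-arm the watch, otherwise no further events arrive
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // restore interrupt status
        } catch (KeeperException e) {
            // a real implementation would log and decide whether to retry
        }
    }
}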

Example 13 with ZooKeeperException

Use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.

Class ZkController, method register:

/**
   * Register shard with ZooKeeper.
   *
   * @return the shardId for the SolrCore
   */
public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration, boolean skipRecovery) throws Exception {
    try (SolrCore core = cc.getCore(desc.getName())) {
        MDCLoggingContext.setCore(core);
    }
    try {
        // pre register has published our down state
        final String baseUrl = getBaseUrl();
        final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
        final String collection = cloudDesc.getCollectionName();
        final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName();
        assert coreZkNodeName != null : "we should have a coreNodeName by now";
        String shardId = cloudDesc.getShardId();
        Map<String, Object> props = new HashMap<>();
        // we only put a subset of props into the leader node
        props.put(ZkStateReader.BASE_URL_PROP, baseUrl);
        props.put(ZkStateReader.CORE_NAME_PROP, coreName);
        props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
        log.debug("Register replica - core:{} address:{} collection:{} shard:{}", coreName, baseUrl, cloudDesc.getCollectionName(), shardId);
        ZkNodeProps leaderProps = new ZkNodeProps(props);
        try {
            // If we're a preferred leader, insert ourselves at the head of the queue
            boolean joinAtHead = false;
            Replica replica = zkStateReader.getClusterState().getReplica(collection, coreZkNodeName);
            if (replica != null) {
                joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
            }
            // TODO: Why would replica be null?
            if (replica == null || replica.getType() != Type.PULL) {
                joinElection(desc, afterExpiration, joinAtHead);
            } else if (replica.getType() == Type.PULL) {
                if (joinAtHead) {
                    log.warn("Replica {} was designated as preferred leader but it's type is {}, It won't join election", coreZkNodeName, Type.PULL);
                }
                log.debug("Replica {} skipping election because it's type is {}", coreZkNodeName, Type.PULL);
                startReplicationFromLeader(coreName, false);
            }
        } catch (InterruptedException e) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
        } catch (KeeperException | IOException e) {
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
        }
        // in this case, we want to wait for the leader as long as the leader might
        // wait for a vote, at least - but also long enough that a large cluster has
        // time to get its act together
        String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000);
        String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
        log.debug("We are " + ourUrl + " and leader is " + leaderUrl);
        boolean isLeader = leaderUrl.equals(ourUrl);
        Replica.Type replicaType = zkStateReader.getClusterState().getCollection(collection).getReplica(coreZkNodeName).getType();
        assert !(isLeader && replicaType == Type.PULL) : "Pull replica became leader!";
        try (SolrCore core = cc.getCore(desc.getName())) {
            // recover from local transaction log and wait for it to complete before
            // going active
            // TODO: should this be moved to another thread? To recoveryStrat?
            // TODO: should this actually be done earlier, before (or as part of)
            // leader election perhaps?
            UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
            boolean isTlogReplicaAndNotLeader = replicaType == Replica.Type.TLOG && !isLeader;
            if (isTlogReplicaAndNotLeader) {
                String commitVersion = ReplicateFromLeader.getCommitVersion(core);
                if (commitVersion != null) {
                    ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
                }
            }
            // we will call register again after zk expiration and on reload
            if (!afterExpiration && !core.isReloaded() && ulog != null && !isTlogReplicaAndNotLeader) {
                // disable recovery in case shard is in construction state (for shard splits)
                Slice slice = getClusterState().getSlice(collection, shardId);
                if (slice.getState() != Slice.State.CONSTRUCTION || !isLeader) {
                    Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler().getUpdateLog().recoverFromLog();
                    if (recoveryFuture != null) {
                        log.info("Replaying tlog for " + ourUrl + " during startup... NOTE: This can take a while.");
                        // NOTE: this could potentially block for minutes or more!
                        // TODO: publish as recovering in the meantime?
                        // TODO: in the future we could do peersync in parallel with recoverFromLog
                        recoveryFuture.get();
                    } else {
                        log.debug("No LogReplay needed for core={} baseURL={}", core.getName(), baseUrl);
                    }
                }
            }
            boolean didRecovery = checkRecovery(recoverReloadedCores, isLeader, skipRecovery, collection, coreZkNodeName, core, cc, afterExpiration);
            if (!didRecovery) {
                if (isTlogReplicaAndNotLeader) {
                    startReplicationFromLeader(coreName, true);
                }
                publish(desc, Replica.State.ACTIVE);
            }
            core.getCoreDescriptor().getCloudDescriptor().setHasRegistered(true);
        }
        // make sure we have an update cluster state right away
        zkStateReader.forceUpdateCollection(collection);
        return shardId;
    } finally {
        MDCLoggingContext.clear();
    }
}
Also used: ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) SolrCore(org.apache.solr.core.SolrCore) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) IOException(java.io.IOException) Type(org.apache.solr.common.cloud.Replica.Type) Replica(org.apache.solr.common.cloud.Replica) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) Slice(org.apache.solr.common.cloud.Slice) UpdateLog(org.apache.solr.update.UpdateLog) KeeperException(org.apache.zookeeper.KeeperException)
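
The KeeperException, IOException and InterruptedException cases in register(...) are all funnelled into ZooKeeperException with ErrorCode.SERVER_ERROR. Since ZooKeeperException is declared as a SolrException subclass, a caller can treat ZK failures like any other Solr error and still read the numeric code. The sketch below is a hypothetical caller, not code from lucene-solr; RegisterCaller, zkController, coreName and desc stand in for whatever the surrounding code provides.

import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.core.CoreDescriptor;

public class RegisterCaller {

    /** Registers a core and reports ZooKeeper-related failures separately (hypothetical helper). */
    String registerCore(ZkController zkController, String coreName, CoreDescriptor desc) throws Exception {
        try {
            // recoverReloadedCores=false, afterExpiration=false, skipRecovery=false
            return zkController.register(coreName, desc, false, false, false);
        } catch (ZooKeeperException e) {
            // code() returns the numeric error code; SERVER_ERROR corresponds to 500
            System.err.println("ZK registration failed (code " + e.code() + "): " + e.getMessage());
            throw e;
        }
    }
}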

Example 14 with ZooKeeperException

Use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.

Class TestZkChroot, method testNoBootstrapConf:

@Test
public void testNoBootstrapConf() throws Exception {
    String chroot = "/foo/bar2";
    System.setProperty("bootstrap_conf", "false");
    System.setProperty("zkHost", zkServer.getZkHost() + chroot);
    SolrZkClient zkClient = null;
    try {
        zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT);
        assertFalse("Path '" + chroot + "' should not exist before the test", zkClient.exists(chroot, true));
        cores = CoreContainer.createAndLoad(home);
        fail("There should be a zk exception, as the initial path doesn't exist");
    } catch (ZooKeeperException e) {
        // expected
        assertFalse("Path shouldn't have been created", // check the path was not created
        zkClient.exists(chroot, true));
    } finally {
        if (cores != null)
            cores.shutdown();
        if (zkClient != null)
            zkClient.close();
    }
}
Also used: ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) Test(org.junit.Test)
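
The try/catch/fail pattern above can also be written with expectThrows, which SolrTestCaseJ4-based tests inherit from LuceneTestCase. The variant below is a hypothetical rewrite under that assumption and reuses the same zkServer, home and cores fixtures as the original test class, so it would live inside TestZkChroot.

@Test
public void testNoBootstrapConfExpectThrows() throws Exception {
    String chroot = "/foo/bar2";
    System.setProperty("bootstrap_conf", "false");
    System.setProperty("zkHost", zkServer.getZkHost() + chroot);
    try (SolrZkClient zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT)) {
        assertFalse("Path '" + chroot + "' should not exist before the test", zkClient.exists(chroot, true));
        // loading the container against a missing chroot should fail with a ZooKeeperException
        expectThrows(ZooKeeperException.class, () -> cores = CoreContainer.createAndLoad(home));
        // check that the path was not created
        assertFalse("Path shouldn't have been created", zkClient.exists(chroot, true));
    } finally {
        if (cores != null)
            cores.shutdown();
    }
}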

Aggregations

ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException) 14
KeeperException (org.apache.zookeeper.KeeperException) 8
SolrException (org.apache.solr.common.SolrException) 6
IOException (java.io.IOException) 5
Replica (org.apache.solr.common.cloud.Replica) 5
ArrayList (java.util.ArrayList) 4
Slice (org.apache.solr.common.cloud.Slice) 4
ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps) 4
Node (org.apache.solr.update.SolrCmdDistributor.Node) 4
RetryNode (org.apache.solr.update.SolrCmdDistributor.RetryNode) 4
StdNode (org.apache.solr.update.SolrCmdDistributor.StdNode) 4
DocCollection (org.apache.solr.common.cloud.DocCollection) 3
HashMap (java.util.HashMap) 2
List (java.util.List) 2
ClusterState (org.apache.solr.common.cloud.ClusterState) 2
ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps) 2
SolrCore (org.apache.solr.core.SolrCore) 2
SocketTimeoutException (java.net.SocketTimeoutException) 1
Path (java.nio.file.Path) 1
EnumSet (java.util.EnumSet) 1