Search in sources :

Example 46 with KeeperException

use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.

the class DeleteShardCmd method call.

/**
 * Deletes one shard (slice) of a collection.
 *
 * <p>The slice is validated, every replica returned by {@code getReplicasForSlice} is handed to
 * the DELETEREPLICA command, a DELETESHARD message is offered to the state update queue, and the
 * method then polls the cluster state until the slice is no longer present.
 *
 * @param clusterState cluster state used to look up the collection and the slice
 * @param message      request properties; the collection name, shard id and async id are read from it
 * @param results      receives a "success" or "failure" entry for each replica deletion
 * @throws SolrException BAD_REQUEST when the collection or shard does not exist or the slice state
 *                       does not allow deletion; SERVER_ERROR when the deletion fails or the shard
 *                       is still visible after the wait
 * @throws Exception     anything raised while offering the shard-state update, which happens
 *                       outside the try block
 */
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
    String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP);
    String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
    log.info("Delete shard invoked");
    Slice slice = clusterState.getSlice(collectionName, sliceId);
    if (slice == null) {
        if (clusterState.hasCollection(collectionName)) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with name " + sliceId + " exists for collection " + collectionName);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
        }
    }
    // For now, only allow for deletions of Inactive slices or custom hashes (range==null).
    // TODO: Add check for range gaps on Slice deletion
    // NOTE(review): the RECOVERY_FAILED test sits outside the negated group, so a slice in that
    // state is always rejected, even when it is custom-hashed. Confirm whether it was meant to be
    // inside the parentheses (i.e. deletable); behavior is left unchanged here.
    final Slice.State state = slice.getState();
    if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY || state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state + ". Only non-active (or custom-hashed) slices can be deleted.");
    }
    if (state == Slice.State.RECOVERY) {
        // mark the slice as 'construction' and only then try to delete the cores
        // see SOLR-9455
        DistributedQueue inQueue = Overseer.getStateUpdateQueue(ocmh.zkStateReader.getZkClient());
        Map<String, Object> propMap = new HashMap<>();
        propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
        // the slice name itself is the key; its value is the state to switch to
        propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
        propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
        ZkNodeProps m = new ZkNodeProps(propMap);
        inQueue.offer(Utils.toJSON(m));
    }
    String asyncId = message.getStr(ASYNC);
    try {
        List<ZkNodeProps> replicas = getReplicasForSlice(collectionName, slice);
        // one count per replica; released by the completion callback or by the catch blocks below
        CountDownLatch cleanupLatch = new CountDownLatch(replicas.size());
        for (ZkNodeProps r : replicas) {
            final ZkNodeProps replica = r.plus(message.getProperties()).plus("parallel", "true").plus(ASYNC, asyncId);
            log.info("Deleting replica for collection={} shard={} on node={}", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(CoreAdminParams.NODE));
            NamedList deleteResult = new NamedList();
            try {
                ((DeleteReplicaCmd) ocmh.commandMap.get(DELETEREPLICA)).deleteReplica(clusterState, replica, deleteResult, () -> {
                    try {
                        if (deleteResult.get("failure") != null) {
                            synchronized (results) {
                                // NOTE(review): the log line above reads the node from CoreAdminParams.NODE
                                // while this message reads NODE_NAME_PROP; confirm which key is populated.
                                results.add("failure", String.format(Locale.ROOT, "Failed to delete replica for collection=%s shard=%s" + " on node=%s", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(NODE_NAME_PROP)));
                            }
                        }
                        SimpleOrderedMap success = (SimpleOrderedMap) deleteResult.get("success");
                        if (success != null) {
                            synchronized (results) {
                                results.add("success", success);
                            }
                        }
                    } finally {
                        // release the waiter only after the outcome has been recorded, so the
                        // awaiting thread cannot continue before results are populated
                        cleanupLatch.countDown();
                    }
                });
            } catch (KeeperException e) {
                // best effort: a ZooKeeper failure on one replica does not abort the shard deletion
                log.warn("Error deleting replica: {}", r, e);
                cleanupLatch.countDown();
            } catch (Exception e) {
                log.warn("Error deleting replica: {}", r, e);
                cleanupLatch.countDown();
                throw e;
            }
        }
        log.debug("Waiting for delete shard action to complete");
        if (!cleanupLatch.await(5, TimeUnit.MINUTES)) {
            // previously a timeout was silent; keep going as before but leave a trace
            log.warn("Timed out waiting for replica deletions for collection={} shard={}; pending={}", collectionName, sliceId, cleanupLatch.getCount());
        }
        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
        ZkStateReader zkStateReader = ocmh.zkStateReader;
        Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
        // wait for a while until we don't see the shard
        TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
        boolean removed = false;
        while (!timeout.hasTimedOut()) {
            Thread.sleep(100);
            DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
            removed = collection.getSlice(sliceId) == null;
            if (removed) {
                // just a bit of time so it's more likely other readers see on return
                Thread.sleep(100);
                break;
            }
        }
        if (!removed) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully remove collection: " + collectionName + " shard: " + sliceId);
        }
        log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId);
    } catch (SolrException e) {
        throw e;
    } catch (InterruptedException e) {
        // restore the interrupt flag before translating, so callers can still observe the interruption
        Thread.currentThread().interrupt();
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collectionName + " shard: " + sliceId, e);
    } catch (Exception e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collectionName + " shard: " + sliceId, e);
    }
}
Also used : HashMap(java.util.HashMap) NamedList(org.apache.solr.common.util.NamedList) TimeOut(org.apache.solr.util.TimeOut) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) CountDownLatch(java.util.concurrent.CountDownLatch) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException)

Example 47 with KeeperException

use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.

the class LeaderElector method joinElection.

/**
 * Begin participating in the election process. Gets a new sequential number
 * and begins watching the node with the sequence number before it, unless it
 * is the lowest number, in which case, initiates the leader process. If the
 * node that is watched goes down, check if we are the new lowest node, else
 * watch the next lowest numbered node.
 *
 * <p>Creating the election node is retried on {@code ConnectionLossException} and
 * {@code NoNodeException}. After a connection loss the children of the election path are
 * inspected first: if a node carrying this participant's id already exists it is adopted,
 * so no second node is created for the same participant.
 *
 * @param context     election context; its {@code leaderSeqPath} is set to the election node in use
 * @param replacement passed through to {@code checkIfIamLeader}
 * @param joinAtHead  when true and at least two election nodes exist, a non-sequential ephemeral
 *                    node is created that reuses the {@code LEADER_SEQ} group of the second node
 *                    in sorted order; otherwise a new sequential node is created
 * @return sequential node number
 * @throws ZooKeeperException if the election node could not be created after repeated retries
 */
public int joinElection(ElectionContext context, boolean replacement, boolean joinAtHead) throws KeeperException, InterruptedException, IOException {
    context.joinedElectionFired();
    final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;
    long sessionId = zkClient.getSolrZooKeeper().getSessionId();
    String id = sessionId + "-" + context.id;
    String leaderSeqPath = null;
    boolean cont = true;
    int tries = 0;
    while (cont) {
        try {
            if (joinAtHead) {
                log.debug("Node {} trying to join election at the head", id);
                List<String> nodes = OverseerTaskProcessor.getSortedElectionNodes(zkClient, shardsElectZkPath);
                if (nodes.size() < 2) {
                    leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null, CreateMode.EPHEMERAL_SEQUENTIAL, false);
                } else {
                    String firstInLine = nodes.get(1);
                    log.debug("The current head: {}", firstInLine);
                    Matcher m = LEADER_SEQ.matcher(firstInLine);
                    if (!m.matches()) {
                        throw new IllegalStateException("Could not find regex match in:" + firstInLine);
                    }
                    leaderSeqPath = shardsElectZkPath + "/" + id + "-n_" + m.group(1);
                    zkClient.create(leaderSeqPath, null, CreateMode.EPHEMERAL, false);
                }
            } else {
                leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null, CreateMode.EPHEMERAL_SEQUENTIAL, false);
            }
            log.debug("Joined leadership election with path: {}", leaderSeqPath);
            context.leaderSeqPath = leaderSeqPath;
            cont = false;
        } catch (ConnectionLossException e) {
            // we don't know if we made our node or not...
            List<String> entries = zkClient.getChildren(shardsElectZkPath, null, true);
            String ownEntry = null;
            for (String entry : entries) {
                if (id.equals(getNodeId(entry))) {
                    // we did create our node...
                    ownEntry = entry;
                    break;
                }
            }
            if (ownEntry != null) {
                // Adopt the node that already exists. Looping again here would create a second
                // election node for the same id.
                // NOTE(review): assumes getChildren returns child names relative to the election path.
                leaderSeqPath = shardsElectZkPath + "/" + ownEntry;
                log.debug("Joined leadership election with path: {}", leaderSeqPath);
                context.leaderSeqPath = leaderSeqPath;
                cont = false;
            } else {
                if (tries++ > 20) {
                    throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "Could not join election at " + shardsElectZkPath + " after repeated connection loss", e);
                }
                try {
                    Thread.sleep(50);
                } catch (InterruptedException e2) {
                    Thread.currentThread().interrupt();
                }
            }
        } catch (KeeperException.NoNodeException e) {
            // we must have failed in creating the election node - someone else must
            // be working on it, lets try again
            if (tries++ > 20) {
                throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "Could not join election: election path " + shardsElectZkPath + " does not exist", e);
            }
            try {
                Thread.sleep(50);
            } catch (InterruptedException e2) {
                Thread.currentThread().interrupt();
            }
        }
    }
    checkIfIamLeader(context, replacement);
    return getSeq(context.leaderSeqPath);
}
Also used : Matcher(java.util.regex.Matcher) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) List(java.util.List) KeeperException(org.apache.zookeeper.KeeperException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException)

Example 48 with KeeperException

use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.

the class CreateCollectionCmd method createCollectionZkNode.

/**
 * Makes sure the ZooKeeper node for a collection exists, creating it with the collection's
 * properties when it is missing.
 *
 * <p>The properties written to the node are chosen as follows:
 * <ul>
 *   <li>non-empty {@code params}: the params themselves, with the config name resolved through
 *       {@code getConfName} when it was not passed in;</li>
 *   <li>otherwise, when the {@code bootstrap_confdir} system property is set: every system property
 *       starting with {@code ZkController.COLLECTION_PARAM_PREFIX} (prefix stripped), with the
 *       config name defaulting to the prefixed config-name system property or the collection name;</li>
 *   <li>otherwise, when the {@code bootstrap_conf} system property is true: the collection name is
 *       used as config name;</li>
 *   <li>otherwise: whatever {@code getConfName} resolves.</li>
 * </ul>
 * {@code numShards} is never stored. A node that already exists, or is created concurrently by
 * someone else, is not an error.
 *
 * @param zkClient   client used to check for and create the node
 * @param collection collection name
 * @param params     collection properties supplied by the caller; may be empty
 * @throws SolrException SERVER_ERROR when ZooKeeper fails for a reason other than the node
 *                       already existing, or when the thread is interrupted
 */
public static void createCollectionZkNode(SolrZkClient zkClient, String collection, Map<String, String> params) {
    log.debug("Check for collection zkNode:" + collection);
    String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
    try {
        if (!zkClient.exists(collectionPath, true)) {
            log.debug("Creating collection in ZooKeeper:" + collection);
            try {
                Map<String, Object> collectionProps = new HashMap<>();
                // TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
                String defaultConfigName = System.getProperty(ZkController.COLLECTION_PARAM_PREFIX + ZkController.CONFIGNAME_PROP, collection);
                if (!params.isEmpty()) {
                    collectionProps.putAll(params);
                    // if the config name wasn't passed in, use the default
                    if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP)) {
                        // users can create the collection node and conf link ahead of time, or this may return another option
                        getConfName(zkClient, collection, collectionPath, collectionProps);
                    }
                } else if (System.getProperty("bootstrap_confdir") != null) {
                    // if we are bootstrapping a collection, default the config for
                    // a new collection to the collection we are bootstrapping
                    log.info("Setting config for collection:" + collection + " to " + defaultConfigName);
                    Properties sysProps = System.getProperties();
                    for (String sprop : sysProps.stringPropertyNames()) {
                        if (sprop.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) {
                            collectionProps.put(sprop.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop));
                        }
                    }
                    // if the config name wasn't passed in, use the default
                    if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP)) {
                        collectionProps.put(ZkController.CONFIGNAME_PROP, defaultConfigName);
                    }
                } else if (Boolean.getBoolean("bootstrap_conf")) {
                    // the conf name should be the collection name of this core
                    collectionProps.put(ZkController.CONFIGNAME_PROP, collection);
                } else {
                    getConfName(zkClient, collection, collectionPath, collectionProps);
                }
                // we don't put numShards in the collections properties
                collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP);
                ZkNodeProps zkProps = new ZkNodeProps(collectionProps);
                zkClient.makePath(collectionPath, Utils.toJSON(zkProps), CreateMode.PERSISTENT, null, true);
            } catch (KeeperException e) {
                // it's okay if the node already exists
                if (e.code() != KeeperException.Code.NODEEXISTS) {
                    throw e;
                }
            }
        } else {
            log.debug("Collection zkNode exists");
        }
    } catch (KeeperException e) {
        // it's okay if another beats us creating the node
        if (e.code() == KeeperException.Code.NODEEXISTS) {
            return;
        }
        throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
    } catch (InterruptedException e) {
        // Thread.interrupted() would clear the flag; restore it so callers can observe the interruption
        Thread.currentThread().interrupt();
        throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) StrUtils.formatString(org.apache.solr.common.util.StrUtils.formatString) Properties(java.util.Properties) KeeperException(org.apache.zookeeper.KeeperException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) SolrException(org.apache.solr.common.SolrException)

Example 49 with KeeperException

use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.

the class CdcrBufferStateManager method createStateNode.

/**
 * Creates the znode returned by {@code getZnodePath()} with {@code DEFAULT_STATE} as its content
 * when it does not exist yet, creating the base path first if needed.
 *
 * <p>Failures are logged rather than thrown. A node created concurrently by another party is
 * treated as success.
 */
private void createStateNode() {
    SolrZkClient zkClient = core.getCoreContainer().getZkController().getZkClient();
    try {
        if (!zkClient.exists(this.getZnodePath(), true)) {
            if (!zkClient.exists(this.getZnodeBase(), true)) {
                // Should be a no-op if node exists
                zkClient.makePath(this.getZnodeBase(), null, CreateMode.PERSISTENT, null, false, true);
            }
            zkClient.create(this.getZnodePath(), DEFAULT_STATE.getBytes(), CreateMode.PERSISTENT, true);
            log.info("Created znode {}", this.getZnodePath());
        }
    } catch (KeeperException.NodeExistsException ne) {
        // Someone got in first and created the node.
    } catch (KeeperException e) {
        log.warn("Failed to create CDCR buffer state node", e);
    } catch (InterruptedException e) {
        // keep the interruption visible to the caller instead of swallowing it
        Thread.currentThread().interrupt();
        log.warn("Failed to create CDCR buffer state node", e);
    }
}
Also used : SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) KeeperException(org.apache.zookeeper.KeeperException)

Example 50 with KeeperException

use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.

the class CdcrProcessStateManager method synchronize.

/**
 * Synchronise the state to Zookeeper. This method must be called only by the handler receiving the
 * action.
 *
 * <p>The current state is written to the znode, then read back with the watcher registered, and
 * the value read becomes the local state. Failures are logged rather than thrown.
 */
void synchronize() {
    SolrZkClient zkClient = core.getCoreContainer().getZkController().getZkClient();
    try {
        zkClient.setData(this.getZnodePath(), this.getState().getBytes(), true);
        // check if nobody changed it in the meantime, and set a new watcher
        this.setState(CdcrParams.ProcessState.get(zkClient.getData(this.getZnodePath(), watcher, null, true)));
    } catch (KeeperException e) {
        log.warn("Failed synchronising new state", e);
    } catch (InterruptedException e) {
        // keep the interruption visible to the caller instead of swallowing it
        Thread.currentThread().interrupt();
        log.warn("Failed synchronising new state", e);
    }
}
Also used : SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

KeeperException (org.apache.zookeeper.KeeperException): 566; IOException (java.io.IOException): 188; Stat (org.apache.zookeeper.data.Stat): 127; ZooKeeper (org.apache.zookeeper.ZooKeeper): 87; ArrayList (java.util.ArrayList): 51; NoNodeException (org.apache.zookeeper.KeeperException.NoNodeException): 45; Watcher (org.apache.zookeeper.Watcher): 39; WatchedEvent (org.apache.zookeeper.WatchedEvent): 38; Test (org.junit.jupiter.api.Test): 38; CountDownLatch (java.util.concurrent.CountDownLatch): 30; SolrException (org.apache.solr.common.SolrException): 30; HashMap (java.util.HashMap): 29; List (java.util.List): 28; ACL (org.apache.zookeeper.data.ACL): 27; Test (org.junit.Test): 27; HeliosRuntimeException (com.spotify.helios.common.HeliosRuntimeException): 25; ServerName (org.apache.hadoop.hbase.ServerName): 24; Map (java.util.Map): 23; IZooReaderWriter (org.apache.accumulo.fate.zookeeper.IZooReaderWriter): 23; InterruptedIOException (java.io.InterruptedIOException): 20