Search in sources :

Example 6 with ShardHandler

use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.

the class MigrateCmd method migrateKey.

/**
 * Migrates the documents matching {@code splitKey} from one source shard to one target shard.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>Delete a left-over temporary collection named {@code split_<source>_temp_<target>} if the
 *       cluster state already contains one (failure is logged and ignored).</li>
 *   <li>Intersect the source slice range, the hash range of {@code splitKey} and the target slice
 *       range; return without doing anything when the intersection is {@code null}.</li>
 *   <li>Ask the target leader to buffer updates.</li>
 *   <li>Offer an ADDROUTINGRULE message to the overseer state update queue and poll the cluster
 *       state until the rule is visible on the source slice.</li>
 *   <li>Create the temporary collection (one slice, one NRT replica) on the source leader's node
 *       and wait for its core to become active.</li>
 *   <li>Ask the source leader to SPLIT the computed range into the temporary core.</li>
 *   <li>Add a replica of the temporary collection on the target leader's node and wait for it.</li>
 *   <li>Ask the target leader to MERGEINDEXES from that replica, then to apply buffered updates.</li>
 *   <li>Delete the temporary collection (failure is logged and ignored).</li>
 * </ol>
 *
 * @param clusterState     cluster state used for the existence check and passed to the sub-commands
 * @param sourceCollection collection the documents are taken from; its router is cast to
 *                         {@link CompositeIdRouter}
 * @param sourceSlice      source shard
 * @param targetCollection collection receiving the documents
 * @param targetSlice      target shard
 * @param splitKey         route key whose hash range is migrated
 * @param timeout          passed to {@code RoutingRule.makeExpiryAt} to compute the rule's
 *                         {@code expireAt} value (unit not visible here - TODO confirm)
 * @param results          collector handed to every sub-command and response-processing call
 * @param asyncId          async request id, or {@code null} for synchronous operation
 * @param message          original request; properties starting with {@code COLL_PROP_PREFIX} are
 *                         copied onto the add-replica request
 * @throws SolrException if the routing rule is not visible in the cluster state before the wait
 *                       times out
 * @throws Exception     propagated from the invoked commands and shard requests
 */
private void migrateKey(ClusterState clusterState, DocCollection sourceCollection, Slice sourceSlice, DocCollection targetCollection, Slice targetSlice, String splitKey, int timeout, NamedList results, String asyncId, ZkNodeProps message) throws Exception {
    String tempSourceCollectionName = "split_" + sourceSlice.getName() + "_temp_" + targetSlice.getName();
    ZkStateReader zkStateReader = ocmh.zkStateReader;
    // Step 1: remove a temporary collection of the same name if one is already present.
    if (clusterState.hasCollection(tempSourceCollectionName)) {
        log.info("Deleting temporary collection: " + tempSourceCollectionName);
        Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
        try {
            ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
            // only refreshed when the delete call returned normally
            clusterState = zkStateReader.getClusterState();
        } catch (Exception e) {
            // best effort: the migration continues even if the clean-up failed
            log.warn("Unable to clean up existing temporary collection: " + tempSourceCollectionName, e);
        }
    }
    // Step 2: work out which hash range has to move.
    CompositeIdRouter sourceRouter = (CompositeIdRouter) sourceCollection.getRouter();
    DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);
    ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
    // a single shard handler is reused for every request sent by this method
    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
    log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
    // intersect source range, keyHashRange and target range
    // this is the range that has to be split from source and transferred to target
    DocRouter.Range splitRange = ocmh.intersect(targetSlice.getRange(), ocmh.intersect(sourceSlice.getRange(), keyHashRange));
    if (splitRange == null) {
        // nothing to migrate between this pair of shards
        log.info("No common hashes between source shard: {} and target shard: {}", sourceSlice.getName(), targetSlice.getName());
        return;
    }
    log.info("Common hash range between source shard: {} and target shard: {} = " + splitRange, sourceSlice.getName(), targetSlice.getName());
    Replica targetLeader = zkStateReader.getLeaderRetry(targetCollection.getName(), targetSlice.getName(), 10000);
    // For tracking async calls.
    Map<String, String> requestMap = new HashMap<>();
    // Step 3: the target leader buffers updates until REQUESTAPPLYUPDATES is sent near the end.
    log.info("Asking target leader node: " + targetLeader.getNodeName() + " core: " + targetLeader.getStr("core") + " to buffer updates");
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTBUFFERUPDATES.toString());
    params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
    ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to buffer updates", asyncId, requestMap);
    // Step 4: publish a routing rule for the route key on the source slice.
    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.ADDROUTINGRULE.toLower(), COLLECTION_PROP, sourceCollection.getName(), SHARD_ID_PROP, sourceSlice.getName(), "routeKey", SolrIndexSplitter.getRouteKey(splitKey) + "!", "range", splitRange.toString(), "targetCollection", targetCollection.getName(), "expireAt", RoutingRule.makeExpiryAt(timeout));
    log.info("Adding routing rule: " + m);
    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
    // wait for a while until we see the new rule
    log.info("Waiting to see routing rule updated in clusterstate");
    TimeOut waitUntil = new TimeOut(60, TimeUnit.SECONDS);
    boolean added = false;
    while (!waitUntil.hasTimedOut()) {
        // poll every 100 ms; sourceCollection and sourceSlice are re-read on each pass
        Thread.sleep(100);
        sourceCollection = zkStateReader.getClusterState().getCollection(sourceCollection.getName());
        sourceSlice = sourceCollection.getSlice(sourceSlice.getName());
        Map<String, RoutingRule> rules = sourceSlice.getRoutingRules();
        if (rules != null) {
            RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!");
            if (rule != null && rule.getRouteRanges().contains(splitRange)) {
                added = true;
                break;
            }
        }
    }
    if (!added) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not add routing rule: " + m);
    }
    log.info("Routing rule added successfully");
    // Step 5: create the temporary collection.
    // Create temp core on source shard
    Replica sourceLeader = zkStateReader.getLeaderRetry(sourceCollection.getName(), sourceSlice.getName(), 10000);
    // create a temporary collection with just one node on the shard leader
    String configName = zkStateReader.readConfigName(sourceCollection.getName());
    Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, CREATE.toLower(), NAME, tempSourceCollectionName, NRT_REPLICAS, 1, NUM_SLICES, 1, COLL_CONF, configName, CREATE_NODE_SET, sourceLeader.getNodeName());
    if (asyncId != null) {
        // the nested CREATE gets its own async id derived from the caller's id
        String internalAsyncId = asyncId + Math.abs(System.nanoTime());
        props.put(ASYNC, internalAsyncId);
    }
    log.info("Creating temporary collection: " + props);
    ocmh.commandMap.get(CREATE).call(clusterState, new ZkNodeProps(props), results);
    // refresh cluster state
    clusterState = zkStateReader.getClusterState();
    // the temporary collection was created with a single slice, so the first one is taken
    Slice tempSourceSlice = clusterState.getCollection(tempSourceCollectionName).getSlices().iterator().next();
    Replica tempSourceLeader = zkStateReader.getLeaderRetry(tempSourceCollectionName, tempSourceSlice.getName(), 120000);
    String tempCollectionReplica1 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 1);
    String coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, sourceLeader.getNodeName(), tempCollectionReplica1);
    // wait for the replicas to be seen as active on temp source leader
    log.info("Asking source leader to wait for: " + tempCollectionReplica1 + " to be alive on: " + sourceLeader.getNodeName());
    CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
    cmd.setCoreName(tempCollectionReplica1);
    cmd.setNodeName(sourceLeader.getNodeName());
    cmd.setCoreNodeName(coreNodeName);
    cmd.setState(Replica.State.ACTIVE);
    cmd.setCheckLive(true);
    cmd.setOnlyIfLeader(true);
    // we don't want this to happen asynchronously
    // NOTE(review): the request is sent with null asyncId/requestMap, but processResponses below
    // still receives asyncId and requestMap - confirm this is intended
    ocmh.sendShardRequest(tempSourceLeader.getNodeName(), new ModifiableSolrParams(cmd.getParams()), shardHandler, null, null);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection leader" + " or timed out waiting for it to come up", asyncId, requestMap);
    // Step 6: split the migrating range out of the source leader core into the temporary core.
    log.info("Asking source leader to split index");
    params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
    params.set(CoreAdminParams.CORE, sourceLeader.getStr("core"));
    params.add(CoreAdminParams.TARGET_CORE, tempSourceLeader.getStr("core"));
    params.set(CoreAdminParams.RANGES, splitRange.toString());
    params.set("split.key", splitKey);
    String tempNodeName = sourceLeader.getNodeName();
    ocmh.sendShardRequest(tempNodeName, params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to invoke SPLIT core admin command", asyncId, requestMap);
    // Step 7: add a second replica of the temporary collection on the target leader's node.
    log.info("Creating a replica of temporary collection: {} on the target leader node: {}", tempSourceCollectionName, targetLeader.getNodeName());
    String tempCollectionReplica2 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 2);
    props = new HashMap<>();
    props.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
    props.put(COLLECTION_PROP, tempSourceCollectionName);
    props.put(SHARD_ID_PROP, tempSourceSlice.getName());
    props.put("node", targetLeader.getNodeName());
    props.put(CoreAdminParams.NAME, tempCollectionReplica2);
    // copy over property params:
    for (String key : message.keySet()) {
        if (key.startsWith(COLL_PROP_PREFIX)) {
            props.put(key, message.getStr(key));
        }
    }
    // add async param
    if (asyncId != null) {
        props.put(ASYNC, asyncId);
    }
    ((AddReplicaCmd) ocmh.commandMap.get(ADDREPLICA)).addReplica(clusterState, new ZkNodeProps(props), results, null);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create replica of " + "temporary collection in target leader node.", asyncId, requestMap);
    coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, targetLeader.getNodeName(), tempCollectionReplica2);
    // wait for the replicas to be seen as active on temp source leader
    log.info("Asking temp source leader to wait for: " + tempCollectionReplica2 + " to be alive on: " + targetLeader.getNodeName());
    cmd = new CoreAdminRequest.WaitForState();
    // NOTE(review): the core name set here is the temp source leader's core, while the log line
    // above names tempCollectionReplica2 - confirm which core the wait is meant to address
    cmd.setCoreName(tempSourceLeader.getStr("core"));
    cmd.setNodeName(targetLeader.getNodeName());
    cmd.setCoreNodeName(coreNodeName);
    cmd.setState(Replica.State.ACTIVE);
    cmd.setCheckLive(true);
    cmd.setOnlyIfLeader(true);
    params = new ModifiableSolrParams(cmd.getParams());
    ocmh.sendShardRequest(tempSourceLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection" + " replica or timed out waiting for them to come up", asyncId, requestMap);
    log.info("Successfully created replica of temp source collection on target leader node");
    // Step 8: merge the temporary replica's index into the target leader core.
    log.info("Requesting merge of temp source collection replica to target leader");
    params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.MERGEINDEXES.toString());
    params.set(CoreAdminParams.CORE, targetLeader.getStr("core"));
    params.set(CoreAdminParams.SRC_CORE, tempCollectionReplica2);
    ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    String msg = "MIGRATE failed to merge " + tempCollectionReplica2 + " to " + targetLeader.getStr("core") + " on node: " + targetLeader.getNodeName();
    ocmh.processResponses(results, shardHandler, true, msg, asyncId, requestMap);
    // Step 9: counterpart of the REQUESTBUFFERUPDATES request sent in step 3.
    log.info("Asking target leader to apply buffered updates");
    params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
    params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
    ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to apply buffered updates", asyncId, requestMap);
    // Step 10: clean up; a failure here is only logged so the migration itself still succeeds.
    try {
        log.info("Deleting temporary collection: " + tempSourceCollectionName);
        props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
        ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
    } catch (Exception e) {
        log.error("Unable to delete temporary collection: " + tempSourceCollectionName + ". Please remove it manually", e);
    }
}
Also used : HashMap(java.util.HashMap) TimeOut(org.apache.solr.util.TimeOut) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) DocRouter(org.apache.solr.common.cloud.DocRouter) RoutingRule(org.apache.solr.common.cloud.RoutingRule) SolrException(org.apache.solr.common.SolrException) ShardHandlerFactory(org.apache.solr.handler.component.ShardHandlerFactory) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException) CompositeIdRouter(org.apache.solr.common.cloud.CompositeIdRouter) Slice(org.apache.solr.common.cloud.Slice)

Example 7 with ShardHandler

use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.

the class OverseerCollectionMessageHandler method processRebalanceLeaders.

/**
 * Submits a REJOINLEADERELECTION core admin request to the base URL named in {@code message}.
 *
 * <p>All of collection, shard, core name, election node, core node name, base URL and
 * rejoin-at-head must be present in the message; {@code checkRequired} is called first to
 * enforce that. The request is submitted through a fresh shard handler and this method does not
 * wait for, or inspect, the response.
 *
 * @param clusterState not read by this method
 * @param message      properties describing the replica that should rejoin the election
 * @param results      not written by this method
 * @throws Exception propagated from the required-property check or the submission
 */
@SuppressWarnings("unchecked")
private void processRebalanceLeaders(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
    checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, CORE_NAME_PROP, ELECTION_NODE_PROP, CORE_NODE_NAME_PROP, BASE_URL_PROP, REJOIN_AT_HEAD_PROP);
    final String targetUrl = message.getStr(BASE_URL_PROP);

    // Parameters are inserted in the same order as before: three copied properties, the action,
    // three more copied properties, the base URL and finally the handler path.
    final ModifiableSolrParams rejoinParams = new ModifiableSolrParams();
    for (String copied : new String[] { COLLECTION_PROP, SHARD_ID_PROP, REJOIN_AT_HEAD_PROP }) {
        rejoinParams.set(copied, message.getStr(copied));
    }
    rejoinParams.set(CoreAdminParams.ACTION, CoreAdminAction.REJOINLEADERELECTION.toString());
    for (String copied : new String[] { CORE_NAME_PROP, CORE_NODE_NAME_PROP, ELECTION_NODE_PROP }) {
        rejoinParams.set(copied, message.getStr(copied));
    }
    rejoinParams.set(BASE_URL_PROP, targetUrl);
    // yes, they must use same admin handler path everywhere...
    rejoinParams.set("qt", adminPath);

    final String[] destinations = new String[] { targetUrl };
    final ShardRequest request = new ShardRequest();
    request.nodeName = message.getStr(ZkStateReader.CORE_NAME_PROP);
    request.purpose = ShardRequest.PURPOSE_PRIVATE;
    request.shards = destinations;
    request.actualShards = request.shards;
    request.params = rejoinParams;

    // fire-and-forget: the response is not collected here
    shardHandlerFactory.getShardHandler().submit(request, targetUrl, request.params);
}
Also used : ShardRequest(org.apache.solr.handler.component.ShardRequest) ShardHandler(org.apache.solr.handler.component.ShardHandler) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams)

Example 8 with ShardHandler

use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.

the class DeleteReplicaCmd method deleteCore.

/**
 * Unloads the core backing {@code replicaName} and makes sure its entry disappears from the
 * cluster state.
 *
 * <p>When the replica's node is live an UNLOAD core admin request is sent to it. The remaining
 * work (processing the response, removing the core node entry if it is still present, waiting
 * for it to be gone) runs inline when {@code parallel} is {@code false}, or is submitted to
 * {@code ocmh.tpe} when it is {@code true}. {@code onComplete}, if given, is run once that work
 * finishes, whether it succeeded or not.
 *
 * @param slice          shard that is expected to contain the replica
 * @param collectionName collection the shard belongs to
 * @param replicaName    name of the replica to delete
 * @param message        request properties (onlyIfDown, async id and the delete* flags are read)
 * @param shard          shard name used in messages and in the wait calls
 * @param results        receives a "failure" entry if the removal work throws
 * @param onComplete     optional callback, may be {@code null}
 * @param parallel       whether the removal work is handed to the executor instead of run inline
 * @throws SolrException        if the replica is unknown, if onlyIfDown is set and the replica is
 *                              not DOWN, or (inline mode) if the replica could not be removed
 * @throws KeeperException      rethrown unchanged in inline mode
 * @throws InterruptedException rethrown unchanged in inline mode
 */
void deleteCore(Slice slice, String collectionName, String replicaName, ZkNodeProps message, String shard, NamedList results, Runnable onComplete, boolean parallel) throws KeeperException, InterruptedException {
    final Replica replica = slice.getReplica(replicaName);
    if (replica == null) {
        // list what the shard does contain so the caller can correct the request
        ArrayList<String> knownReplicas = new ArrayList<>();
        for (Replica candidate : slice.getReplicas()) {
            knownReplicas.add(candidate.getName());
        }
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid replica : " + replicaName + " in shard/collection : " + shard + "/" + collectionName + " available replicas are " + StrUtils.join(knownReplicas, ','));
    }

    // The request may demand that the replica is only removed when it is DOWN.
    final boolean onlyIfDown = Boolean.parseBoolean(message.getStr(ONLY_IF_DOWN));
    if (onlyIfDown && replica.getState() != Replica.State.DOWN) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Attempted to remove replica : " + collectionName + "/" + shard + "/" + replicaName + " with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'");
    }

    final ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    final String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
    final String asyncId = message.getStr(ASYNC);

    // the request map only exists for async requests; otherwise the reference holds null
    final AtomicReference<Map<String, String>> requestMap = new AtomicReference<>(null);
    if (asyncId != null) {
        requestMap.set(new HashMap<>(1, 1.0f));
    }

    final ModifiableSolrParams unloadParams = new ModifiableSolrParams();
    unloadParams.add(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.UNLOAD.toString());
    unloadParams.add(CoreAdminParams.CORE, core);
    unloadParams.set(CoreAdminParams.DELETE_INDEX, message.getBool(CoreAdminParams.DELETE_INDEX, true));
    unloadParams.set(CoreAdminParams.DELETE_INSTANCE_DIR, message.getBool(CoreAdminParams.DELETE_INSTANCE_DIR, true));
    unloadParams.set(CoreAdminParams.DELETE_DATA_DIR, message.getBool(CoreAdminParams.DELETE_DATA_DIR, true));

    // a node that is not live cannot be asked to unload anything
    final boolean isLive = ocmh.zkStateReader.getClusterState().getLiveNodes().contains(replica.getNodeName());
    if (isLive) {
        ocmh.sendShardRequest(replica.getNodeName(), unloadParams, shardHandler, asyncId, requestMap.get());
    }

    final Callable<Boolean> removal = () -> {
        try {
            if (isLive) {
                ocmh.processResponses(results, shardHandler, false, null, asyncId, requestMap.get());
                // the unload itself may already have removed the core node entry
                if (ocmh.waitForCoreNodeGone(collectionName, shard, replicaName, 5000)) {
                    return Boolean.TRUE;
                }
            }
            // otherwise remove the core info from the cluster state explicitly and wait again
            ocmh.deleteCoreNode(collectionName, replicaName, replica, core);
            return ocmh.waitForCoreNodeGone(collectionName, shard, replicaName, 30000) ? Boolean.TRUE : Boolean.FALSE;
        } catch (Exception e) {
            results.add("failure", "Could not complete delete " + e.getMessage());
            throw e;
        } finally {
            if (onComplete != null) {
                onComplete.run();
            }
        }
    };

    if (parallel) {
        ocmh.tpe.submit(removal);
        return;
    }

    try {
        boolean removed = removal.call();
        if (!removed) {
            // thrown inside the try on purpose: it is wrapped by the generic handler below
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not  remove replica : " + collectionName + "/" + shard + "/" + replicaName);
        }
    } catch (InterruptedException | KeeperException e) {
        throw e;
    } catch (Exception ex) {
        throw new SolrException(SolrException.ErrorCode.UNKNOWN, "Error waiting for corenode gone", ex);
    }
}
Also used : ArrayList(java.util.ArrayList) AtomicReference(java.util.concurrent.atomic.AtomicReference) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) HashMap(java.util.HashMap) Map(java.util.Map) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException)

Example 9 with ShardHandler

use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.

the class CreateShardCmd method call.

/**
 * Creates a new shard in an existing collection and the replicas that back it.
 *
 * <p>The message is offered to the overseer state update queue, then the cluster state is polled
 * (every 100 ms, for at most 30 seconds) until the new slice is visible. After that one CREATE
 * core admin request per replica is sent, cycling through the node list returned by
 * {@code getNodesForNewReplicas}; NRT replicas are created first, then TLOG, then PULL.
 *
 * @param clusterState cluster state used to look up the collection and pick nodes
 * @param message      request; must contain the collection and shard names, may contain replica
 *                     counts, a create-node set, a config name and an async id
 * @param results      receives the outcome of the core creation requests
 * @throws SolrException if collection or shard is missing, if NRT + TLOG replicas is not
 *                       positive, or if the shard does not show up in the cluster state in time
 * @throws Exception     propagated from the queue, the node assignment or the shard requests
 */
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
    String collectionName = message.getStr(COLLECTION_PROP);
    String sliceName = message.getStr(SHARD_ID_PROP);
    log.info("Create shard invoked: {}", message);
    if (collectionName == null || sliceName == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'collection' and 'shard' are required parameters");
    }
    int numSlices = 1;
    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    DocCollection collection = clusterState.getCollection(collectionName);
    // Request values win over collection-level values; replicationFactor is the legacy name that
    // is consulted when nrtReplicas is absent.
    int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, collection.getInt(NRT_REPLICAS, collection.getInt(REPLICATION_FACTOR, 1))));
    int numPullReplicas = message.getInt(PULL_REPLICAS, collection.getInt(PULL_REPLICAS, 0));
    int numTlogReplicas = message.getInt(TLOG_REPLICAS, collection.getInt(TLOG_REPLICAS, 0));
    int totalReplicas = numNrtReplicas + numPullReplicas + numTlogReplicas;
    if (numNrtReplicas + numTlogReplicas <= 0) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NRT_REPLICAS + " + " + TLOG_REPLICAS + " must be greater than 0");
    }
    Object createNodeSetStr = message.get(OverseerCollectionMessageHandler.CREATE_NODE_SET);
    List<Assign.ReplicaCount> sortedNodeList = getNodesForNewReplicas(clusterState, collectionName, sliceName, totalReplicas, createNodeSetStr, ocmh.overseer.getZkController().getCoreContainer());
    ZkStateReader zkStateReader = ocmh.zkStateReader;
    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
    // wait for a while until we see the shard
    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
    boolean created = false;
    while (!timeout.hasTimedOut()) {
        Thread.sleep(100);
        created = zkStateReader.getClusterState().getCollection(collectionName).getSlice(sliceName) != null;
        if (created) {
            break;
        }
    }
    if (!created) {
        // Report the shard that was actually waited for. The previous message read the NAME
        // property, which this command neither requires nor validates.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully create shard: " + sliceName);
    }
    String configName = message.getStr(COLL_CONF);
    String async = message.getStr(ASYNC);
    // the request map is only needed to track async requests
    Map<String, String> requestMap = null;
    if (async != null) {
        requestMap = new HashMap<>(totalReplicas, 1.0f);
    }
    int createdNrtReplicas = 0, createdTlogReplicas = 0, createdPullReplicas = 0;
    for (int j = 1; j <= totalReplicas; j++) {
        // Each replica type has its own counter, which becomes the number in the core name.
        int coreNameNumber;
        Replica.Type typeToCreate;
        if (createdNrtReplicas < numNrtReplicas) {
            createdNrtReplicas++;
            coreNameNumber = createdNrtReplicas;
            typeToCreate = Replica.Type.NRT;
        } else if (createdTlogReplicas < numTlogReplicas) {
            createdTlogReplicas++;
            coreNameNumber = createdTlogReplicas;
            typeToCreate = Replica.Type.TLOG;
        } else {
            createdPullReplicas++;
            coreNameNumber = createdPullReplicas;
            typeToCreate = Replica.Type.PULL;
        }
        // round-robin over the candidate nodes
        String nodeName = sortedNodeList.get((j - 1) % sortedNodeList.size()).nodeName;
        String coreName = Assign.buildCoreName(collectionName, sliceName, typeToCreate, coreNameNumber);
        log.info("Creating replica {} as part of slice {} of collection {} on {}", coreName, sliceName, collectionName, nodeName);
        // Need to create new params for each request
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.CREATE.toString());
        params.set(CoreAdminParams.NAME, coreName);
        params.set(CoreAdminParams.REPLICA_TYPE, typeToCreate.name());
        params.set(COLL_CONF, configName);
        params.set(CoreAdminParams.COLLECTION, collectionName);
        params.set(CoreAdminParams.SHARD, sliceName);
        params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices);
        ocmh.addPropertyParams(message, params);
        ocmh.sendShardRequest(nodeName, params, shardHandler, async, requestMap);
    }
    // collect the responses of all requests sent above in one go
    ocmh.processResponses(results, shardHandler, true, "Failed to create shard", async, requestMap, Collections.emptySet());
    log.info("Finished create command on all shards for collection: {}", collectionName);
}
Also used : TimeOut(org.apache.solr.util.TimeOut) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException)

Example 10 with ShardHandler

use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.

the class CreateSnapshotCmd method call.

/**
 * Creates a named snapshot of a collection by asking every active replica to snapshot its core.
 *
 * <p>A collection-level snapshot node is written to ZooKeeper first. A CREATESNAPSHOT request is
 * then sent to each replica whose state is ACTIVE. Cores that do not appear in the "success"
 * section of the responses are counted as failures per shard; a shard is considered failed when
 * its failure count is at least its replica count. The collection-level metadata is finally
 * updated with status Successful or Failed, and in the failed case an exception is thrown.
 *
 * @param state   not read by this method; the cluster state is fetched from the ZK state reader
 * @param message request carrying the collection name, the commit (snapshot) name and the
 *                optional async id
 * @param results not written by this method
 * @throws SolrException BAD_REQUEST if the snapshot already exists, SERVER_ERROR if at least one
 *                       shard could not be snapshotted
 * @throws Exception     propagated from ZooKeeper access or the shard requests
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
    String collectionName = message.getStr(COLLECTION_PROP);
    String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
    String asyncId = message.getStr(ASYNC);
    SolrZkClient zkClient = this.ocmh.overseer.getZkController().getZkClient();
    Date creationDate = new Date();

    if (SolrSnapshotManager.snapshotExists(zkClient, collectionName, commitName)) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " already exists for collection " + collectionName);
    }
    log.info("Creating a snapshot for collection={} with commitName={}", collectionName, commitName);

    // The collection-level meta-data node must exist before any core is asked to snapshot.
    SolrSnapshotManager.createCollectionLevelSnapshot(zkClient, collectionName, new CollectionSnapshotMetaData(commitName));
    log.info("Created a ZK path to store snapshot information for collection={} with commitName={}", collectionName, commitName);

    Map<String, String> requestMap = new HashMap<>();
    NamedList shardRequestResults = new NamedList();
    // every core that was sent a request; successful cores are removed again further down
    Map<String, Slice> pendingCores = new HashMap<>();
    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();

    for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
        for (Replica replica : slice.getReplicas()) {
            if (replica.getState() != State.ACTIVE) {
                // an inactive replica cannot serve the request, so none is sent
                log.info("Replica {} is not active. Hence not sending the createsnapshot request", replica.getCoreName());
                continue;
            }
            String coreName = replica.getStr(CORE_NAME_PROP);
            ModifiableSolrParams snapshotParams = new ModifiableSolrParams();
            snapshotParams.set(CoreAdminParams.ACTION, CoreAdminAction.CREATESNAPSHOT.toString());
            snapshotParams.set(NAME, slice.getName());
            snapshotParams.set(CORE_NAME_PROP, coreName);
            snapshotParams.set(CoreAdminParams.COMMIT_NAME, commitName);
            ocmh.sendShardRequest(replica.getNodeName(), snapshotParams, shardHandler, asyncId, requestMap);
            log.debug("Sent createsnapshot request to core={} with commitName={}", coreName, commitName);
            pendingCores.put(coreName, slice);
        }
    }

    // The request only succeeds if at least one replica of every shard created the snapshot,
    // which covers e.g. an entirely unavailable shard.
    ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);
    NamedList success = (NamedList) shardRequestResults.get("success");
    List<CoreSnapshotMetaData> replicas = new ArrayList<>();
    if (success != null) {
        for (int idx = 0; idx < success.size(); idx++) {
            NamedList coreResponse = (NamedList) success.getVal(idx);
            String coreName = (String) coreResponse.get(CoreAdminParams.CORE);
            Slice owningSlice = pendingCores.remove(coreName);
            // Record whether this core is the shard leader: a backup preferably uses the leader's
            // snapshot because it is the most likely to have the latest state.
            Replica shardLeader = owningSlice.getLeader();
            boolean leader = shardLeader != null && shardLeader.getCoreName().equals(coreName);
            coreResponse.add(SolrSnapshotManager.SHARD_ID, owningSlice.getName());
            coreResponse.add(SolrSnapshotManager.LEADER, leader);
            CoreSnapshotMetaData coreMeta = new CoreSnapshotMetaData(coreResponse);
            replicas.add(coreMeta);
            log.info("Snapshot with commitName {} is created successfully for core {}", commitName, coreMeta.getCoreName());
        }
    }

    Set<String> failedShards = new HashSet<>();
    if (!pendingCores.isEmpty()) {
        // whatever is still pending did not report success
        log.warn("Unable to create a snapshot with name {} for following cores {}", commitName, pendingCores.keySet());

        // tally the failed cores per shard
        Map<String, Integer> failuresByShardId = new HashMap<>();
        for (Slice failedSlice : pendingCores.values()) {
            failuresByShardId.merge(failedSlice.getName(), 1, Integer::sum);
        }

        // a shard has failed when none of its replicas produced a snapshot
        DocCollection collectionStatus = ocmh.zkStateReader.getClusterState().getCollection(collectionName);
        for (Map.Entry<String, Integer> tally : failuresByShardId.entrySet()) {
            int replicaCount = collectionStatus.getSlice(tally.getKey()).getReplicas().size();
            if (tally.getValue() >= replicaCount) {
                failedShards.add(tally.getKey());
            }
        }
    }

    final boolean everyShardCovered = failedShards.isEmpty();
    if (!everyShardCovered) {
        log.warn("Failed to create a snapshot for collection {} with commitName = {}. Snapshot could not be captured for following shards {}", collectionName, commitName, failedShards);
    }
    // The meta-data is written in both cases. On failure it lists only the cores that do hold
    // the named snapshot, so users can tell which ones those are.
    SnapshotStatus finalStatus = everyShardCovered ? SnapshotStatus.Successful : SnapshotStatus.Failed;
    CollectionSnapshotMetaData meta = new CollectionSnapshotMetaData(commitName, finalStatus, creationDate, replicas);
    SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, meta);
    log.info("Saved following snapshot information for collection={} with commitName={} in Zookeeper : {}", collectionName, commitName, meta.toNamedList());
    if (!everyShardCovered) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to create snapshot on shards " + failedShards);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException) HashSet(java.util.HashSet) NamedList(org.apache.solr.common.util.NamedList) ShardHandler(org.apache.solr.handler.component.ShardHandler) SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) Replica(org.apache.solr.common.cloud.Replica) Date(java.util.Date) CollectionSnapshotMetaData(org.apache.solr.core.snapshots.CollectionSnapshotMetaData) Slice(org.apache.solr.common.cloud.Slice) CoreSnapshotMetaData(org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ShardHandler (org.apache.solr.handler.component.ShardHandler)17 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)16 SolrException (org.apache.solr.common.SolrException)12 Replica (org.apache.solr.common.cloud.Replica)11 HashMap (java.util.HashMap)9 Slice (org.apache.solr.common.cloud.Slice)9 DocCollection (org.apache.solr.common.cloud.DocCollection)7 ArrayList (java.util.ArrayList)6 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)6 NamedList (org.apache.solr.common.util.NamedList)6 Map (java.util.Map)5 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)5 ShardRequest (org.apache.solr.handler.component.ShardRequest)5 ClusterState (org.apache.solr.common.cloud.ClusterState)3 SolrZkClient (org.apache.solr.common.cloud.SolrZkClient)3 CollectionSnapshotMetaData (org.apache.solr.core.snapshots.CollectionSnapshotMetaData)3 ShardResponse (org.apache.solr.handler.component.ShardResponse)3 TimeOut (org.apache.solr.util.TimeOut)3 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2