Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
Class: CdcrRequestHandler, method: handleCollectionCheckpointAction.
/**
* This action is generally executed on the target cluster in order to retrieve the latest update checkpoint.
* This checkpoint is used on the source cluster to set up the
* {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader} of a shard leader. <br/>
* This method will execute in parallel one
* {@link org.apache.solr.handler.CdcrParams.CdcrAction#SHARDCHECKPOINT} request per shard leader. It will
* then pick the lowest version number as the checkpoint. Picking the lowest amongst all shards ensures that we do not
* pick a checkpoint that is ahead of the source cluster. This can occur when other shard leaders are sending new
* updates to the target cluster while we are instantiating the
* {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader}.
* This solution only works in scenarios where the topologies of the source and target clusters are identical.
*/
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
ZkController zkController = core.getCoreContainer().getZkController();
try {
zkController.getZkStateReader().forceUpdateCollection(collection);
} catch (Exception e) {
log.warn("Error when updating cluster state", e);
}
ClusterState cstate = zkController.getClusterState();
Collection<Slice> shards = cstate.getActiveSlices(collection);
ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
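// start at Long.MAX_VALUE so the first shard response always lowers the checkpoint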
long checkpoint = Long.MAX_VALUE;
try {
List<Callable<Long>> callables = new ArrayList<>();
for (Slice shard : shards) {
ZkNodeProps leaderProps = zkController.getZkStateReader().getLeaderRetry(collection, shard.getName());
ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
callables.add(new SliceCheckpointCallable(nodeProps.getCoreUrl(), path));
}
for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
long version = future.get();
if (version < checkpoint) {
// we must take the lowest checkpoint from all the shards
checkpoint = version;
}
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
} catch (ExecutionException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
} finally {
parallelExecutor.shutdown();
}
rsp.add(CdcrParams.CHECKPOINT, checkpoint);
}
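The pattern above (fan out one request per shard leader, then keep the minimum) can be illustrated with a small self-contained sketch. The per-shard HTTP call is replaced here by a stub returning a fake version; in Solr it would be a SHARDCHECKPOINT request sent to each shard leader's core URL, as SliceCheckpointCallable does.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class MinCheckpointExample {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newCachedThreadPool();
    long checkpoint = Long.MAX_VALUE;
    try {
      List<Callable<Long>> callables = new ArrayList<>();
      // stand-ins for each shard leader's checkpoint version
      for (long fakeVersion : new long[] {1700000003L, 1700000001L, 1700000002L}) {
        callables.add(() -> fakeVersion);
      }
      for (Future<Long> f : pool.invokeAll(callables)) {
        checkpoint = Math.min(checkpoint, f.get()); // the lowest version wins
      }
    } finally {
      pool.shutdown();
    }
    System.out.println("collection checkpoint = " + checkpoint); // 1700000001
  }
}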
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
Class: RestoreCmd, method: call.
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
// TODO maybe we can inherit createCollection's options/code
String restoreCollectionName = message.getStr(COLLECTION_PROP);
String backupName = message.getStr(NAME); // name of the backup
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
Map<String, String> requestMap = new HashMap<>();
CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer();
BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
URI backupPath = repository.resolve(location, backupName);
ZkStateReader zkStateReader = ocmh.zkStateReader;
BackupManager backupMgr = new BackupManager(repository, zkStateReader);
Properties properties = backupMgr.readBackupProperties(location, backupName);
String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);
DocCollection backupCollectionState = backupMgr.readCollectionState(location, backupName, backupCollection);
// Get the Solr nodes to restore a collection.
final List<String> nodeList = OverseerCollectionMessageHandler.getLiveOrLiveAndCreateNodeSetList(zkStateReader.getClusterState().getLiveNodes(), message, RANDOM);
int numShards = backupCollectionState.getActiveSlices().size();
int numNrtReplicas = getInt(message, NRT_REPLICAS, backupCollectionState.getNumNrtReplicas(), 0);
if (numNrtReplicas == 0) {
numNrtReplicas = getInt(message, REPLICATION_FACTOR, backupCollectionState.getReplicationFactor(), 0);
}
int numTlogReplicas = getInt(message, TLOG_REPLICAS, backupCollectionState.getNumTlogReplicas(), 0);
int numPullReplicas = getInt(message, PULL_REPLICAS, backupCollectionState.getNumPullReplicas(), 0);
int totalReplicasPerShard = numNrtReplicas + numTlogReplicas + numPullReplicas;
int maxShardsPerNode = message.getInt(MAX_SHARDS_PER_NODE, backupCollectionState.getMaxShardsPerNode());
int availableNodeCount = nodeList.size();
if ((numShards * totalReplicasPerShard) > (availableNodeCount * maxShardsPerNode)) {
throw new SolrException(ErrorCode.BAD_REQUEST, String.format(Locale.ROOT, "Solr cloud with available number of nodes:%d is insufficient for" + " restoring a collection with %d shards, total replicas per shard %d and maxShardsPerNode %d." + " Consider increasing maxShardsPerNode value OR number of available nodes.", availableNodeCount, numShards, totalReplicasPerShard, maxShardsPerNode));
}
//Upload the configs
String configName = (String) properties.get(COLL_CONF);
String restoreConfigName = message.getStr(COLL_CONF, configName);
if (zkStateReader.getConfigManager().configExists(restoreConfigName)) {
log.info("Using existing config {}", restoreConfigName);
//TODO add overwrite option?
} else {
log.info("Uploading config {}", restoreConfigName);
backupMgr.uploadConfigDir(location, backupName, configName, restoreConfigName);
}
log.info("Starting restore into collection={} with backup_name={} at location={}", restoreCollectionName, backupName, location);
//Create core-less collection
{
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, CREATE.toString());
propMap.put("fromApi", "true"); // mostly true; prevents autoCreated=true in the collection state
// inherit settings from input API, defaulting to the backup's setting. Ex: replicationFactor
for (String collProp : COLL_PROPS.keySet()) {
Object val = message.getProperties().getOrDefault(collProp, backupCollectionState.get(collProp));
if (val != null) {
propMap.put(collProp, val);
}
}
propMap.put(NAME, restoreCollectionName);
//no cores
propMap.put(CREATE_NODE_SET, CREATE_NODE_SET_EMPTY);
propMap.put(COLL_CONF, restoreConfigName);
// router.*
@SuppressWarnings("unchecked") Map<String, Object> routerProps = (Map<String, Object>) backupCollectionState.getProperties().get(DocCollection.DOC_ROUTER);
for (Map.Entry<String, Object> pair : routerProps.entrySet()) {
propMap.put(DocCollection.DOC_ROUTER + "." + pair.getKey(), pair.getValue());
}
Set<String> sliceNames = backupCollectionState.getActiveSlicesMap().keySet();
if (backupCollectionState.getRouter() instanceof ImplicitDocRouter) {
propMap.put(SHARDS_PROP, StrUtils.join(sliceNames, ','));
} else {
propMap.put(NUM_SLICES, sliceNames.size());
// ClusterStateMutator.createCollection detects that "slices" is in fact a slice structure instead of a
// list of names, and if so uses this instead of building it. We clear the replica list.
Collection<Slice> backupSlices = backupCollectionState.getActiveSlices();
Map<String, Slice> newSlices = new LinkedHashMap<>(backupSlices.size());
for (Slice backupSlice : backupSlices) {
newSlices.put(backupSlice.getName(), new Slice(backupSlice.getName(), Collections.emptyMap(), backupSlice.getProperties()));
}
propMap.put(SHARDS_PROP, newSlices);
}
ocmh.commandMap.get(CREATE).call(zkStateReader.getClusterState(), new ZkNodeProps(propMap), new NamedList());
// note: when createCollection() returns, the collection exists (no race)
}
DocCollection restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
//Mark all shards in CONSTRUCTION STATE while we restore the data
{
//TODO might instead createCollection accept an initial state? Is there a race?
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (Slice shard : restoreCollection.getSlices()) {
propMap.put(shard.getName(), Slice.State.CONSTRUCTION.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
}
// TODO how do we leverage the RULE / SNITCH logic in createCollection?
ClusterState clusterState = zkStateReader.getClusterState();
List<String> sliceNames = new ArrayList<>();
restoreCollection.getSlices().forEach(x -> sliceNames.add(x.getName()));
Map<ReplicaAssigner.Position, String> positionVsNodes = ocmh.identifyNodes(clusterState, nodeList, message, sliceNames, numNrtReplicas, numTlogReplicas, numPullReplicas);
//Create one replica per shard and copy backed up data to it
for (Slice slice : restoreCollection.getSlices()) {
log.debug("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection);
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD);
propMap.put(COLLECTION_PROP, restoreCollectionName);
propMap.put(SHARD_ID_PROP, slice.getName());
if (numNrtReplicas >= 1) {
propMap.put(REPLICA_TYPE, Replica.Type.NRT.name());
} else if (numTlogReplicas >= 1) {
propMap.put(REPLICA_TYPE, Replica.Type.TLOG.name());
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unexpected number of replicas, replicationFactor, " + Replica.Type.NRT + " or " + Replica.Type.TLOG + " must be greater than 0");
}
// Get the first node matching the shard to restore in
String node;
for (Map.Entry<ReplicaAssigner.Position, String> pvn : positionVsNodes.entrySet()) {
ReplicaAssigner.Position position = pvn.getKey();
if (position.shard.equals(slice.getName())) {
node = pvn.getValue();
propMap.put(CoreAdminParams.NODE, node);
positionVsNodes.remove(position);
break;
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
ocmh.addPropertyParams(message, propMap);
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), new NamedList(), null);
}
//refresh the local copy of collection state
restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
//Copy data from backed up index to each replica
for (Slice slice : restoreCollection.getSlices()) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.RESTORECORE.toString());
params.set(NAME, "snapshot." + slice.getName());
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
ocmh.sliceCmd(clusterState, params, null, slice, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap);
//Mark all shards in ACTIVE STATE
{
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
for (Slice shard : restoreCollection.getSlices()) {
propMap.put(shard.getName(), Slice.State.ACTIVE.toString());
}
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
}
//refresh the local copy of collection state
restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
if (totalReplicasPerShard > 1) {
log.info("Adding replicas to restored collection={}", restoreCollection);
for (Slice slice : restoreCollection.getSlices()) {
//Add the remaining replicas for each shard, considering its type
int createdNrtReplicas = 0, createdTlogReplicas = 0, createdPullReplicas = 0;
// We already created either an NRT or a TLOG replica as leader
if (numNrtReplicas > 0) {
createdNrtReplicas++;
} else if (numTlogReplicas > 0) {
createdTlogReplicas++;
}
for (int i = 1; i < totalReplicasPerShard; i++) {
Replica.Type typeToCreate;
if (createdNrtReplicas < numNrtReplicas) {
createdNrtReplicas++;
typeToCreate = Replica.Type.NRT;
} else if (createdTlogReplicas < numTlogReplicas) {
createdTlogReplicas++;
typeToCreate = Replica.Type.TLOG;
} else {
createdPullReplicas++;
typeToCreate = Replica.Type.PULL;
assert createdPullReplicas <= numPullReplicas : "Unexpected number of replicas";
}
log.debug("Adding replica for shard={} collection={} of type {} ", slice.getName(), restoreCollection, typeToCreate);
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(COLLECTION_PROP, restoreCollectionName);
propMap.put(SHARD_ID_PROP, slice.getName());
propMap.put(REPLICA_TYPE, typeToCreate.name());
// Get the first node matching the shard to restore in
String node;
for (Map.Entry<ReplicaAssigner.Position, String> pvn : positionVsNodes.entrySet()) {
ReplicaAssigner.Position position = pvn.getKey();
if (position.shard.equals(slice.getName())) {
node = pvn.getValue();
propMap.put(CoreAdminParams.NODE, node);
positionVsNodes.remove(position);
break;
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
ocmh.addPropertyParams(message, propMap);
ocmh.addReplica(zkStateReader.getClusterState(), new ZkNodeProps(propMap), results, null);
}
}
}
log.info("Completed restoring collection={} backupName={}", restoreCollection, backupName);
}
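The getInt(...) helper called above (e.g. getInt(message, NRT_REPLICAS, backupCollectionState.getNumNrtReplicas(), 0)) is not shown in this excerpt. A plausible reconstruction, with the name and signature as assumptions: prefer the value from the request message, fall back to the value recorded in the backed-up collection state, then to a final default.

// hypothetical reconstruction, not the actual lucene-solr helper
private static int getInt(ZkNodeProps message, String prop, Integer backupValue, int def) {
  Integer value = message.getInt(prop, backupValue); // request message wins, then backup state
  return value != null ? value : def;
}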
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
Class: SplitShardCmd, method: split.
public boolean split(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr("collection");
String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
log.info("Split shard invoked");
ZkStateReader zkStateReader = ocmh.zkStateReader;
zkStateReader.forceUpdateCollection(collectionName);
String splitKey = message.getStr("split.key");
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
DocCollection collection = clusterState.getCollection(collectionName);
DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
Slice parentSlice;
if (slice == null) {
if (router instanceof CompositeIdRouter) {
Collection<Slice> searchSlices = router.getSearchSlicesSingle(splitKey, new ModifiableSolrParams(), collection);
if (searchSlices.isEmpty()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to find an active shard for split.key: " + splitKey);
}
if (searchSlices.size() > 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Splitting a split.key: " + splitKey + " which spans multiple shards is not supported");
}
parentSlice = searchSlices.iterator().next();
slice = parentSlice.getName();
log.info("Split by route.key: {}, parent shard is: {} ", splitKey, slice);
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Split by route key can only be used with CompositeIdRouter or subclass. Found router: " + router.getClass().getName());
}
} else {
parentSlice = collection.getSlice(slice);
}
if (parentSlice == null) {
// no chance of the collection being null because ClusterState#getCollection(String) would have thrown
// an exception already
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
}
// find the leader for the shard
Replica parentShardLeader = null;
try {
parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice, 10000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
// let's record the ephemeralOwner of the parent leader node
Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null) {
// we just got to know the leader but its live node is gone already!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
}
DocRouter.Range range = parentSlice.getRange();
if (range == null) {
range = new PlainIdRouter().fullRange();
}
List<DocRouter.Range> subRanges = null;
String rangesStr = message.getStr(CoreAdminParams.RANGES);
if (rangesStr != null) {
String[] ranges = rangesStr.split(",");
if (ranges.length == 0 || ranges.length == 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "There must be at least two ranges specified to split a shard");
} else {
subRanges = new ArrayList<>(ranges.length);
for (int i = 0; i < ranges.length; i++) {
String r = ranges[i];
try {
subRanges.add(DocRouter.DEFAULT.fromString(r));
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Exception in parsing hexadecimal hash range: " + r, e);
}
if (!subRanges.get(i).isSubsetOf(range)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash range: " + r + " is not a subset of parent shard's range: " + range.toString());
}
}
// copy to preserve original order
List<DocRouter.Range> temp = new ArrayList<>(subRanges);
Collections.sort(temp);
if (!range.equals(new DocRouter.Range(temp.get(0).min, temp.get(temp.size() - 1).max))) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash ranges: " + rangesStr + " do not cover the entire range of parent shard: " + range);
}
for (int i = 1; i < temp.size(); i++) {
if (temp.get(i - 1).max + 1 != temp.get(i).min) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash ranges: " + rangesStr + " either overlap with each other or " + "do not cover the entire range of parent shard: " + range);
}
}
}
} else if (splitKey != null) {
if (router instanceof CompositeIdRouter) {
CompositeIdRouter compositeIdRouter = (CompositeIdRouter) router;
subRanges = compositeIdRouter.partitionRangeByKey(splitKey, range);
if (subRanges.size() == 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " has a hash range that is exactly equal to hash range of shard: " + slice);
}
for (DocRouter.Range subRange : subRanges) {
if (subRange.min == subRange.max) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " must be a compositeId");
}
}
log.info("Partitioning parent shard " + slice + " range: " + parentSlice.getRange() + " yields: " + subRanges);
rangesStr = "";
for (int i = 0; i < subRanges.size(); i++) {
DocRouter.Range subRange = subRanges.get(i);
rangesStr += subRange.toString();
if (i < subRanges.size() - 1)
rangesStr += ',';
}
}
} else {
// todo: fixed to two partitions?
subRanges = router.partitionRange(2, range);
}
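// Note (illustration): the "ranges" parameter handled above is a comma-separated list of
// hexadecimal hash ranges, e.g. "0-7fffffff,80000000-ffffffff"; DocRouter.DEFAULT.fromString
// parses each token back into a Range with signed 32-bit min/max endpoints.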
try {
List<String> subSlices = new ArrayList<>(subRanges.size());
List<String> subShardNames = new ArrayList<>(subRanges.size());
String nodeName = parentShardLeader.getNodeName();
for (int i = 0; i < subRanges.size(); i++) {
String subSlice = slice + "_" + i;
subSlices.add(subSlice);
String subShardName = Assign.buildCoreName(collectionName, subSlice, Replica.Type.NRT, 1);
subShardNames.add(subShardName);
}
boolean oldShardsDeleted = false;
for (String subSlice : subSlices) {
Slice oSlice = collection.getSlice(subSlice);
if (oSlice != null) {
final Slice.State state = oSlice.getState();
if (state == Slice.State.ACTIVE) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
} else if (state == Slice.State.CONSTRUCTION || state == Slice.State.RECOVERY) {
// delete the shards
log.info("Sub-shard: {} already exists therefore requesting its deletion", subSlice);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "deleteshard");
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, subSlice);
ZkNodeProps m = new ZkNodeProps(propMap);
try {
ocmh.commandMap.get(DELETESHARD).call(clusterState, m, new NamedList());
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to delete already existing sub shard: " + subSlice, e);
}
oldShardsDeleted = true;
}
}
}
if (oldShardsDeleted) {
// refresh the locally cached cluster state
// we know we have the latest because otherwise deleteshard would have failed
clusterState = zkStateReader.getClusterState();
collection = clusterState.getCollection(collectionName);
}
final String asyncId = message.getStr(ASYNC);
Map<String, String> requestMap = new HashMap<>();
for (int i = 0; i < subRanges.size(); i++) {
String subSlice = subSlices.get(i);
String subShardName = subShardNames.get(i);
DocRouter.Range subRange = subRanges.get(i);
log.info("Creating slice " + subSlice + " of collection " + collectionName + " on " + nodeName);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD.toLower());
propMap.put(ZkStateReader.SHARD_ID_PROP, subSlice);
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.State.CONSTRUCTION.toString());
propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
propMap.put("shard_parent_node", parentShardLeader.getNodeName());
propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
// wait until we are able to see the new shard in cluster state
ocmh.waitForNewShard(collectionName, subSlice);
// refresh cluster state
clusterState = zkStateReader.getClusterState();
log.info("Adding replica " + subShardName + " as part of slice " + subSlice + " of collection " + collectionName + " on " + nodeName);
propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, subSlice);
propMap.put("node", nodeName);
propMap.put(CoreAdminParams.NAME, subShardName);
// copy over property params:
for (String key : message.keySet()) {
if (key.startsWith(COLL_PROP_PREFIX)) {
propMap.put(key, message.getStr(key));
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), results, null);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard leaders", asyncId, requestMap);
for (String subShardName : subShardNames) {
// wait for parent leader to acknowledge the sub-shard core
log.info("Asking parent leader to wait for: " + subShardName + " to be alive on: " + nodeName);
String coreNodeName = ocmh.waitForCoreNodeName(collectionName, nodeName, subShardName);
CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
cmd.setCoreName(subShardName);
cmd.setNodeName(nodeName);
cmd.setCoreNodeName(coreNodeName);
cmd.setState(Replica.State.ACTIVE);
cmd.setCheckLive(true);
cmd.setOnlyIfLeader(true);
ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
ocmh.sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD timed out waiting for subshard leaders to come up", asyncId, requestMap);
log.info("Successfully created all sub-shards for collection " + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice " + slice + " of collection " + collectionName + " on " + parentShardLeader);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
params.set(CoreAdminParams.CORE, parentShardLeader.getStr("core"));
for (int i = 0; i < subShardNames.size(); i++) {
String subShardName = subShardNames.get(i);
params.add(CoreAdminParams.TARGET_CORE, subShardName);
}
params.set(CoreAdminParams.RANGES, rangesStr);
ocmh.sendShardRequest(parentShardLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to invoke SPLIT core admin command", asyncId, requestMap);
log.info("Index on shard: " + nodeName + " split into two successfully");
// apply buffered updates on sub-shards
for (int i = 0; i < subShardNames.size(); i++) {
String subShardName = subShardNames.get(i);
log.info("Applying buffered updates on : " + subShardName);
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
params.set(CoreAdminParams.NAME, subShardName);
ocmh.sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed while asking sub shard leaders" + " to apply buffered updates", asyncId, requestMap);
log.info("Successfully applied buffered updates on : " + subShardNames);
// Replica creation for the new Slices
// look at the replication factor and see if it matches reality
// if it does not, find best nodes to create more cores
// TODO: Have replication factor decided in some other way instead of numShards for the parent
int repFactor = parentSlice.getReplicas().size();
// we need to look at every node and see how many cores it serves
// add our new cores to existing nodes serving the least number of cores
// but (for now) require that each core goes on a distinct node.
// TODO: add smarter options that look at the current number of cores per
// node?
// for now we just go random
Set<String> nodes = clusterState.getLiveNodes();
List<String> nodeList = new ArrayList<>(nodes.size());
nodeList.addAll(nodes);
// TODO: Have maxShardsPerNode param for this operation?
// Remove the node that hosts the parent shard for replica creation.
nodeList.remove(nodeName);
// TODO: change this to handle sharding a slice into > 2 sub-shards.
Map<ReplicaAssigner.Position, String> nodeMap = ocmh.identifyNodes(clusterState, new ArrayList<>(clusterState.getLiveNodes()), new ZkNodeProps(collection.getProperties()), subSlices, repFactor - 1, 0, 0);
List<Map<String, Object>> replicas = new ArrayList<>((repFactor - 1) * 2);
for (Map.Entry<ReplicaAssigner.Position, String> entry : nodeMap.entrySet()) {
String sliceName = entry.getKey().shard;
String subShardNodeName = entry.getValue();
String shardName = collectionName + "_" + sliceName + "_replica" + (entry.getKey().index);
log.info("Creating replica shard " + shardName + " as part of slice " + sliceName + " of collection " + collectionName + " on " + subShardNodeName);
ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceName, ZkStateReader.CORE_NAME_PROP, shardName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName), ZkStateReader.NODE_NAME_PROP, subShardNodeName);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, sliceName);
propMap.put("node", subShardNodeName);
propMap.put(CoreAdminParams.NAME, shardName);
// copy over property params:
for (String key : message.keySet()) {
if (key.startsWith(COLL_PROP_PREFIX)) {
propMap.put(key, message.getStr(key));
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
// special flag param to instruct addReplica not to create the replica in cluster state again
propMap.put(SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, "true");
replicas.add(propMap);
}
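// test hook: TestInjection may force a failure here, between planning the replica
// placements and actually creating them, to exercise the failure handling below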
assert TestInjection.injectSplitFailureBeforeReplicaCreation();
long ephemeralOwner = leaderZnodeStat.getEphemeralOwner();
// compare against the ephemeralOwner of the parent leader node
leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// put sub-shards in recovery_failed state
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.RECOVERY_FAILED.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
if (leaderZnodeStat == null) {
// the leader is not live anymore, fail the split!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
} else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// there's a new leader, fail the split!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from " + ephemeralOwner + " to " + leaderZnodeStat.getEphemeralOwner() + ". This can cause data loss so we must abort the split");
}
}
if (repFactor == 1) {
// switch sub shard states to 'active'
log.info("Replication factor is 1 so switching shard states");
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(slice, Slice.State.INACTIVE.toString());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.ACTIVE.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
} else {
log.info("Requesting shard state be set to 'recovery'");
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.RECOVERY.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
}
// now actually create replica cores on sub shard nodes
for (Map<String, Object> replica : replicas) {
ocmh.addReplica(clusterState, new ZkNodeProps(replica), results, null);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard replicas", asyncId, requestMap);
log.info("Successfully created all replica shards for all sub-slices " + subSlices);
ocmh.commit(results, slice, parentShardLeader);
return true;
} catch (SolrException e) {
throw e;
} catch (Exception e) {
log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, null, e);
}
}
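Where no explicit ranges and no split.key are given, the method falls back to router.partitionRange(2, range). The idea can be sketched in plain Java; this mirrors, but is not, Solr's DocRouter logic, which handles further edge cases.

public class RangeSplitExample {
  // cut a signed 32-bit hash range into contiguous sub-ranges of (roughly) equal
  // size, using long arithmetic to avoid int overflow
  static int[][] partition(int min, int max, int parts) {
    long size = (long) max - (long) min + 1; // number of hash values in the range
    long step = size / parts;
    int[][] out = new int[parts][2];
    long start = min;
    for (int i = 0; i < parts; i++) {
      long end = (i == parts - 1) ? max : start + step - 1; // last part absorbs any remainder
      out[i][0] = (int) start;
      out[i][1] = (int) end;
      start = end + 1;
    }
    return out;
  }

  public static void main(String[] args) {
    for (int[] r : partition(Integer.MIN_VALUE, Integer.MAX_VALUE, 2)) {
      System.out.printf("[%x, %x]%n", r[0], r[1]);
    }
    // prints [80000000, ffffffff] and [0, 7fffffff]
  }
}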
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
Class: ZkController, method: publish.
/**
* Publish core state to overseer.
*/
public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState, boolean forcePublish) throws KeeperException, InterruptedException {
if (!forcePublish) {
try (SolrCore core = cc.getCore(cd.getName())) {
if (core == null || core.isClosed()) {
return;
}
MDCLoggingContext.setCore(core);
}
} else {
MDCLoggingContext.setCoreDescriptor(cc, cd);
}
try {
String collection = cd.getCloudDescriptor().getCollectionName();
log.debug("publishing state={}", state.toString());
// System.out.println(Thread.currentThread().getStackTrace()[3]);
Integer numShards = cd.getCloudDescriptor().getNumShards();
if (numShards == null) {
// XXX sys prop hack
log.debug("numShards not found on descriptor - reading it from system property");
numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
}
assert collection != null && collection.length() > 0;
String shardId = cd.getCloudDescriptor().getShardId();
String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
// if the leader initiated recovery, then verify that this replica has performed
// recovery as requested before becoming active; don't even look at lirState if going down
if (state != Replica.State.DOWN) {
final Replica.State lirState = getLeaderInitiatedRecoveryState(collection, shardId, coreNodeName);
if (lirState != null) {
assert cd.getCloudDescriptor().getReplicaType() != Replica.Type.PULL : "LIR should not happen for pull replicas!";
if (state == Replica.State.ACTIVE) {
// trying to become active, so leader-initiated state must be recovering
if (lirState == Replica.State.RECOVERING) {
updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, Replica.State.ACTIVE, cd, true);
} else if (lirState == Replica.State.DOWN) {
throw new SolrException(ErrorCode.INVALID_STATE, "Cannot publish state of core '" + cd.getName() + "' as active without recovering first!");
}
} else if (state == Replica.State.RECOVERING) {
// if it is currently DOWN, then trying to enter into recovering state is good
if (lirState == Replica.State.DOWN) {
updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, Replica.State.RECOVERING, cd, true);
}
}
}
}
Map<String, Object> props = new HashMap<>();
props.put(Overseer.QUEUE_OPERATION, "state");
props.put(ZkStateReader.STATE_PROP, state.toString());
props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles());
props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId());
props.put(ZkStateReader.COLLECTION_PROP, collection);
props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().toString());
if (numShards != null) {
props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
}
if (coreNodeName != null) {
props.put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
}
try (SolrCore core = cc.getCore(cd.getName())) {
if (core != null && core.getDirectoryFactory().isSharedStorage()) {
props.put("dataDir", core.getDataDir());
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (ulog != null) {
props.put("ulogDir", ulog.getLogDir());
}
}
} catch (SolrCoreInitializationException ex) {
// The core had failed to initialize (in a previous request, not this one), hence nothing to do here.
log.info("The core '{}' had failed to initialize before.", cd.getName());
}
ZkNodeProps m = new ZkNodeProps(props);
if (updateLastState) {
cd.getCloudDescriptor().lastPublished = state;
}
overseerJobQueue.offer(Utils.toJSON(m));
} finally {
MDCLoggingContext.clear();
}
}
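The LIR (leader-initiated recovery) gating above can be summarized as a small standalone state check. The enum mirrors the Replica.State values used in the method; this sketch is an illustration, not the Solr API.

enum ReplicaState { ACTIVE, RECOVERING, DOWN }

class LirGateExample {
  /**
   * Returns the LIR state to write back (or null for no change); throws if the
   * requested publish is illegal, mirroring the INVALID_STATE error above.
   */
  static ReplicaState gate(ReplicaState requested, ReplicaState lir) {
    if (lir == null || requested == ReplicaState.DOWN) return null; // going down: ignore LIR
    if (requested == ReplicaState.ACTIVE) {
      if (lir == ReplicaState.RECOVERING) return ReplicaState.ACTIVE; // recovery finished
      if (lir == ReplicaState.DOWN) throw new IllegalStateException(
          "cannot publish ACTIVE without recovering first");
    } else if (requested == ReplicaState.RECOVERING && lir == ReplicaState.DOWN) {
      return ReplicaState.RECOVERING; // DOWN -> RECOVERING is the expected transition
    }
    return null;
  }
}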
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
Class: ZkController, method: publishNodeAsDown.
/**
* Best effort to set DOWN state for all replicas on node.
*
* @param nodeName to operate on
*/
public void publishNodeAsDown(String nodeName) {
log.debug("Publish node={} as DOWN", nodeName);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower(), ZkStateReader.NODE_NAME_PROP, nodeName);
try {
Overseer.getStateUpdateQueue(getZkClient()).offer(Utils.toJSON(m));
} catch (InterruptedException e) {
Thread.currentThread().interrupt(); // restore the interrupt flag rather than clearing it
log.debug("Publish node as down was interrupted.");
} catch (Exception e) {
log.warn("Could not publish node as down: " + e.getMessage());
}
}
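As a usage note, the varargs ZkNodeProps constructor used above simply pairs alternating keys and values. A minimal sketch, assuming solr-solrj on the classpath and using the literal values behind the constants shown above:

import java.nio.charset.StandardCharsets;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.util.Utils;

public class DownNodeMessageSketch {
  public static void main(String[] args) {
    ZkNodeProps m = new ZkNodeProps(
        "operation", "downnode",              // Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower()
        "node_name", "127.0.0.1:8983_solr");  // ZkStateReader.NODE_NAME_PROP
    // Utils.toJSON produces the bytes that are offered to the Overseer queue
    System.out.println(new String(Utils.toJSON(m), StandardCharsets.UTF_8));
    // prints something like {"operation":"downnode","node_name":"127.0.0.1:8983_solr"}
  }
}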