use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.
the class RestoreCmd method call.
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
// TODO maybe we can inherit createCollection's options/code
String restoreCollectionName = message.getStr(COLLECTION_PROP);
// of backup
String backupName = message.getStr(NAME);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
Map<String, String> requestMap = new HashMap<>();
CoreContainer cc = ocmh.overseer.getZkController().getCoreContainer();
BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
URI location = repository.createURI(message.getStr(CoreAdminParams.BACKUP_LOCATION));
URI backupPath = repository.resolve(location, backupName);
ZkStateReader zkStateReader = ocmh.zkStateReader;
BackupManager backupMgr = new BackupManager(repository, zkStateReader);
Properties properties = backupMgr.readBackupProperties(location, backupName);
String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);
DocCollection backupCollectionState = backupMgr.readCollectionState(location, backupName, backupCollection);
// Get the Solr nodes to restore a collection.
final List<String> nodeList = OverseerCollectionMessageHandler.getLiveOrLiveAndCreateNodeSetList(zkStateReader.getClusterState().getLiveNodes(), message, RANDOM);
int numShards = backupCollectionState.getActiveSlices().size();
int numNrtReplicas = getInt(message, NRT_REPLICAS, backupCollectionState.getNumNrtReplicas(), 0);
if (numNrtReplicas == 0) {
numNrtReplicas = getInt(message, REPLICATION_FACTOR, backupCollectionState.getReplicationFactor(), 0);
}
int numTlogReplicas = getInt(message, TLOG_REPLICAS, backupCollectionState.getNumTlogReplicas(), 0);
int numPullReplicas = getInt(message, PULL_REPLICAS, backupCollectionState.getNumPullReplicas(), 0);
int totalReplicasPerShard = numNrtReplicas + numTlogReplicas + numPullReplicas;
int maxShardsPerNode = message.getInt(MAX_SHARDS_PER_NODE, backupCollectionState.getMaxShardsPerNode());
int availableNodeCount = nodeList.size();
if ((numShards * totalReplicasPerShard) > (availableNodeCount * maxShardsPerNode)) {
throw new SolrException(ErrorCode.BAD_REQUEST, String.format(Locale.ROOT, "Solr cloud with available number of nodes:%d is insufficient for" + " restoring a collection with %d shards, total replicas per shard %d and maxShardsPerNode %d." + " Consider increasing maxShardsPerNode value OR number of available nodes.", availableNodeCount, numShards, totalReplicasPerShard, maxShardsPerNode));
}
//Upload the configs
String configName = (String) properties.get(COLL_CONF);
String restoreConfigName = message.getStr(COLL_CONF, configName);
if (zkStateReader.getConfigManager().configExists(restoreConfigName)) {
log.info("Using existing config {}", restoreConfigName);
//TODO add overwrite option?
} else {
log.info("Uploading config {}", restoreConfigName);
backupMgr.uploadConfigDir(location, backupName, configName, restoreConfigName);
}
log.info("Starting restore into collection={} with backup_name={} at location={}", restoreCollectionName, backupName, location);
//Create core-less collection
{
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, CREATE.toString());
// mostly true. Prevents autoCreated=true in the collection state.
propMap.put("fromApi", "true");
// inherit settings from input API, defaulting to the backup's setting. Ex: replicationFactor
for (String collProp : COLL_PROPS.keySet()) {
Object val = message.getProperties().getOrDefault(collProp, backupCollectionState.get(collProp));
if (val != null) {
propMap.put(collProp, val);
}
}
propMap.put(NAME, restoreCollectionName);
//no cores
propMap.put(CREATE_NODE_SET, CREATE_NODE_SET_EMPTY);
propMap.put(COLL_CONF, restoreConfigName);
// router.*
@SuppressWarnings("unchecked") Map<String, Object> routerProps = (Map<String, Object>) backupCollectionState.getProperties().get(DocCollection.DOC_ROUTER);
for (Map.Entry<String, Object> pair : routerProps.entrySet()) {
propMap.put(DocCollection.DOC_ROUTER + "." + pair.getKey(), pair.getValue());
}
Set<String> sliceNames = backupCollectionState.getActiveSlicesMap().keySet();
if (backupCollectionState.getRouter() instanceof ImplicitDocRouter) {
propMap.put(SHARDS_PROP, StrUtils.join(sliceNames, ','));
} else {
propMap.put(NUM_SLICES, sliceNames.size());
// ClusterStateMutator.createCollection detects that "slices" is in fact a slice structure instead of a
// list of names, and if so uses this instead of building it. We clear the replica list.
Collection<Slice> backupSlices = backupCollectionState.getActiveSlices();
Map<String, Slice> newSlices = new LinkedHashMap<>(backupSlices.size());
for (Slice backupSlice : backupSlices) {
newSlices.put(backupSlice.getName(), new Slice(backupSlice.getName(), Collections.emptyMap(), backupSlice.getProperties()));
}
propMap.put(SHARDS_PROP, newSlices);
}
ocmh.commandMap.get(CREATE).call(zkStateReader.getClusterState(), new ZkNodeProps(propMap), new NamedList());
// note: when createCollection() returns, the collection exists (no race)
}
DocCollection restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
//Mark all shards in CONSTRUCTION STATE while we restore the data
{
//TODO might instead createCollection accept an initial state? Is there a race?
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (Slice shard : restoreCollection.getSlices()) {
propMap.put(shard.getName(), Slice.State.CONSTRUCTION.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
}
// TODO how do we leverage the RULE / SNITCH logic in createCollection?
ClusterState clusterState = zkStateReader.getClusterState();
List<String> sliceNames = new ArrayList<>();
restoreCollection.getSlices().forEach(x -> sliceNames.add(x.getName()));
Map<ReplicaAssigner.Position, String> positionVsNodes = ocmh.identifyNodes(clusterState, nodeList, message, sliceNames, numNrtReplicas, numTlogReplicas, numPullReplicas);
//Create one replica per shard and copy backed up data to it
for (Slice slice : restoreCollection.getSlices()) {
log.debug("Adding replica for shard={} collection={} ", slice.getName(), restoreCollection);
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD);
propMap.put(COLLECTION_PROP, restoreCollectionName);
propMap.put(SHARD_ID_PROP, slice.getName());
if (numNrtReplicas >= 1) {
propMap.put(REPLICA_TYPE, Replica.Type.NRT.name());
} else if (numTlogReplicas >= 1) {
propMap.put(REPLICA_TYPE, Replica.Type.TLOG.name());
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unexpected number of replicas, replicationFactor, " + Replica.Type.NRT + " or " + Replica.Type.TLOG + " must be greater than 0");
}
// Get the first node matching the shard to restore in
String node;
for (Map.Entry<ReplicaAssigner.Position, String> pvn : positionVsNodes.entrySet()) {
ReplicaAssigner.Position position = pvn.getKey();
if (position.shard == slice.getName()) {
node = pvn.getValue();
propMap.put(CoreAdminParams.NODE, node);
positionVsNodes.remove(position);
break;
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
ocmh.addPropertyParams(message, propMap);
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), new NamedList(), null);
}
//refresh the location copy of collection state
restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
//Copy data from backed up index to each replica
for (Slice slice : restoreCollection.getSlices()) {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.RESTORECORE.toString());
params.set(NAME, "snapshot." + slice.getName());
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
ocmh.sliceCmd(clusterState, params, null, slice, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap);
//Mark all shards in ACTIVE STATE
{
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
for (Slice shard : restoreCollection.getSlices()) {
propMap.put(shard.getName(), Slice.State.ACTIVE.toString());
}
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
}
//refresh the location copy of collection state
restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
if (totalReplicasPerShard > 1) {
log.info("Adding replicas to restored collection={}", restoreCollection);
for (Slice slice : restoreCollection.getSlices()) {
//Add the remaining replicas for each shard, considering it's type
int createdNrtReplicas = 0, createdTlogReplicas = 0, createdPullReplicas = 0;
// We already created either a NRT or an TLOG replica as leader
if (numNrtReplicas > 0) {
createdNrtReplicas++;
} else if (createdTlogReplicas > 0) {
createdTlogReplicas++;
}
for (int i = 1; i < totalReplicasPerShard; i++) {
Replica.Type typeToCreate;
if (createdNrtReplicas < numNrtReplicas) {
createdNrtReplicas++;
typeToCreate = Replica.Type.NRT;
} else if (createdTlogReplicas < numTlogReplicas) {
createdTlogReplicas++;
typeToCreate = Replica.Type.TLOG;
} else {
createdPullReplicas++;
typeToCreate = Replica.Type.PULL;
assert createdPullReplicas <= numPullReplicas : "Unexpected number of replicas";
}
log.debug("Adding replica for shard={} collection={} of type {} ", slice.getName(), restoreCollection, typeToCreate);
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(COLLECTION_PROP, restoreCollectionName);
propMap.put(SHARD_ID_PROP, slice.getName());
propMap.put(REPLICA_TYPE, typeToCreate.name());
// Get the first node matching the shard to restore in
String node;
for (Map.Entry<ReplicaAssigner.Position, String> pvn : positionVsNodes.entrySet()) {
ReplicaAssigner.Position position = pvn.getKey();
if (position.shard == slice.getName()) {
node = pvn.getValue();
propMap.put(CoreAdminParams.NODE, node);
positionVsNodes.remove(position);
break;
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
ocmh.addPropertyParams(message, propMap);
ocmh.addReplica(zkStateReader.getClusterState(), new ZkNodeProps(propMap), results, null);
}
}
}
log.info("Completed restoring collection={} backupName={}", restoreCollection, backupName);
}
use of org.apache.solr.handler.component.ShardHandler in project lucene-solr by apache.
the class SplitShardCmd method split.
public boolean split(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr("collection");
String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
log.info("Split shard invoked");
ZkStateReader zkStateReader = ocmh.zkStateReader;
zkStateReader.forceUpdateCollection(collectionName);
String splitKey = message.getStr("split.key");
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
DocCollection collection = clusterState.getCollection(collectionName);
DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
Slice parentSlice;
if (slice == null) {
if (router instanceof CompositeIdRouter) {
Collection<Slice> searchSlices = router.getSearchSlicesSingle(splitKey, new ModifiableSolrParams(), collection);
if (searchSlices.isEmpty()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to find an active shard for split.key: " + splitKey);
}
if (searchSlices.size() > 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Splitting a split.key: " + splitKey + " which spans multiple shards is not supported");
}
parentSlice = searchSlices.iterator().next();
slice = parentSlice.getName();
log.info("Split by route.key: {}, parent shard is: {} ", splitKey, slice);
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Split by route key can only be used with CompositeIdRouter or subclass. Found router: " + router.getClass().getName());
}
} else {
parentSlice = collection.getSlice(slice);
}
if (parentSlice == null) {
// an exception already
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
}
// find the leader for the shard
Replica parentShardLeader = null;
try {
parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice, 10000);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
// let's record the ephemeralOwner of the parent leader node
Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null) {
// we just got to know the leader but its live node is gone already!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
}
DocRouter.Range range = parentSlice.getRange();
if (range == null) {
range = new PlainIdRouter().fullRange();
}
List<DocRouter.Range> subRanges = null;
String rangesStr = message.getStr(CoreAdminParams.RANGES);
if (rangesStr != null) {
String[] ranges = rangesStr.split(",");
if (ranges.length == 0 || ranges.length == 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "There must be at least two ranges specified to split a shard");
} else {
subRanges = new ArrayList<>(ranges.length);
for (int i = 0; i < ranges.length; i++) {
String r = ranges[i];
try {
subRanges.add(DocRouter.DEFAULT.fromString(r));
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Exception in parsing hexadecimal hash range: " + r, e);
}
if (!subRanges.get(i).isSubsetOf(range)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash range: " + r + " is not a subset of parent shard's range: " + range.toString());
}
}
// copy to preserve original order
List<DocRouter.Range> temp = new ArrayList<>(subRanges);
Collections.sort(temp);
if (!range.equals(new DocRouter.Range(temp.get(0).min, temp.get(temp.size() - 1).max))) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash ranges: " + rangesStr + " do not cover the entire range of parent shard: " + range);
}
for (int i = 1; i < temp.size(); i++) {
if (temp.get(i - 1).max + 1 != temp.get(i).min) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash ranges: " + rangesStr + " either overlap with each other or " + "do not cover the entire range of parent shard: " + range);
}
}
}
} else if (splitKey != null) {
if (router instanceof CompositeIdRouter) {
CompositeIdRouter compositeIdRouter = (CompositeIdRouter) router;
subRanges = compositeIdRouter.partitionRangeByKey(splitKey, range);
if (subRanges.size() == 1) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " has a hash range that is exactly equal to hash range of shard: " + slice);
}
for (DocRouter.Range subRange : subRanges) {
if (subRange.min == subRange.max) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " must be a compositeId");
}
}
log.info("Partitioning parent shard " + slice + " range: " + parentSlice.getRange() + " yields: " + subRanges);
rangesStr = "";
for (int i = 0; i < subRanges.size(); i++) {
DocRouter.Range subRange = subRanges.get(i);
rangesStr += subRange.toString();
if (i < subRanges.size() - 1)
rangesStr += ',';
}
}
} else {
// todo: fixed to two partitions?
subRanges = router.partitionRange(2, range);
}
try {
List<String> subSlices = new ArrayList<>(subRanges.size());
List<String> subShardNames = new ArrayList<>(subRanges.size());
String nodeName = parentShardLeader.getNodeName();
for (int i = 0; i < subRanges.size(); i++) {
String subSlice = slice + "_" + i;
subSlices.add(subSlice);
String subShardName = Assign.buildCoreName(collectionName, subSlice, Replica.Type.NRT, 1);
subShardNames.add(subShardName);
}
boolean oldShardsDeleted = false;
for (String subSlice : subSlices) {
Slice oSlice = collection.getSlice(subSlice);
if (oSlice != null) {
final Slice.State state = oSlice.getState();
if (state == Slice.State.ACTIVE) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
} else if (state == Slice.State.CONSTRUCTION || state == Slice.State.RECOVERY) {
// delete the shards
log.info("Sub-shard: {} already exists therefore requesting its deletion", subSlice);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "deleteshard");
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, subSlice);
ZkNodeProps m = new ZkNodeProps(propMap);
try {
ocmh.commandMap.get(DELETESHARD).call(clusterState, m, new NamedList());
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to delete already existing sub shard: " + subSlice, e);
}
oldShardsDeleted = true;
}
}
}
if (oldShardsDeleted) {
// refresh the locally cached cluster state
// we know we have the latest because otherwise deleteshard would have failed
clusterState = zkStateReader.getClusterState();
collection = clusterState.getCollection(collectionName);
}
final String asyncId = message.getStr(ASYNC);
Map<String, String> requestMap = new HashMap<>();
for (int i = 0; i < subRanges.size(); i++) {
String subSlice = subSlices.get(i);
String subShardName = subShardNames.get(i);
DocRouter.Range subRange = subRanges.get(i);
log.info("Creating slice " + subSlice + " of collection " + collectionName + " on " + nodeName);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD.toLower());
propMap.put(ZkStateReader.SHARD_ID_PROP, subSlice);
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.State.CONSTRUCTION.toString());
propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
propMap.put("shard_parent_node", parentShardLeader.getNodeName());
propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
// wait until we are able to see the new shard in cluster state
ocmh.waitForNewShard(collectionName, subSlice);
// refresh cluster state
clusterState = zkStateReader.getClusterState();
log.info("Adding replica " + subShardName + " as part of slice " + subSlice + " of collection " + collectionName + " on " + nodeName);
propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, subSlice);
propMap.put("node", nodeName);
propMap.put(CoreAdminParams.NAME, subShardName);
// copy over property params:
for (String key : message.keySet()) {
if (key.startsWith(COLL_PROP_PREFIX)) {
propMap.put(key, message.getStr(key));
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), results, null);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard leaders", asyncId, requestMap);
for (String subShardName : subShardNames) {
// wait for parent leader to acknowledge the sub-shard core
log.info("Asking parent leader to wait for: " + subShardName + " to be alive on: " + nodeName);
String coreNodeName = ocmh.waitForCoreNodeName(collectionName, nodeName, subShardName);
CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
cmd.setCoreName(subShardName);
cmd.setNodeName(nodeName);
cmd.setCoreNodeName(coreNodeName);
cmd.setState(Replica.State.ACTIVE);
cmd.setCheckLive(true);
cmd.setOnlyIfLeader(true);
ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
ocmh.sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD timed out waiting for subshard leaders to come up", asyncId, requestMap);
log.info("Successfully created all sub-shards for collection " + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice " + slice + " of collection " + collectionName + " on " + parentShardLeader);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
params.set(CoreAdminParams.CORE, parentShardLeader.getStr("core"));
for (int i = 0; i < subShardNames.size(); i++) {
String subShardName = subShardNames.get(i);
params.add(CoreAdminParams.TARGET_CORE, subShardName);
}
params.set(CoreAdminParams.RANGES, rangesStr);
ocmh.sendShardRequest(parentShardLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to invoke SPLIT core admin command", asyncId, requestMap);
log.info("Index on shard: " + nodeName + " split into two successfully");
// apply buffered updates on sub-shards
for (int i = 0; i < subShardNames.size(); i++) {
String subShardName = subShardNames.get(i);
log.info("Applying buffered updates on : " + subShardName);
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
params.set(CoreAdminParams.NAME, subShardName);
ocmh.sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed while asking sub shard leaders" + " to apply buffered updates", asyncId, requestMap);
log.info("Successfully applied buffered updates on : " + subShardNames);
// Replica creation for the new Slices
// look at the replication factor and see if it matches reality
// if it does not, find best nodes to create more cores
// TODO: Have replication factor decided in some other way instead of numShards for the parent
int repFactor = parentSlice.getReplicas().size();
// we need to look at every node and see how many cores it serves
// add our new cores to existing nodes serving the least number of cores
// but (for now) require that each core goes on a distinct node.
// TODO: add smarter options that look at the current number of cores per
// node?
// for now we just go random
Set<String> nodes = clusterState.getLiveNodes();
List<String> nodeList = new ArrayList<>(nodes.size());
nodeList.addAll(nodes);
// TODO: Have maxShardsPerNode param for this operation?
// Remove the node that hosts the parent shard for replica creation.
nodeList.remove(nodeName);
// TODO: change this to handle sharding a slice into > 2 sub-shards.
Map<ReplicaAssigner.Position, String> nodeMap = ocmh.identifyNodes(clusterState, new ArrayList<>(clusterState.getLiveNodes()), new ZkNodeProps(collection.getProperties()), subSlices, repFactor - 1, 0, 0);
List<Map<String, Object>> replicas = new ArrayList<>((repFactor - 1) * 2);
for (Map.Entry<ReplicaAssigner.Position, String> entry : nodeMap.entrySet()) {
String sliceName = entry.getKey().shard;
String subShardNodeName = entry.getValue();
String shardName = collectionName + "_" + sliceName + "_replica" + (entry.getKey().index);
log.info("Creating replica shard " + shardName + " as part of slice " + sliceName + " of collection " + collectionName + " on " + subShardNodeName);
ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceName, ZkStateReader.CORE_NAME_PROP, shardName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName), ZkStateReader.NODE_NAME_PROP, subShardNodeName);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, sliceName);
propMap.put("node", subShardNodeName);
propMap.put(CoreAdminParams.NAME, shardName);
// copy over property params:
for (String key : message.keySet()) {
if (key.startsWith(COLL_PROP_PREFIX)) {
propMap.put(key, message.getStr(key));
}
}
// add async param
if (asyncId != null) {
propMap.put(ASYNC, asyncId);
}
// special flag param to instruct addReplica not to create the replica in cluster state again
propMap.put(SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, "true");
replicas.add(propMap);
}
assert TestInjection.injectSplitFailureBeforeReplicaCreation();
long ephemeralOwner = leaderZnodeStat.getEphemeralOwner();
// compare against the ephemeralOwner of the parent leader node
leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// put sub-shards in recovery_failed state
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.RECOVERY_FAILED.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
if (leaderZnodeStat == null) {
// the leader is not live anymore, fail the split!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
} else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// there's a new leader, fail the split!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from " + ephemeralOwner + " to " + leaderZnodeStat.getEphemeralOwner() + ". This can cause data loss so we must abort the split");
}
}
if (repFactor == 1) {
// switch sub shard states to 'active'
log.info("Replication factor is 1 so switching shard states");
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(slice, Slice.State.INACTIVE.toString());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.ACTIVE.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
} else {
log.info("Requesting shard state be set to 'recovery'");
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.RECOVERY.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
}
// now actually create replica cores on sub shard nodes
for (Map<String, Object> replica : replicas) {
ocmh.addReplica(clusterState, new ZkNodeProps(replica), results, null);
}
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard replicas", asyncId, requestMap);
log.info("Successfully created all replica shards for all sub-slices " + subSlices);
ocmh.commit(results, slice, parentShardLeader);
return true;
} catch (SolrException e) {
throw e;
} catch (Exception e) {
log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, null, e);
}
}
Aggregations