use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class BackupCmd method copyIndexFiles.
private void copyIndexFiles(URI backupPath, ZkNodeProps request, NamedList results) throws Exception {
String collectionName = request.getStr(COLLECTION_PROP);
String backupName = request.getStr(NAME);
String asyncId = request.getStr(ASYNC);
String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
Map<String, String> requestMap = new HashMap<>();
String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);
Optional<CollectionSnapshotMetaData> snapshotMeta = Optional.empty();
if (commitName != null) {
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
snapshotMeta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
if (!snapshotMeta.isPresent()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " does not exist for collection " + collectionName);
}
if (snapshotMeta.get().getStatus() != SnapshotStatus.Successful) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName + " has not completed successfully. The status is " + snapshotMeta.get().getStatus());
}
}
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName, backupPath);
Collection<String> shardsToConsider = Collections.emptySet();
if (snapshotMeta.isPresent()) {
shardsToConsider = snapshotMeta.get().getShards();
}
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
Replica replica = null;
if (snapshotMeta.isPresent()) {
if (!shardsToConsider.contains(slice.getName())) {
log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.", slice.getName(), collectionName, snapshotMeta.get().getName());
continue;
}
replica = selectReplicaWithSnapshot(snapshotMeta.get(), slice);
} else {
// Note - Actually this can return a null value when there is no leader for this shard.
replica = slice.getLeader();
if (replica == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + slice.getName() + " of collection " + collectionName);
}
}
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
params.set(CoreAdminParams.BACKUP_REPOSITORY, repoName);
// note: index dir will be here then the "snapshot." + slice name
params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
params.set(CORE_NAME_PROP, coreName);
if (snapshotMeta.isPresent()) {
params.set(CoreAdminParams.COMMIT_NAME, snapshotMeta.get().getName());
}
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
}
log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);
ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class DeleteShardCmd method call.
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP);
String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
log.info("Delete shard invoked");
Slice slice = clusterState.getSlice(collectionName, sliceId);
if (slice == null) {
if (clusterState.hasCollection(collectionName)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with name " + sliceId + " exists for collection " + collectionName);
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
}
}
// For now, only allow for deletions of Inactive slices or custom hashes (range==null).
// TODO: Add check for range gaps on Slice deletion
final Slice.State state = slice.getState();
if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY || state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state + ". Only non-active (or custom-hashed) slices can be deleted.");
}
if (state == Slice.State.RECOVERY) {
// mark the slice as 'construction' and only then try to delete the cores
// see SOLR-9455
DistributedQueue inQueue = Overseer.getStateUpdateQueue(ocmh.zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
}
String asyncId = message.getStr(ASYNC);
try {
List<ZkNodeProps> replicas = getReplicasForSlice(collectionName, slice);
CountDownLatch cleanupLatch = new CountDownLatch(replicas.size());
for (ZkNodeProps r : replicas) {
final ZkNodeProps replica = r.plus(message.getProperties()).plus("parallel", "true").plus(ASYNC, asyncId);
log.info("Deleting replica for collection={} shard={} on node={}", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(CoreAdminParams.NODE));
NamedList deleteResult = new NamedList();
try {
((DeleteReplicaCmd) ocmh.commandMap.get(DELETEREPLICA)).deleteReplica(clusterState, replica, deleteResult, () -> {
cleanupLatch.countDown();
if (deleteResult.get("failure") != null) {
synchronized (results) {
results.add("failure", String.format(Locale.ROOT, "Failed to delete replica for collection=%s shard=%s" + " on node=%s", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(NODE_NAME_PROP)));
}
}
SimpleOrderedMap success = (SimpleOrderedMap) deleteResult.get("success");
if (success != null) {
synchronized (results) {
results.add("success", success);
}
}
});
} catch (KeeperException e) {
log.warn("Error deleting replica: " + r, e);
cleanupLatch.countDown();
} catch (Exception e) {
log.warn("Error deleting replica: " + r, e);
cleanupLatch.countDown();
throw e;
}
}
log.debug("Waiting for delete shard action to complete");
cleanupLatch.await(5, TimeUnit.MINUTES);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
ZkStateReader zkStateReader = ocmh.zkStateReader;
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
// wait for a while until we don't see the shard
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
boolean removed = false;
while (!timeout.hasTimedOut()) {
Thread.sleep(100);
DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
removed = collection.getSlice(sliceId) == null;
if (removed) {
// just a bit of time so it's more likely other readers see on return
Thread.sleep(100);
break;
}
}
if (!removed) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully remove collection: " + collectionName + " shard: " + sliceId);
}
log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId);
} catch (SolrException e) {
throw e;
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collectionName + " shard: " + sliceId, e);
}
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class DeleteReplicaCmd method deleteReplicaBasedOnCount.
/**
* Delete replicas based on count for a given collection. If a shard is passed, uses that
* else deletes given num replicas across all shards for the given collection.
*/
void deleteReplicaBasedOnCount(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete, boolean parallel) throws KeeperException, InterruptedException {
ocmh.checkRequired(message, COLLECTION_PROP, COUNT_PROP);
int count = Integer.parseInt(message.getStr(COUNT_PROP));
String collectionName = message.getStr(COLLECTION_PROP);
String shard = message.getStr(SHARD_ID_PROP);
DocCollection coll = clusterState.getCollection(collectionName);
Slice slice = null;
//Validate if shard is passed.
if (shard != null) {
slice = coll.getSlice(shard);
if (slice == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid shard name : " + shard + " in collection : " + collectionName);
}
}
Map<Slice, Set<String>> shardToReplicasMapping = new HashMap<Slice, Set<String>>();
if (slice != null) {
Set<String> replicasToBeDeleted = pickReplicasTobeDeleted(slice, shard, collectionName, count);
shardToReplicasMapping.put(slice, replicasToBeDeleted);
} else {
//If there are many replicas left, remove the rest based on count.
Collection<Slice> allSlices = coll.getSlices();
for (Slice individualSlice : allSlices) {
Set<String> replicasToBeDeleted = pickReplicasTobeDeleted(individualSlice, individualSlice.getName(), collectionName, count);
shardToReplicasMapping.put(individualSlice, replicasToBeDeleted);
}
}
for (Slice shardSlice : shardToReplicasMapping.keySet()) {
String shardId = shardSlice.getName();
Set<String> replicas = shardToReplicasMapping.get(shardSlice);
//callDeleteReplica on all replicas
for (String replica : replicas) {
log.debug("Deleting replica {} for shard {} based on count {}", replica, shardId, count);
deleteCore(shardSlice, collectionName, replica, message, shard, results, onComplete, parallel);
}
results.add("shard_id", shardId);
results.add("replicas_deleted", replicas);
}
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class DeleteSnapshotCmd method call.
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr(COLLECTION_PROP);
String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
String asyncId = message.getStr(ASYNC);
Map<String, String> requestMap = new HashMap<>();
NamedList shardRequestResults = new NamedList();
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
Optional<CollectionSnapshotMetaData> meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
if (!meta.isPresent()) {
// Snapshot not found. Nothing to do.
return;
}
log.info("Deleting a snapshot for collection={} with commitName={}", collectionName, commitName);
Set<String> existingCores = new HashSet<>();
for (Slice s : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
for (Replica r : s.getReplicas()) {
existingCores.add(r.getCoreName());
}
}
Set<String> coresWithSnapshot = new HashSet<>();
for (CoreSnapshotMetaData m : meta.get().getReplicaSnapshots()) {
if (existingCores.contains(m.getCoreName())) {
coresWithSnapshot.add(m.getCoreName());
}
}
log.info("Existing cores with snapshot for collection={} are {}", collectionName, existingCores);
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
for (Replica replica : slice.getReplicas()) {
if (replica.getState() == State.DOWN) {
// Since replica is down - no point sending a request.
continue;
}
// replicas to contact at this point, we try on all replicas.
if (meta.get().getStatus() == SnapshotStatus.InProgress || coresWithSnapshot.contains(replica.getCoreName())) {
String coreName = replica.getStr(CORE_NAME_PROP);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.DELETESNAPSHOT.toString());
params.set(NAME, slice.getName());
params.set(CORE_NAME_PROP, coreName);
params.set(CoreAdminParams.COMMIT_NAME, commitName);
log.info("Sending deletesnapshot request to core={} with commitName={}", coreName, commitName);
ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
}
}
}
ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);
NamedList success = (NamedList) shardRequestResults.get("success");
List<CoreSnapshotMetaData> replicas = new ArrayList<>();
if (success != null) {
for (int i = 0; i < success.size(); i++) {
NamedList resp = (NamedList) success.getVal(i);
// Unfortunately async processing logic doesn't provide the "core" name automatically.
String coreName = (String) resp.get("core");
coresWithSnapshot.remove(coreName);
}
}
if (!coresWithSnapshot.isEmpty()) {
// One or more failures.
log.warn("Failed to delete a snapshot for collection {} with commitName = {}. Snapshot could not be deleted for following cores {}", collectionName, commitName, coresWithSnapshot);
List<CoreSnapshotMetaData> replicasWithSnapshot = new ArrayList<>();
for (CoreSnapshotMetaData m : meta.get().getReplicaSnapshots()) {
if (coresWithSnapshot.contains(m.getCoreName())) {
replicasWithSnapshot.add(m);
}
}
// Update the ZK meta-data to include only cores with the snapshot. This will enable users to figure out
// which cores still contain the named snapshot.
CollectionSnapshotMetaData newResult = new CollectionSnapshotMetaData(meta.get().getName(), SnapshotStatus.Failed, meta.get().getCreationDate(), replicasWithSnapshot);
SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, newResult);
log.info("Saved snapshot information for collection={} with commitName={} in Zookeeper as follows", collectionName, commitName, Utils.toJSON(newResult));
throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to delete snapshot on cores " + coresWithSnapshot);
} else {
// Delete the ZK path so that we eliminate the references of this snapshot from collection level meta-data.
SolrSnapshotManager.deleteCollectionLevelSnapshot(zkClient, collectionName, commitName);
log.info("Deleted Zookeeper snapshot metdata for collection={} with commitName={}", collectionName, commitName);
log.info("Successfully deleted snapshot for collection={} with commitName={}", collectionName, commitName);
}
}
use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.
the class AddReplicaCmd method addReplica.
ZkNodeProps addReplica(ClusterState clusterState, ZkNodeProps message, NamedList results, Runnable onComplete) throws KeeperException, InterruptedException {
log.info("addReplica() : {}", Utils.toJSONString(message));
String collection = message.getStr(COLLECTION_PROP);
String node = message.getStr(CoreAdminParams.NODE);
String shard = message.getStr(SHARD_ID_PROP);
String coreName = message.getStr(CoreAdminParams.NAME);
Replica.Type replicaType = Replica.Type.valueOf(message.getStr(ZkStateReader.REPLICA_TYPE, Replica.Type.NRT.name()).toUpperCase(Locale.ROOT));
boolean parallel = message.getBool("parallel", false);
if (StringUtils.isBlank(coreName)) {
coreName = message.getStr(CoreAdminParams.PROPERTY_PREFIX + CoreAdminParams.NAME);
}
final String asyncId = message.getStr(ASYNC);
DocCollection coll = clusterState.getCollection(collection);
if (coll == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
}
if (coll.getSlice(shard) == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " shard: " + shard + " does not exist");
}
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
boolean skipCreateReplicaInClusterState = message.getBool(SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, false);
// Kind of unnecessary, but it does put the logic of whether to override maxShardsPerNode in one place.
if (!skipCreateReplicaInClusterState) {
node = getNodesForNewReplicas(clusterState, collection, shard, 1, node, ocmh.overseer.getZkController().getCoreContainer()).get(0).nodeName;
}
log.info("Node Identified {} for creating new replica", node);
if (!clusterState.liveNodesContain(node)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Node: " + node + " is not live");
}
if (coreName == null) {
coreName = Assign.buildCoreName(coll, shard, replicaType);
} else if (!skipCreateReplicaInClusterState) {
//Validate that the core name is unique in that collection
for (Slice slice : coll.getSlices()) {
for (Replica replica : slice.getReplicas()) {
String replicaCoreName = replica.getStr(CORE_NAME_PROP);
if (coreName.equals(replicaCoreName)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Another replica with the same core name already exists" + " for this collection");
}
}
}
}
ModifiableSolrParams params = new ModifiableSolrParams();
ZkStateReader zkStateReader = ocmh.zkStateReader;
if (!Overseer.isLegacy(zkStateReader)) {
if (!skipCreateReplicaInClusterState) {
ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(), ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.SHARD_ID_PROP, shard, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(node), ZkStateReader.NODE_NAME_PROP, node, ZkStateReader.REPLICA_TYPE, replicaType.name());
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
}
params.set(CoreAdminParams.CORE_NODE_NAME, ocmh.waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
}
String configName = zkStateReader.readConfigName(collection);
String routeKey = message.getStr(ShardParams._ROUTE_);
String dataDir = message.getStr(CoreAdminParams.DATA_DIR);
String instanceDir = message.getStr(CoreAdminParams.INSTANCE_DIR);
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.CREATE.toString());
params.set(CoreAdminParams.NAME, coreName);
params.set(COLL_CONF, configName);
params.set(CoreAdminParams.COLLECTION, collection);
params.set(CoreAdminParams.REPLICA_TYPE, replicaType.name());
if (shard != null) {
params.set(CoreAdminParams.SHARD, shard);
} else if (routeKey != null) {
Collection<Slice> slices = coll.getRouter().getSearchSlicesSingle(routeKey, null, coll);
if (slices.isEmpty()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No active shard serving _route_=" + routeKey + " found");
} else {
params.set(CoreAdminParams.SHARD, slices.iterator().next().getName());
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify either 'shard' or _route_ param");
}
if (dataDir != null) {
params.set(CoreAdminParams.DATA_DIR, dataDir);
}
if (instanceDir != null) {
params.set(CoreAdminParams.INSTANCE_DIR, instanceDir);
}
ocmh.addPropertyParams(message, params);
// For tracking async calls.
Map<String, String> requestMap = new HashMap<>();
ocmh.sendShardRequest(node, params, shardHandler, asyncId, requestMap);
final String fnode = node;
final String fcoreName = coreName;
Runnable runnable = () -> {
ocmh.processResponses(results, shardHandler, true, "ADDREPLICA failed to create replica", asyncId, requestMap);
ocmh.waitForCoreNodeName(collection, fnode, fcoreName);
if (onComplete != null)
onComplete.run();
};
if (!parallel) {
runnable.run();
} else {
ocmh.tpe.submit(runnable);
}
return new ZkNodeProps(ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.SHARD_ID_PROP, shard, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.NODE_NAME_PROP, node);
}
Aggregations