use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData in project lucene-solr by apache.
the class BackupCmd method selectReplicaWithSnapshot.
/**
 * Chooses the replica of {@code slice} whose copy of the given snapshot should be used.
 *
 * <p>The replica recorded as leader in the snapshot meta-data is preferred, provided it is
 * still part of the shard and is not {@code DOWN}. Otherwise the first replica of the shard
 * that is not {@code DOWN} and for which the meta-data reports the snapshot is returned.
 *
 * @param snapshotMeta collection level meta-data of the snapshot
 * @param slice the shard for which a replica is required
 * @return a replica of {@code slice} that is not {@code DOWN}
 * @throws SolrException (SERVER_ERROR) when no suitable replica can be found
 */
private Replica selectReplicaWithSnapshot(CollectionSnapshotMetaData snapshotMeta, Slice slice) {
  final String shardName = slice.getName();

  // First choice: the core that was leader of this shard when the snapshot was created.
  Collection<CoreSnapshotMetaData> shardSnapshots = snapshotMeta.getReplicaSnapshotsForShard(shardName);
  Optional<CoreSnapshotMetaData> leaderAtSnapshotTime = shardSnapshots.stream()
      .filter(CoreSnapshotMetaData::isLeader)
      .findFirst();
  if (leaderAtSnapshotTime.isPresent()) {
    String leaderCoreName = leaderAtSnapshotTime.get().getCoreName();
    log.info("Replica {} was the leader when snapshot {} was created.", leaderCoreName, snapshotMeta.getName());
    Replica formerLeader = slice.getReplica(leaderCoreName);
    if (formerLeader != null && !formerLeader.getState().equals(State.DOWN)) {
      return formerLeader;
    }
  }

  // Fallback: any replica of the shard that is not down and is known to hold the snapshot.
  for (Replica candidate : slice.getReplicas()) {
    if (candidate.getState() != State.DOWN && snapshotMeta.isSnapshotExists(shardName, candidate)) {
      return candidate;
    }
  }

  throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find any live replica with a snapshot named " + snapshotMeta.getName() + " for shard " + shardName);
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData in project lucene-solr by apache.
the class BackupCmd method copyIndexFiles.
/**
 * Sends a BACKUPCORE request to one replica of every active shard of the collection and then
 * waits for the responses.
 *
 * <p>When the request names a snapshot (commit name), that snapshot must exist and have status
 * {@code Successful}; only shards listed in the snapshot are backed up and the replica is chosen
 * by {@link #selectReplicaWithSnapshot}. Without a snapshot the current shard leader is used.
 *
 * @param backupPath location handed to each core as the backup location
 * @param request the backup request; collection, backup name, async id, repository and
 *     (optionally) commit name are read from it
 * @param results receives the processed shard responses
 * @throws SolrException BAD_REQUEST if the named snapshot is missing or not successful;
 *     SERVER_ERROR if a shard has no leader (non-snapshot case)
 */
private void copyIndexFiles(URI backupPath, ZkNodeProps request, NamedList results) throws Exception {
  final String collectionName = request.getStr(COLLECTION_PROP);
  final String backupName = request.getStr(NAME);
  final String asyncId = request.getStr(ASYNC);
  final String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
  final ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
  final Map<String, String> requestMap = new HashMap<>();
  final String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);

  // Resolve and validate the snapshot, if one was requested. Stays null otherwise.
  CollectionSnapshotMetaData snapshot = null;
  if (commitName != null) {
    SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
    Optional<CollectionSnapshotMetaData> lookup =
        SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
    if (!lookup.isPresent()) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " does not exist for collection " + collectionName);
    }
    snapshot = lookup.get();
    if (snapshot.getStatus() != SnapshotStatus.Successful) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " for collection " + collectionName + " has not completed successfully. The status is " + snapshot.getStatus());
    }
  }

  log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName, backupPath);

  Collection<String> shardsToConsider = Collections.emptySet();
  if (snapshot != null) {
    shardsToConsider = snapshot.getShards();
  }

  for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getActiveSlices()) {
    final String shardName = slice.getName();
    final Replica replica;
    if (snapshot == null) {
      // getLeader() can return null when the shard currently has no leader.
      replica = slice.getLeader();
      if (replica == null) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "No 'leader' replica available for shard " + shardName + " of collection " + collectionName);
      }
    } else {
      if (!shardsToConsider.contains(shardName)) {
        log.warn("Skipping the backup for shard {} since it wasn't part of the collection {} when snapshot {} was created.", shardName, collectionName, snapshot.getName());
        continue;
      }
      replica = selectReplicaWithSnapshot(snapshot, slice);
    }

    final String coreName = replica.getStr(CORE_NAME_PROP);

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.BACKUPCORE.toString());
    params.set(NAME, shardName);
    params.set(CoreAdminParams.BACKUP_REPOSITORY, repoName);
    // note: index dir will be here then the "snapshot." + slice name
    params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
    params.set(CORE_NAME_PROP, coreName);
    if (snapshot != null) {
      params.set(CoreAdminParams.COMMIT_NAME, snapshot.getName());
    }

    ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
    log.debug("Sent backup request to core={} for backupName={}", coreName, backupName);
  }
  log.debug("Sent backup requests to all shard leaders for backupName={}", backupName);

  ocmh.processResponses(results, shardHandler, true, "Could not backup all replicas", asyncId, requestMap);
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData in project lucene-solr by apache.
the class DeleteSnapshotCmd method call.
/**
 * Deletes the named collection level snapshot from the cores that hold it and from ZooKeeper.
 *
 * <p>If no snapshot with the given commit name is registered for the collection the method
 * returns without doing anything. Otherwise a DELETESNAPSHOT request is sent to the relevant
 * replicas that are not {@code DOWN}. When every core listed in the meta-data (and still present
 * in the collection) confirms the deletion, the ZooKeeper entry is removed. If any of them does
 * not, the ZooKeeper meta-data is rewritten with status {@code Failed}, listing only the cores
 * that still hold the snapshot, and an exception is thrown.
 *
 * @param state cluster state passed by the caller (not read here; the state is fetched from
 *     the ZK state reader instead)
 * @param message the request; collection name, commit name and async id are read from it
 * @param results not populated by this command
 * @throws SolrException (SERVER_ERROR) if the snapshot could not be deleted on one or more cores
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  String collectionName = message.getStr(COLLECTION_PROP);
  String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
  String asyncId = message.getStr(ASYNC);
  Map<String, String> requestMap = new HashMap<>();
  NamedList shardRequestResults = new NamedList();
  ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
  SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();

  Optional<CollectionSnapshotMetaData> meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
  if (!meta.isPresent()) {
    // Snapshot not found. Nothing to do.
    return;
  }

  log.info("Deleting a snapshot for collection={} with commitName={}", collectionName, commitName);

  // Names of all cores that currently belong to the collection.
  Set<String> existingCores = new HashSet<>();
  for (Slice s : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
    for (Replica r : s.getReplicas()) {
      existingCores.add(r.getCoreName());
    }
  }

  // Cores recorded in the snapshot meta-data that still exist in the collection.
  Set<String> coresWithSnapshot = new HashSet<>();
  for (CoreSnapshotMetaData m : meta.get().getReplicaSnapshots()) {
    if (existingCores.contains(m.getCoreName())) {
      coresWithSnapshot.add(m.getCoreName());
    }
  }

  log.info("Existing cores with snapshot for collection={} are {}", collectionName, coresWithSnapshot);
  for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
    for (Replica replica : slice.getReplicas()) {
      if (replica.getState() == State.DOWN) {
        // Since replica is down - no point sending a request.
        continue;
      }

      // A snapshot still marked InProgress has no reliable list of cores in its meta-data
      // (presumably its creation never completed), so in that case the request is sent to
      // every live replica. Otherwise only cores recorded as holding the snapshot are contacted.
      if (meta.get().getStatus() == SnapshotStatus.InProgress || coresWithSnapshot.contains(replica.getCoreName())) {
        String coreName = replica.getStr(CORE_NAME_PROP);

        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(CoreAdminParams.ACTION, CoreAdminAction.DELETESNAPSHOT.toString());
        params.set(NAME, slice.getName());
        params.set(CORE_NAME_PROP, coreName);
        params.set(CoreAdminParams.COMMIT_NAME, commitName);

        log.info("Sending deletesnapshot request to core={} with commitName={}", coreName, commitName);
        ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
      }
    }
  }

  ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);

  // Every core that reported success no longer holds the snapshot; whatever remains in
  // coresWithSnapshot afterwards is treated as a failure.
  NamedList success = (NamedList) shardRequestResults.get("success");
  if (success != null) {
    for (int i = 0; i < success.size(); i++) {
      NamedList resp = (NamedList) success.getVal(i);
      // Unfortunately async processing logic doesn't provide the "core" name automatically.
      String coreName = (String) resp.get("core");
      coresWithSnapshot.remove(coreName);
    }
  }

  if (!coresWithSnapshot.isEmpty()) {
    // One or more failures.
    log.warn("Failed to delete a snapshot for collection {} with commitName = {}. Snapshot could not be deleted for following cores {}", collectionName, commitName, coresWithSnapshot);

    List<CoreSnapshotMetaData> replicasWithSnapshot = new ArrayList<>();
    for (CoreSnapshotMetaData m : meta.get().getReplicaSnapshots()) {
      if (coresWithSnapshot.contains(m.getCoreName())) {
        replicasWithSnapshot.add(m);
      }
    }

    // Update the ZK meta-data to include only cores with the snapshot. This will enable users to figure out
    // which cores still contain the named snapshot.
    CollectionSnapshotMetaData newResult = new CollectionSnapshotMetaData(meta.get().getName(), SnapshotStatus.Failed, meta.get().getCreationDate(), replicasWithSnapshot);
    SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, newResult);
    // The saved meta-data is the third argument, so the format needs a third placeholder.
    log.info("Saved snapshot information for collection={} with commitName={} in Zookeeper as follows {}", collectionName, commitName, newResult.toNamedList());
    throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to delete snapshot on cores " + coresWithSnapshot);
  } else {
    // Delete the ZK path so that we eliminate the references of this snapshot from collection level meta-data.
    SolrSnapshotManager.deleteCollectionLevelSnapshot(zkClient, collectionName, commitName);
    log.info("Deleted Zookeeper snapshot metdata for collection={} with commitName={}", collectionName, commitName);
    log.info("Successfully deleted snapshot for collection={} with commitName={}", collectionName, commitName);
  }
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData in project lucene-solr by apache.
the class CreateSnapshotCmd method call.
/**
 * Creates a collection level snapshot with the requested commit name.
 *
 * <p>A ZooKeeper node for the snapshot is created first, then a CREATESNAPSHOT request is sent
 * to every ACTIVE replica of every shard. The ZooKeeper meta-data is finally updated with the
 * cores that created the snapshot and with status {@code Successful} or {@code Failed}.
 *
 * <p>A shard counts as failed when at least one of its replicas was asked to create the
 * snapshot and none of them reported success.
 * NOTE(review): a shard without any ACTIVE replica receives no request at all and is therefore
 * not reported as failed here - confirm whether that case should fail the command as well.
 *
 * @param state cluster state passed by the caller (not read here; the state is fetched from
 *     the ZK state reader instead)
 * @param message the request; collection name, commit name and async id are read from it
 * @param results not populated by this command
 * @throws SolrException BAD_REQUEST if a snapshot with this name already exists for the
 *     collection; SERVER_ERROR if the snapshot could not be created for one or more shards
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  String collectionName = message.getStr(COLLECTION_PROP);
  String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
  String asyncId = message.getStr(ASYNC);
  SolrZkClient zkClient = this.ocmh.overseer.getZkController().getZkClient();
  Date creationDate = new Date();

  if (SolrSnapshotManager.snapshotExists(zkClient, collectionName, commitName)) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " already exists for collection " + collectionName);
  }

  log.info("Creating a snapshot for collection={} with commitName={}", collectionName, commitName);

  // Create a node in ZK to store the collection level snapshot meta-data.
  SolrSnapshotManager.createCollectionLevelSnapshot(zkClient, collectionName, new CollectionSnapshotMetaData(commitName));
  log.info("Created a ZK path to store snapshot information for collection={} with commitName={}", collectionName, commitName);

  Map<String, String> requestMap = new HashMap<>();
  NamedList shardRequestResults = new NamedList();
  // Core name -> shard, for every core a request was sent to. Entries are removed as
  // successful responses arrive, so what remains afterwards are the failed cores.
  Map<String, Slice> shardByCoreName = new HashMap<>();
  ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();

  for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
    for (Replica replica : slice.getReplicas()) {
      if (replica.getState() != State.ACTIVE) {
        log.info("Replica {} is not active. Hence not sending the createsnapshot request", replica.getCoreName());
        // Since replica is not active - no point sending a request.
        continue;
      }

      String coreName = replica.getStr(CORE_NAME_PROP);

      ModifiableSolrParams params = new ModifiableSolrParams();
      params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATESNAPSHOT.toString());
      params.set(NAME, slice.getName());
      params.set(CORE_NAME_PROP, coreName);
      params.set(CoreAdminParams.COMMIT_NAME, commitName);

      ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
      log.debug("Sent createsnapshot request to core={} with commitName={}", coreName, commitName);

      shardByCoreName.put(coreName, slice);
    }
  }

  // At this point we want to make sure that at-least one replica for every shard
  // is able to create the snapshot. If that is not the case, then we fail the request.
  // This is to take care of the situation where e.g. entire shard is unavailable.
  ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);

  NamedList success = (NamedList) shardRequestResults.get("success");
  List<CoreSnapshotMetaData> replicas = new ArrayList<>();
  // Names of the shards for which at least one core reported a successful snapshot.
  Set<String> shardsWithSnapshot = new HashSet<>();
  if (success != null) {
    for (int i = 0; i < success.size(); i++) {
      NamedList resp = (NamedList) success.getVal(i);

      String coreName = (String) resp.get(CoreAdminParams.CORE);
      Slice slice = shardByCoreName.remove(coreName);
      if (slice == null) {
        // The response cannot be matched to a request sent above (missing or unknown core
        // name). Skip it instead of failing with a NullPointerException; the corresponding
        // core, if any, stays in shardByCoreName and is counted as a failure below.
        log.warn("Ignoring createsnapshot response for unexpected core {} while creating snapshot with commitName {}", coreName, commitName);
        continue;
      }

      // Check if this core is the leader for the shard. The idea here is that during the backup
      // operation we preferably use the snapshot of the "leader" replica since it is most likely
      // to have latest state.
      Replica shardLeader = slice.getLeader();
      boolean leader = (shardLeader != null && shardLeader.getCoreName().equals(coreName));
      resp.add(SolrSnapshotManager.SHARD_ID, slice.getName());
      resp.add(SolrSnapshotManager.LEADER, leader);

      CoreSnapshotMetaData c = new CoreSnapshotMetaData(resp);
      replicas.add(c);
      shardsWithSnapshot.add(slice.getName());
      log.info("Snapshot with commitName {} is created successfully for core {}", commitName, c.getCoreName());
    }
  }

  Set<String> failedShards = new HashSet<>();
  if (!shardByCoreName.isEmpty()) {
    // One or more failures.
    log.warn("Unable to create a snapshot with name {} for following cores {}", commitName, shardByCoreName.keySet());

    // A shard has failed when none of the replicas that were contacted produced a snapshot.
    // Only contacted (ACTIVE) replicas are considered: comparing the failure count with the
    // total number of replicas would miss shards that also contain non-active replicas.
    for (Slice sliceOfFailedCore : shardByCoreName.values()) {
      String shardName = sliceOfFailedCore.getName();
      if (!shardsWithSnapshot.contains(shardName)) {
        failedShards.add(shardName);
      }
    }
  }

  boolean allShardsCovered = failedShards.isEmpty();
  if (!allShardsCovered) {
    log.warn("Failed to create a snapshot for collection {} with commitName = {}. Snapshot could not be captured for following shards {}", collectionName, commitName, failedShards);
  }

  // Record the outcome in ZK. On failure the meta-data still lists the cores that do have the
  // snapshot, which enables users to figure out which cores have the named snapshot.
  SnapshotStatus status = allShardsCovered ? SnapshotStatus.Successful : SnapshotStatus.Failed;
  CollectionSnapshotMetaData meta = new CollectionSnapshotMetaData(commitName, status, creationDate, replicas);
  SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, meta);
  log.info("Saved following snapshot information for collection={} with commitName={} in Zookeeper : {}", collectionName, commitName, meta.toNamedList());

  if (!allShardsCovered) {
    throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to create snapshot on shards " + failedShards);
  }
}
Aggregations