use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData in project lucene-solr by apache.
the class TestSolrCloudSnapshots method testSnapshots.
/**
 * End-to-end exercise of collection-level snapshots on the test cluster.
 *
 * <p>The test walks through these stages in order:
 * <ol>
 *   <li>create a collection, index documents and (usually) stop one random node;</li>
 *   <li>create a snapshot and compare the collection-level meta-data with what every
 *       remaining core reports;</li>
 *   <li>delete all documents, back up the collection from the snapshot and restore it into a
 *       new collection, verifying the original document count comes back;</li>
 *   <li>(rarely) delete a replica that holds the snapshot;</li>
 *   <li>delete the snapshot and verify no core still lists it;</li>
 *   <li>create a second snapshot, delete the collection, and verify the snapshot meta-data
 *       is gone.</li>
 * </ol>
 *
 * @throws Exception if any cluster operation fails
 */
@Test
public void testSnapshots() throws Exception {
  CloudSolrClient solrClient = cluster.getSolrClient();
  String collectionName = "SolrCloudSnapshots";
  CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", NUM_SHARDS, NUM_REPLICAS);
  create.process(solrClient);
  int nDocs = BackupRestoreUtils.indexDocs(cluster.getSolrClient(), collectionName, docsSeed);
  BackupRestoreUtils.verifyDocs(nDocs, solrClient, collectionName);
  String commitName = TestUtil.randomSimpleString(random(), 1, 5);
  // Verify if snapshot creation works with replica failures.
  boolean replicaFailures = usually();
  Optional<String> stoppedCoreName = Optional.empty();
  if (replicaFailures) {
    // Here the assumption is that Solr will spread the replicas uniformly across nodes.
    // If this is not true for some reason, then we will need to add some logic to find a
    // node with a single replica.
    this.cluster.getRandomJetty(random()).stop();
    // Sleep a bit for allowing ZK watch to fire.
    Thread.sleep(5000);
    // Figure out if at-least one replica is "down". If several are down, the last one seen
    // is the one remembered.
    DocCollection collState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
    for (Slice s : collState.getSlices()) {
      for (Replica replica : s.getReplicas()) {
        if (replica.getState() == State.DOWN) {
          stoppedCoreName = Optional.of(replica.getCoreName());
        }
      }
    }
  }
  int expectedCoresWithSnapshot = stoppedCoreName.isPresent() ? (NUM_SHARDS * NUM_REPLICAS) - 1 : (NUM_SHARDS * NUM_REPLICAS);
  CollectionAdminRequest.CreateSnapshot createSnap = new CollectionAdminRequest.CreateSnapshot(collectionName, commitName);
  createSnap.process(solrClient);
  Collection<CollectionSnapshotMetaData> collectionSnaps = listCollectionSnapshots(solrClient, collectionName);
  assertEquals(1, collectionSnaps.size());
  CollectionSnapshotMetaData meta = collectionSnaps.iterator().next();
  assertEquals(commitName, meta.getName());
  assertEquals(CollectionSnapshotMetaData.SnapshotStatus.Successful, meta.getStatus());
  assertEquals(expectedCoresWithSnapshot, meta.getReplicaSnapshots().size());
  // Index the per-core snapshot entries by core name; entries are removed from this map as
  // the corresponding core-level snapshots are confirmed deleted further below.
  Map<String, CoreSnapshotMetaData> snapshotByCoreName = meta.getReplicaSnapshots().stream().collect(Collectors.toMap(CoreSnapshotMetaData::getCoreName, Function.identity()));
  DocCollection collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
  assertEquals(2, collectionState.getActiveSlices().size());
  for (Slice shard : collectionState.getActiveSlices()) {
    assertEquals(2, shard.getReplicas().size());
    for (Replica replica : shard.getReplicas()) {
      if (stoppedCoreName.isPresent() && stoppedCoreName.get().equals(replica.getCoreName())) {
        // We know that the snapshot is not created for this replica.
        continue;
      }
      String replicaBaseUrl = replica.getStr(BASE_URL_PROP);
      String coreName = replica.getStr(ZkStateReader.CORE_NAME_PROP);
      assertTrue(snapshotByCoreName.containsKey(coreName));
      CoreSnapshotMetaData coreSnapshot = snapshotByCoreName.get(coreName);
      // The core's own view of the snapshot must agree with the collection-level meta-data.
      try (SolrClient adminClient = getHttpSolrClient(replicaBaseUrl)) {
        Collection<SnapshotMetaData> snapshots = listCoreSnapshots(adminClient, coreName);
        Optional<SnapshotMetaData> metaData = snapshots.stream().filter(x -> commitName.equals(x.getName())).findFirst();
        assertTrue("Snapshot not created for core " + coreName, metaData.isPresent());
        assertEquals(coreSnapshot.getIndexDirPath(), metaData.get().getIndexDirPath());
        assertEquals(coreSnapshot.getGenerationNumber(), metaData.get().getGenerationNumber());
      }
    }
  }
  // Delete all documents.
  {
    solrClient.deleteByQuery(collectionName, "*:*");
    solrClient.commit(collectionName);
    BackupRestoreUtils.verifyDocs(0, solrClient, collectionName);
  }
  String backupLocation = createTempDir().toFile().getAbsolutePath();
  String backupName = "mytestbackup";
  String restoreCollectionName = collectionName + "_restored";
  // Create a backup using the earlier created snapshot.
  {
    CollectionAdminRequest.Backup backup = CollectionAdminRequest.backupCollection(collectionName, backupName).setLocation(backupLocation).setCommitName(commitName);
    if (random().nextBoolean()) {
      assertEquals(0, backup.process(solrClient).getStatus());
    } else {
      // async
      assertEquals(RequestStatusState.COMPLETED, backup.processAndWait(solrClient, 30));
    }
  }
  // Restore backup.
  {
    CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName).setLocation(backupLocation);
    if (replicaFailures) {
      // In this case one of the Solr servers would be down. Hence we need to increase
      // max_shards_per_node property for restore command to succeed.
      restore.setMaxShardsPerNode(2);
    }
    if (random().nextBoolean()) {
      assertEquals(0, restore.process(solrClient).getStatus());
    } else {
      // async
      assertEquals(RequestStatusState.COMPLETED, restore.processAndWait(solrClient, 30));
    }
    AbstractDistribZkTestBase.waitForRecoveriesToFinish(restoreCollectionName, cluster.getSolrClient().getZkStateReader(), log.isDebugEnabled(), true, 30);
    // The restored collection must contain the documents captured by the snapshot even
    // though the source collection was emptied after the snapshot was taken.
    BackupRestoreUtils.verifyDocs(nDocs, solrClient, restoreCollectionName);
  }
  // Verify if the snapshot deletion works correctly when one or more replicas containing the snapshot are
  // deleted
  boolean replicaDeletion = rarely();
  if (replicaDeletion) {
    CoreSnapshotMetaData replicaToDelete = null;
    // Only pick from shards that have more than one replica snapshot; if several shards
    // qualify, the pick from the last such shard is the one kept.
    for (String shardId : meta.getShards()) {
      List<CoreSnapshotMetaData> replicas = meta.getReplicaSnapshotsForShard(shardId);
      if (replicas.size() > 1) {
        int r_index = random().nextInt(replicas.size());
        replicaToDelete = replicas.get(r_index);
      }
    }
    if (replicaToDelete != null) {
      collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
      for (Slice s : collectionState.getSlices()) {
        for (Replica r : s.getReplicas()) {
          if (r.getCoreName().equals(replicaToDelete.getCoreName())) {
            log.info("Deleting replica {}", r);
            CollectionAdminRequest.DeleteReplica delReplica = CollectionAdminRequest.deleteReplica(collectionName, replicaToDelete.getShardId(), r.getName());
            delReplica.process(solrClient);
            // The replica deletion will cleanup the snapshot meta-data.
            snapshotByCoreName.remove(r.getCoreName());
            break;
          }
        }
      }
    }
  }
  // Delete snapshot
  CollectionAdminRequest.DeleteSnapshot deleteSnap = new CollectionAdminRequest.DeleteSnapshot(collectionName, commitName);
  deleteSnap.process(solrClient);
  // Wait for a while so that the clusterstate.json updates are propagated to the client side.
  Thread.sleep(2000);
  collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
  for (Slice shard : collectionState.getActiveSlices()) {
    for (Replica replica : shard.getReplicas()) {
      if (stoppedCoreName.isPresent() && stoppedCoreName.get().equals(replica.getCoreName())) {
        // We know that the snapshot was not created for this replica.
        continue;
      }
      String replicaBaseUrl = replica.getStr(BASE_URL_PROP);
      String coreName = replica.getStr(ZkStateReader.CORE_NAME_PROP);
      try (SolrClient adminClient = getHttpSolrClient(replicaBaseUrl)) {
        Collection<SnapshotMetaData> snapshots = listCoreSnapshots(adminClient, coreName);
        Optional<SnapshotMetaData> metaData = snapshots.stream().filter(x -> commitName.equals(x.getName())).findFirst();
        assertFalse("Snapshot not deleted for core " + coreName, metaData.isPresent());
        // Remove the entry for core if the snapshot is deleted successfully.
        snapshotByCoreName.remove(coreName);
      }
    }
  }
  // Verify all core-level snapshots are deleted.
  assertTrue("The cores remaining " + snapshotByCoreName, snapshotByCoreName.isEmpty());
  assertTrue(listCollectionSnapshots(solrClient, collectionName).isEmpty());
  // Verify if the collection deletion result in proper cleanup of snapshot metadata.
  {
    String commitName_2 = commitName + "_2";
    CollectionAdminRequest.CreateSnapshot createSnap_2 = new CollectionAdminRequest.CreateSnapshot(collectionName, commitName_2);
    assertEquals(0, createSnap_2.process(solrClient).getStatus());
    Collection<CollectionSnapshotMetaData> collectionSnaps_2 = listCollectionSnapshots(solrClient, collectionName);
    // Check the freshly fetched listing (not the stale one captured for the first snapshot).
    assertEquals(1, collectionSnaps_2.size());
    assertEquals(commitName_2, collectionSnaps_2.iterator().next().getName());
    // Delete collection
    CollectionAdminRequest.Delete deleteCol = CollectionAdminRequest.deleteCollection(collectionName);
    assertEquals(0, deleteCol.process(solrClient).getStatus());
    assertTrue(SolrSnapshotManager.listSnapshots(solrClient.getZkStateReader().getZkClient(), collectionName).isEmpty());
  }
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData in project lucene-solr by apache.
the class BackupCmd method selectReplicaWithSnapshot.
/**
 * Picks the replica of {@code slice} whose snapshot should be used.
 *
 * <p>The first snapshot entry flagged as leader is tried first: if the slice returns a replica
 * for that entry's core name and the replica is not {@code DOWN}, it is returned. Otherwise the
 * first replica of the slice that is not {@code DOWN} and for which
 * {@code snapshotMeta.isSnapshotExists} is true is returned.
 *
 * @param snapshotMeta collection-level meta-data of the snapshot
 * @param slice the shard to select a replica for
 * @return the selected replica, never {@code null}
 * @throws SolrException (SERVER_ERROR) if no suitable replica exists
 */
private Replica selectReplicaWithSnapshot(CollectionSnapshotMetaData snapshotMeta, Slice slice) {
  final String shardName = slice.getName();

  // Preferred choice: the replica recorded as leader when the snapshot was created.
  for (CoreSnapshotMetaData coreSnapshot : snapshotMeta.getReplicaSnapshotsForShard(shardName)) {
    if (!coreSnapshot.isLeader()) {
      continue;
    }
    final String leaderCoreName = coreSnapshot.getCoreName();
    log.info("Replica {} was the leader when snapshot {} was created.", leaderCoreName, snapshotMeta.getName());
    Replica formerLeader = slice.getReplica(leaderCoreName);
    if (formerLeader != null && !formerLeader.getState().equals(State.DOWN)) {
      return formerLeader;
    }
    // Only the first leader entry is considered; fall through to the generic search.
    break;
  }

  // Fallback: any replica of this shard that is not down and holds the snapshot.
  for (Replica candidate : slice.getReplicas()) {
    if (candidate.getState() == State.DOWN) {
      continue;
    }
    if (snapshotMeta.isSnapshotExists(shardName, candidate)) {
      return candidate;
    }
  }

  throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to find any live replica with a snapshot named " + snapshotMeta.getName() + " for shard " + shardName);
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData in project lucene-solr by apache.
the class DeleteSnapshotCmd method call.
/**
 * Deletes the named snapshot from a collection.
 *
 * <p>If no collection-level snapshot with the requested name is found the method returns
 * without doing anything. Otherwise a {@code DELETESNAPSHOT} core-admin request is sent to
 * every replica that is not {@code DOWN} and is recorded as holding the snapshot (or to every
 * replica that is not {@code DOWN} when the snapshot status is {@code InProgress}). When all
 * recorded cores report success the ZooKeeper entry is removed; otherwise the entry is
 * rewritten with status {@code Failed}, listing only the cores that still hold the snapshot,
 * and an exception is thrown.
 *
 * @param state cluster state supplied by the caller (not read by this method; the current
 *     state is fetched from {@code ocmh.zkStateReader})
 * @param message request properties; the collection name, commit (snapshot) name and optional
 *     async id are read from it
 * @param results result container supplied by the caller (not written by this method)
 * @throws SolrException (SERVER_ERROR) if the snapshot could not be deleted on one or more cores
 * @throws Exception if communication with ZooKeeper or the shards fails
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  String collectionName = message.getStr(COLLECTION_PROP);
  String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
  String asyncId = message.getStr(ASYNC);
  Map<String, String> requestMap = new HashMap<>();
  NamedList shardRequestResults = new NamedList();
  ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
  SolrZkClient zkClient = ocmh.overseer.getZkController().getZkClient();
  Optional<CollectionSnapshotMetaData> meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
  if (!meta.isPresent()) {
    // Snapshot not found. Nothing to do.
    return;
  }
  log.info("Deleting a snapshot for collection={} with commitName={}", collectionName, commitName);
  // Cores that currently exist in the collection, regardless of their state.
  Set<String> existingCores = new HashSet<>();
  for (Slice s : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
    for (Replica r : s.getReplicas()) {
      existingCores.add(r.getCoreName());
    }
  }
  // Restrict the recorded snapshot holders to cores that still exist; cores that have been
  // removed since the snapshot was taken are not tracked any further.
  Set<String> coresWithSnapshot = new HashSet<>();
  for (CoreSnapshotMetaData m : meta.get().getReplicaSnapshots()) {
    if (existingCores.contains(m.getCoreName())) {
      coresWithSnapshot.add(m.getCoreName());
    }
  }
  log.info("Existing cores with snapshot for collection={} are {}", collectionName, coresWithSnapshot);
  for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
    for (Replica replica : slice.getReplicas()) {
      if (replica.getState() == State.DOWN) {
        // Since replica is down - no point sending a request.
        continue;
      }
      // When the snapshot is still marked InProgress the request goes to every live replica
      // rather than only to the recorded holders.
      // NOTE(review): presumably the recorded core list is incomplete in that state - confirm.
      if (meta.get().getStatus() == SnapshotStatus.InProgress || coresWithSnapshot.contains(replica.getCoreName())) {
        String coreName = replica.getStr(CORE_NAME_PROP);
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(CoreAdminParams.ACTION, CoreAdminAction.DELETESNAPSHOT.toString());
        params.set(NAME, slice.getName());
        params.set(CORE_NAME_PROP, coreName);
        params.set(CoreAdminParams.COMMIT_NAME, commitName);
        log.info("Sending deletesnapshot request to core={} with commitName={}", coreName, commitName);
        ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
      }
    }
  }
  ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);
  NamedList success = (NamedList) shardRequestResults.get("success");
  if (success != null) {
    for (int i = 0; i < success.size(); i++) {
      NamedList resp = (NamedList) success.getVal(i);
      // Unfortunately async processing logic doesn't provide the "core" name automatically.
      String coreName = (String) resp.get("core");
      // Whatever is left in coresWithSnapshot after this loop did not confirm the deletion.
      coresWithSnapshot.remove(coreName);
    }
  }
  if (!coresWithSnapshot.isEmpty()) {
    // One or more failures.
    log.warn("Failed to delete a snapshot for collection {} with commitName = {}. Snapshot could not be deleted for following cores {}", collectionName, commitName, coresWithSnapshot);
    List<CoreSnapshotMetaData> replicasWithSnapshot = new ArrayList<>();
    for (CoreSnapshotMetaData m : meta.get().getReplicaSnapshots()) {
      if (coresWithSnapshot.contains(m.getCoreName())) {
        replicasWithSnapshot.add(m);
      }
    }
    // Update the ZK meta-data to include only cores with the snapshot. This will enable users to figure out
    // which cores still contain the named snapshot.
    CollectionSnapshotMetaData newResult = new CollectionSnapshotMetaData(meta.get().getName(), SnapshotStatus.Failed, meta.get().getCreationDate(), replicasWithSnapshot);
    SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, newResult);
    // Every argument needs its own placeholder, otherwise the saved meta-data never reaches the log.
    log.info("Saved snapshot information for collection={} with commitName={} in Zookeeper as follows: {}", collectionName, commitName, newResult.toNamedList());
    throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to delete snapshot on cores " + coresWithSnapshot);
  } else {
    // Delete the ZK path so that we eliminate the references of this snapshot from collection level meta-data.
    SolrSnapshotManager.deleteCollectionLevelSnapshot(zkClient, collectionName, commitName);
    log.info("Deleted Zookeeper snapshot metdata for collection={} with commitName={}", collectionName, commitName);
    log.info("Successfully deleted snapshot for collection={} with commitName={}", collectionName, commitName);
  }
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData in project lucene-solr by apache.
the class SolrSnapshotsTool method getIndexFilesPathForSnapshot.
/**
 * Builds, for every shard of a collection, the list of index file paths that make up the
 * named snapshot.
 *
 * <p>For each shard one replica snapshot is chosen among the replicas that
 * {@code isReplicaAvailable} reports as available: a snapshot entry flagged as leader is
 * preferred, otherwise the first available entry is used. Each file name of the chosen entry
 * is resolved against the entry's index directory, or against {@code pathPrefix} combined with
 * the path component of that directory when a prefix is given.
 *
 * @param collectionName name of the collection
 * @param snapshotName name of the snapshot to look up
 * @param pathPrefix optional prefix that replaces the leading part of the index directory path
 * @return file paths keyed by shard name
 * @throws IllegalArgumentException if the snapshot does not exist for the collection, or no
 *     available replica of some shard has it
 * @throws SolrServerException declared by the method; see {@code listCollectionSnapshots}
 * @throws IOException declared by the method; see {@code listCollectionSnapshots}
 */
public Map<String, List<String>> getIndexFilesPathForSnapshot(String collectionName, String snapshotName, Optional<String> pathPrefix) throws SolrServerException, IOException {
  // Should several listed snapshots carry the requested name, the one listed last is used.
  CollectionSnapshotMetaData snapshot = listCollectionSnapshots(collectionName).stream()
      .filter(candidate -> snapshotName.equals(candidate.getName()))
      .reduce((earlier, later) -> later)
      .orElseThrow(() -> new IllegalArgumentException("The snapshot named " + snapshotName + " is not found for collection " + collectionName));

  Map<String, List<String>> filesByShard = new HashMap<>();
  DocCollection collectionState = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
  for (Slice shard : collectionState.getSlices()) {
    final String shardName = shard.getName();

    // Walk the *existing* replicas only (one or more replicas could have been deleted after
    // the snapshot creation). Start with the first available one and let any entry that was
    // the leader at snapshot time take over.
    CoreSnapshotMetaData chosen = null;
    for (CoreSnapshotMetaData replicaSnapshot : snapshot.getReplicaSnapshotsForShard(shardName)) {
      if (!isReplicaAvailable(shard, replicaSnapshot.getCoreName())) {
        continue;
      }
      if (chosen == null || replicaSnapshot.isLeader()) {
        chosen = replicaSnapshot;
      }
    }
    if (chosen == null) {
      throw new IllegalArgumentException("The snapshot named " + snapshotName + " not found for shard " + shardName + " of collection " + collectionName);
    }

    // If the path prefix is specified, rebuild the path to the index directory.
    final String indexDirPath = pathPrefix.isPresent()
        ? new Path(pathPrefix.get(), new Path(chosen.getIndexDirPath()).toUri().getPath()).toString()
        : chosen.getIndexDirPath();

    List<String> shardFiles = new ArrayList<>();
    for (String fileName : chosen.getFiles()) {
      shardFiles.add(new Path(indexDirPath, fileName).toString());
    }
    filesByShard.put(shardName, shardFiles);
  }
  return filesByShard;
}
use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData in project lucene-solr by apache.
the class CreateSnapshotCmd method call.
/**
 * Creates a named snapshot for a collection.
 *
 * <p>A ZooKeeper node for the snapshot is created first, then a {@code CREATESNAPSHOT}
 * core-admin request is sent to every {@code ACTIVE} replica. The snapshot is recorded as
 * {@code Successful} only if every shard that is active, or that had at least one request
 * sent, ended up with at least one replica snapshot. Otherwise the meta-data is stored with
 * status {@code Failed} (listing the cores that do hold the snapshot) and an exception is
 * thrown.
 *
 * @param state cluster state supplied by the caller (not read by this method; the current
 *     state is fetched from {@code ocmh.zkStateReader})
 * @param message request properties; the collection name, commit (snapshot) name and optional
 *     async id are read from it
 * @param results result container supplied by the caller (not written by this method)
 * @throws SolrException BAD_REQUEST if a snapshot with this name already exists, SERVER_ERROR
 *     if one or more shards have no replica holding the snapshot
 * @throws Exception if communication with ZooKeeper or the shards fails
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  String collectionName = message.getStr(COLLECTION_PROP);
  String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
  String asyncId = message.getStr(ASYNC);
  SolrZkClient zkClient = this.ocmh.overseer.getZkController().getZkClient();
  Date creationDate = new Date();
  if (SolrSnapshotManager.snapshotExists(zkClient, collectionName, commitName)) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " already exists for collection " + collectionName);
  }
  log.info("Creating a snapshot for collection={} with commitName={}", collectionName, commitName);
  // Create a node in ZK to store the collection level snapshot meta-data.
  SolrSnapshotManager.createCollectionLevelSnapshot(zkClient, collectionName, new CollectionSnapshotMetaData(commitName));
  log.info("Created a ZK path to store snapshot information for collection={} with commitName={}", collectionName, commitName);
  Map<String, String> requestMap = new HashMap<>();
  NamedList shardRequestResults = new NamedList();
  // Cores that were sent a request and have not (yet) reported success, with their shard.
  Map<String, Slice> shardByCoreName = new HashMap<>();
  ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
  DocCollection collection = ocmh.zkStateReader.getClusterState().getCollection(collectionName);
  // Shards that must end up with at least one replica snapshot: every active shard, plus any
  // shard a request is actually sent to.
  Set<String> shardsExpectingSnapshot = new HashSet<>();
  for (Slice activeSlice : collection.getActiveSlices()) {
    shardsExpectingSnapshot.add(activeSlice.getName());
  }
  for (Slice slice : collection.getSlices()) {
    for (Replica replica : slice.getReplicas()) {
      if (replica.getState() != State.ACTIVE) {
        log.info("Replica {} is not active. Hence not sending the createsnapshot request", replica.getCoreName());
        // Since replica is not active - no point sending a request.
        continue;
      }
      String coreName = replica.getStr(CORE_NAME_PROP);
      ModifiableSolrParams params = new ModifiableSolrParams();
      params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATESNAPSHOT.toString());
      params.set(NAME, slice.getName());
      params.set(CORE_NAME_PROP, coreName);
      params.set(CoreAdminParams.COMMIT_NAME, commitName);
      ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
      log.debug("Sent createsnapshot request to core={} with commitName={}", coreName, commitName);
      shardByCoreName.put(coreName, slice);
      shardsExpectingSnapshot.add(slice.getName());
    }
  }
  // At this point we want to make sure that at-least one replica for every shard
  // is able to create the snapshot. If that is not the case, then we fail the request.
  // This is to take care of the situation where e.g. entire shard is unavailable.
  ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);
  NamedList success = (NamedList) shardRequestResults.get("success");
  List<CoreSnapshotMetaData> replicas = new ArrayList<>();
  Set<String> shardsWithSnapshot = new HashSet<>();
  if (success != null) {
    for (int i = 0; i < success.size(); i++) {
      NamedList resp = (NamedList) success.getVal(i);
      // Check if this core is the leader for the shard. The idea here is that during the backup
      // operation we preferably use the snapshot of the "leader" replica since it is most likely
      // to have latest state.
      String coreName = (String) resp.get(CoreAdminParams.CORE);
      Slice slice = shardByCoreName.remove(coreName);
      boolean leader = (slice.getLeader() != null && slice.getLeader().getCoreName().equals(coreName));
      resp.add(SolrSnapshotManager.SHARD_ID, slice.getName());
      resp.add(SolrSnapshotManager.LEADER, leader);
      CoreSnapshotMetaData c = new CoreSnapshotMetaData(resp);
      replicas.add(c);
      shardsWithSnapshot.add(slice.getName());
      log.info("Snapshot with commitName {} is created successfully for core {}", commitName, c.getCoreName());
    }
  }
  if (!shardByCoreName.isEmpty()) {
    // One or more failures.
    log.warn("Unable to create a snapshot with name {} for following cores {}", commitName, shardByCoreName.keySet());
  }
  // A shard has failed when none of its replicas produced a snapshot. Counting failures against
  // the total replica count is not sufficient: replicas that are not active are never asked, so
  // a shard whose only active replica failed (or which had no active replica at all) would
  // otherwise be reported as successful.
  Set<String> failedShards = new HashSet<>(shardsExpectingSnapshot);
  failedShards.removeAll(shardsWithSnapshot);
  if (failedShards.isEmpty()) {
    // No failures.
    CollectionSnapshotMetaData meta = new CollectionSnapshotMetaData(commitName, SnapshotStatus.Successful, creationDate, replicas);
    SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, meta);
    log.info("Saved following snapshot information for collection={} with commitName={} in Zookeeper : {}", collectionName, commitName, meta.toNamedList());
  } else {
    log.warn("Failed to create a snapshot for collection {} with commitName = {}. Snapshot could not be captured for following shards {}", collectionName, commitName, failedShards);
    // Update the ZK meta-data to include only cores with the snapshot. This will enable users to figure out
    // which cores have the named snapshot.
    CollectionSnapshotMetaData meta = new CollectionSnapshotMetaData(commitName, SnapshotStatus.Failed, creationDate, replicas);
    SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, meta);
    log.info("Saved following snapshot information for collection={} with commitName={} in Zookeeper : {}", collectionName, commitName, meta.toNamedList());
    throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to create snapshot on shards " + failedShards);
  }
}
Aggregations