Search in sources :

Example 6 with CoreSnapshotMetaData

Use of org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData in the lucene-solr project by Apache.

The method call of the class CreateSnapshotCmd.

/**
 * Creates a named snapshot for a collection.
 *
 * <p>The method registers the snapshot in ZooKeeper, sends a {@code CREATESNAPSHOT} core-admin
 * request to every replica whose state is {@code ACTIVE}, and finally stores the list of cores
 * that reported success together with an overall {@code Successful} / {@code Failed} status.
 *
 * <p>The snapshot is considered successful only if every shard ends up with at least one core
 * holding the snapshot. A shard is treated as failed when
 * <ul>
 *   <li>at least one of its cores was asked to create the snapshot and none reported success, or</li>
 *   <li>the shard is in the {@code ACTIVE} slice state but has no active replica, so no request
 *       could be sent at all.</li>
 * </ul>
 *
 * @param state   the cluster state handed in by the caller; not read by this method, which
 *                consults {@code ocmh.zkStateReader} instead
 * @param message the operation properties; supplies the collection name, the commit (snapshot)
 *                name and the optional async id
 * @param results the result container supplied by the caller; not populated by this method
 * @throws SolrException with {@code BAD_REQUEST} if a snapshot with the same name already exists,
 *                       or with {@code SERVER_ERROR} if one or more shards could not be captured
 * @throws Exception     propagated from the ZooKeeper and shard-request helpers
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
    String collectionName = message.getStr(COLLECTION_PROP);
    String commitName = message.getStr(CoreAdminParams.COMMIT_NAME);
    String asyncId = message.getStr(ASYNC);
    SolrZkClient zkClient = this.ocmh.overseer.getZkController().getZkClient();
    Date creationDate = new Date();
    if (SolrSnapshotManager.snapshotExists(zkClient, collectionName, commitName)) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Snapshot with name " + commitName + " already exists for collection " + collectionName);
    }
    log.info("Creating a snapshot for collection={} with commitName={}", collectionName, commitName);
    // Create a node in ZK to store the collection level snapshot meta-data.
    SolrSnapshotManager.createCollectionLevelSnapshot(zkClient, collectionName, new CollectionSnapshotMetaData(commitName));
    log.info("Created a ZK path to store snapshot information for collection={} with commitName={}", collectionName, commitName);

    Map<String, String> requestMap = new HashMap<>();
    NamedList shardRequestResults = new NamedList();
    // Cores that were asked to create the snapshot and have not (yet) reported success.
    Map<String, Slice> shardByCoreName = new HashMap<>();
    Set<String> failedShards = new HashSet<>();
    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
        boolean requestSent = false;
        for (Replica replica : slice.getReplicas()) {
            if (replica.getState() != State.ACTIVE) {
                log.info("Replica {} is not active. Hence not sending the createsnapshot request", replica.getCoreName());
                // Since replica is not active - no point sending a request.
                continue;
            }
            String coreName = replica.getStr(CORE_NAME_PROP);
            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATESNAPSHOT.toString());
            params.set(NAME, slice.getName());
            params.set(CORE_NAME_PROP, coreName);
            params.set(CoreAdminParams.COMMIT_NAME, commitName);
            ocmh.sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
            log.debug("Sent createsnapshot request to core={} with commitName={}", coreName, commitName);
            shardByCoreName.put(coreName, slice);
            requestSent = true;
        }
        // An active shard without a single active replica cannot contribute to the snapshot.
        // It never shows up in shardByCoreName, so it has to be recorded as failed right here.
        // Shards in other slice states (e.g. a retired parent of a split) are not required.
        if (!requestSent && slice.getState() == Slice.State.ACTIVE) {
            log.warn("No active replica found for shard {}. Hence the snapshot with name {} can not be created for it", slice.getName(), commitName);
            failedShards.add(slice.getName());
        }
    }

    // At this point we want to make sure that at-least one replica for every shard
    // is able to create the snapshot. If that is not the case, then we fail the request.
    // This is to take care of the situation where e.g. entire shard is unavailable.
    ocmh.processResponses(shardRequestResults, shardHandler, false, null, asyncId, requestMap);
    NamedList success = (NamedList) shardRequestResults.get("success");
    List<CoreSnapshotMetaData> replicas = new ArrayList<>();
    Set<String> shardsWithSnapshot = new HashSet<>();
    if (success != null) {
        for (int i = 0; i < success.size(); i++) {
            NamedList resp = (NamedList) success.getVal(i);
            String coreName = (String) resp.get(CoreAdminParams.CORE);
            Slice slice = shardByCoreName.remove(coreName);
            if (slice == null) {
                // A response that cannot be matched to a request we sent (missing or unknown core
                // name). Skip it instead of failing the whole command with a NullPointerException.
                log.warn("Ignoring createsnapshot response for unexpected core {} with commitName {}", coreName, commitName);
                continue;
            }
            // Check if this core is the leader for the shard. The idea here is that during the backup
            // operation we preferably use the snapshot of the "leader" replica since it is most likely
            // to have latest state.
            Replica shardLeader = slice.getLeader();
            boolean leader = shardLeader != null && coreName.equals(shardLeader.getCoreName());
            resp.add(SolrSnapshotManager.SHARD_ID, slice.getName());
            resp.add(SolrSnapshotManager.LEADER, leader);
            CoreSnapshotMetaData c = new CoreSnapshotMetaData(resp);
            replicas.add(c);
            shardsWithSnapshot.add(slice.getName());
            log.info("Snapshot with commitName {} is created successfully for core {}", commitName, c.getCoreName());
        }
    }

    if (!shardByCoreName.isEmpty()) {
        // One or more failures.
        log.warn("Unable to create a snapshot with name {} for following cores {}", commitName, shardByCoreName.keySet());
        // A shard has failed when none of its cores reported success. Comparing against the
        // shards that actually succeeded (rather than against the total replica count) keeps
        // replicas that were skipped for being inactive from masking a failed shard.
        for (Slice slice : shardByCoreName.values()) {
            if (!shardsWithSnapshot.contains(slice.getName())) {
                failedShards.add(slice.getName());
            }
        }
    }

    boolean succeeded = failedShards.isEmpty();
    if (!succeeded) {
        log.warn("Failed to create a snapshot for collection {} with commitName = {}. Snapshot could not be captured for following shards {}", collectionName, commitName, failedShards);
    }
    // In the failure case the ZK meta-data still lists the cores that do hold the snapshot.
    // This will enable users to figure out which cores have the named snapshot.
    SnapshotStatus status = succeeded ? SnapshotStatus.Successful : SnapshotStatus.Failed;
    CollectionSnapshotMetaData meta = new CollectionSnapshotMetaData(commitName, status, creationDate, replicas);
    SolrSnapshotManager.updateCollectionLevelSnapshot(zkClient, collectionName, meta);
    log.info("Saved following snapshot information for collection={} with commitName={} in Zookeeper : {}", collectionName, commitName, meta.toNamedList());
    if (!succeeded) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to create snapshot on shards " + failedShards);
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException) HashSet(java.util.HashSet) NamedList(org.apache.solr.common.util.NamedList) ShardHandler(org.apache.solr.handler.component.ShardHandler) SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) Replica(org.apache.solr.common.cloud.Replica) Date(java.util.Date) CollectionSnapshotMetaData(org.apache.solr.core.snapshots.CollectionSnapshotMetaData) Slice(org.apache.solr.common.cloud.Slice) CoreSnapshotMetaData(org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

CoreSnapshotMetaData (org.apache.solr.core.snapshots.CollectionSnapshotMetaData.CoreSnapshotMetaData)6 Slice (org.apache.solr.common.cloud.Slice)5 NamedList (org.apache.solr.common.util.NamedList)5 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 DocCollection (org.apache.solr.common.cloud.DocCollection)4 Replica (org.apache.solr.common.cloud.Replica)4 Map (java.util.Map)3 SolrException (org.apache.solr.common.SolrException)3 SolrZkClient (org.apache.solr.common.cloud.SolrZkClient)3 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)3 CollectionSnapshotMetaData (org.apache.solr.core.snapshots.CollectionSnapshotMetaData)3 ShardHandler (org.apache.solr.handler.component.ShardHandler)3 MethodHandles (java.lang.invoke.MethodHandles)2 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Optional (java.util.Optional)2 State (org.apache.solr.common.cloud.Replica.State)2 Logger (org.slf4j.Logger)2