Search in sources :

Example 6 with ShardSnapshotStatus

use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.

the class SnapshotShardsService method processIndexShardSnapshots.

/**
     * Checks if any new shards should be snapshotted on this node
     *
     * @param event cluster state changed event
     */
private void processIndexShardSnapshots(ClusterChangedEvent event) {
    SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
    Map<Snapshot, SnapshotShards> survivors = new HashMap<>();
    // First, remove snapshots that are no longer there
    for (Map.Entry<Snapshot, SnapshotShards> entry : shardSnapshots.entrySet()) {
        final Snapshot snapshot = entry.getKey();
        if (snapshotsInProgress != null && snapshotsInProgress.snapshot(snapshot) != null) {
            survivors.put(entry.getKey(), entry.getValue());
        } else {
            // state update, which is being processed here
            for (IndexShardSnapshotStatus snapshotStatus : entry.getValue().shards.values()) {
                if (snapshotStatus.stage() == Stage.INIT || snapshotStatus.stage() == Stage.STARTED) {
                    snapshotStatus.abort();
                }
            }
        }
    }
    // For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
    // snapshots in the future
    Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> newSnapshots = new HashMap<>();
    // Now go through all snapshots and update existing or create missing
    final String localNodeId = event.state().nodes().getLocalNodeId();
    final DiscoveryNode masterNode = event.state().nodes().getMasterNode();
    final Map<Snapshot, Map<String, IndexId>> snapshotIndices = new HashMap<>();
    if (snapshotsInProgress != null) {
        for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
            snapshotIndices.put(entry.snapshot(), entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())));
            if (entry.state() == State.STARTED) {
                Map<ShardId, IndexShardSnapshotStatus> startedShards = new HashMap<>();
                SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
                for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                    // Add all new shards to start processing on
                    if (localNodeId.equals(shard.value.nodeId())) {
                        if (shard.value.state() == State.INIT && (snapshotShards == null || !snapshotShards.shards.containsKey(shard.key))) {
                            logger.trace("[{}] - Adding shard to the queue", shard.key);
                            startedShards.put(shard.key, new IndexShardSnapshotStatus());
                        }
                    }
                }
                if (!startedShards.isEmpty()) {
                    newSnapshots.put(entry.snapshot(), startedShards);
                    if (snapshotShards != null) {
                        // We already saw this snapshot but we need to add more started shards
                        Map<ShardId, IndexShardSnapshotStatus> shards = new HashMap<>();
                        // Put all shards that were already running on this node
                        shards.putAll(snapshotShards.shards);
                        // Put all newly started shards
                        shards.putAll(startedShards);
                        survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(shards)));
                    } else {
                        // Brand new snapshot that we haven't seen before
                        survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(startedShards)));
                    }
                }
            } else if (entry.state() == State.ABORTED) {
                // Abort all running shards for this snapshot
                SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
                if (snapshotShards != null) {
                    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                        IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.key);
                        if (snapshotStatus != null) {
                            switch(snapshotStatus.stage()) {
                                case INIT:
                                case STARTED:
                                    snapshotStatus.abort();
                                    break;
                                case FINALIZE:
                                    logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshot(), shard.key);
                                    break;
                                case DONE:
                                    logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshot(), shard.key);
                                    updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
                                    break;
                                case FAILURE:
                                    logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshot(), shard.key);
                                    updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.FAILED, snapshotStatus.failure()), masterNode);
                                    break;
                                default:
                                    throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
                            }
                        }
                    }
                }
            }
        }
    }
    // Update the list of snapshots that we saw and tried to started
    // If startup of these shards fails later, we don't want to try starting these shards again
    shutdownLock.lock();
    try {
        shardSnapshots = unmodifiableMap(survivors);
        if (shardSnapshots.isEmpty()) {
            // Notify all waiting threads that no more snapshots
            shutdownCondition.signalAll();
        }
    } finally {
        shutdownLock.unlock();
    }
    // We have new shards to starts
    if (newSnapshots.isEmpty() == false) {
        Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
        for (final Map.Entry<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> entry : newSnapshots.entrySet()) {
            Map<String, IndexId> indicesMap = snapshotIndices.get(entry.getKey());
            assert indicesMap != null;
            for (final Map.Entry<ShardId, IndexShardSnapshotStatus> shardEntry : entry.getValue().entrySet()) {
                final ShardId shardId = shardEntry.getKey();
                try {
                    final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id());
                    final IndexId indexId = indicesMap.get(shardId.getIndexName());
                    assert indexId != null;
                    executor.execute(new AbstractRunnable() {

                        @Override
                        public void doRun() {
                            snapshot(indexShard, entry.getKey(), indexId, shardEntry.getValue());
                            updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
                        }

                        @Override
                        public void onFailure(Exception e) {
                            logger.warn((Supplier<?>) () -> new ParameterizedMessage("[{}] [{}] failed to create snapshot", shardId, entry.getKey()), e);
                            updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
                        }
                    });
                } catch (Exception e) {
                    updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
                }
            }
        }
    }
}
Also used : IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) ShardId(org.elasticsearch.index.shard.ShardId) Executor(java.util.concurrent.Executor) ClusterStateTaskExecutor(org.elasticsearch.cluster.ClusterStateTaskExecutor) Supplier(org.apache.logging.log4j.util.Supplier) IndexId(org.elasticsearch.repositories.IndexId) IndexShard(org.elasticsearch.index.shard.IndexShard) IndexShardSnapshotFailedException(org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException) SnapshotFailedEngineException(org.elasticsearch.index.engine.SnapshotFailedEngineException) IOException(java.io.IOException) SnapshotsInProgress(org.elasticsearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) Map(java.util.Map) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) HashMap(java.util.HashMap) Collections.emptyMap(java.util.Collections.emptyMap) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap)

Example 7 with ShardSnapshotStatus

use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.

the class SnapshotsService method processWaitingShards.

private ImmutableOpenMap<ShardId, ShardSnapshotStatus> processWaitingShards(ImmutableOpenMap<ShardId, ShardSnapshotStatus> snapshotShards, RoutingTable routingTable) {
    boolean snapshotChanged = false;
    ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshotShards) {
        ShardSnapshotStatus shardStatus = shardEntry.value;
        ShardId shardId = shardEntry.key;
        if (shardStatus.state() == State.WAITING) {
            IndexRoutingTable indexShardRoutingTable = routingTable.index(shardId.getIndex());
            if (indexShardRoutingTable != null) {
                IndexShardRoutingTable shardRouting = indexShardRoutingTable.shard(shardId.id());
                if (shardRouting != null && shardRouting.primaryShard() != null) {
                    if (shardRouting.primaryShard().started()) {
                        // Shard that we were waiting for has started on a node, let's process it
                        snapshotChanged = true;
                        logger.trace("starting shard that we were waiting for [{}] on node [{}]", shardId, shardStatus.nodeId());
                        shards.put(shardId, new ShardSnapshotStatus(shardRouting.primaryShard().currentNodeId()));
                        continue;
                    } else if (shardRouting.primaryShard().initializing() || shardRouting.primaryShard().relocating()) {
                        // Shard that we were waiting for hasn't started yet or still relocating - will continue to wait
                        shards.put(shardId, shardStatus);
                        continue;
                    }
                }
            }
            // Shard that we were waiting for went into unassigned state or disappeared - giving up
            snapshotChanged = true;
            logger.warn("failing snapshot of shard [{}] on unassigned shard [{}]", shardId, shardStatus.nodeId());
            shards.put(shardId, new ShardSnapshotStatus(shardStatus.nodeId(), State.FAILED, "shard is unassigned"));
        } else {
            shards.put(shardId, shardStatus);
        }
    }
    if (snapshotChanged) {
        return shards.build();
    } else {
        return null;
    }
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap)

Example 8 with ShardSnapshotStatus

use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.

the class SnapshotsService method removedNodesCleanupNeeded.

private boolean removedNodesCleanupNeeded(ClusterChangedEvent event) {
    SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
    if (snapshotsInProgress == null) {
        return false;
    }
    // Check if we just became the master
    boolean newMaster = !event.previousState().nodes().isLocalNodeElectedMaster();
    for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
        if (newMaster && (snapshot.state() == State.SUCCESS || snapshot.state() == State.INIT)) {
            // We just replaced old master and snapshots in intermediate states needs to be cleaned
            return true;
        }
        for (DiscoveryNode node : event.nodesDelta().removedNodes()) {
            for (ObjectCursor<ShardSnapshotStatus> shardStatus : snapshot.shards().values()) {
                if (!shardStatus.value.state().completed() && node.getId().equals(shardStatus.value.nodeId())) {
                    // At least one shard was running on the removed node - we need to fail it
                    return true;
                }
            }
        }
    }
    return false;
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) SnapshotsInProgress(org.elasticsearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus)

Aggregations

ShardSnapshotStatus (org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus)8 IndexShardSnapshotStatus (org.elasticsearch.index.snapshots.IndexShardSnapshotStatus)7 SnapshotsInProgress (org.elasticsearch.cluster.SnapshotsInProgress)6 ImmutableOpenMap (org.elasticsearch.common.collect.ImmutableOpenMap)5 ShardId (org.elasticsearch.index.shard.ShardId)5 ObjectObjectCursor (com.carrotsearch.hppc.cursors.ObjectObjectCursor)4 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)3 ClusterState (org.elasticsearch.cluster.ClusterState)3 ClusterStateUpdateTask (org.elasticsearch.cluster.ClusterStateUpdateTask)3 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)3 RepositoryMissingException (org.elasticsearch.repositories.RepositoryMissingException)3 Collections.emptyMap (java.util.Collections.emptyMap)2 Collections.unmodifiableMap (java.util.Collections.unmodifiableMap)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)2 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)2 IndexId (org.elasticsearch.repositories.IndexId)2 List (java.util.List)1