use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project crate by crate.
the class SnapshotShardsService method startNewSnapshots.
private void startNewSnapshots(SnapshotsInProgress snapshotsInProgress) {
// For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
// snapshots in the future
// Now go through all snapshots and update existing or create missing
final String localNodeId = clusterService.localNode().getId();
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
final State entryState = entry.state();
if (entryState == State.STARTED) {
Map<ShardId, IndexShardSnapshotStatus> startedShards = null;
final Snapshot snapshot = entry.snapshot();
Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap());
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
// Add all new shards to start processing on
final ShardId shardId = shard.key;
final ShardSnapshotStatus shardSnapshotStatus = shard.value;
if (localNodeId.equals(shardSnapshotStatus.nodeId()) && shardSnapshotStatus.state() == ShardState.INIT && snapshotShards.containsKey(shardId) == false) {
LOGGER.trace("[{}] - Adding shard to the queue", shardId);
if (startedShards == null) {
startedShards = new HashMap<>();
}
startedShards.put(shardId, IndexShardSnapshotStatus.newInitializing(shardSnapshotStatus.generation()));
}
}
if (startedShards != null && startedShards.isEmpty() == false) {
shardSnapshots.computeIfAbsent(snapshot, s -> new HashMap<>()).putAll(startedShards);
startNewShards(entry, startedShards);
}
} else if (entryState == State.ABORTED) {
// Abort all running shards for this snapshot
final Snapshot snapshot = entry.snapshot();
Map<ShardId, IndexShardSnapshotStatus> snapshotShards = shardSnapshots.getOrDefault(snapshot, emptyMap());
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
final IndexShardSnapshotStatus snapshotStatus = snapshotShards.get(shard.key);
if (snapshotStatus == null) {
// notify master that abort has completed by moving to FAILED
if (shard.value.state() == ShardState.ABORTED) {
notifyFailedSnapshotShard(snapshot, shard.key, shard.value.reason());
}
} else {
snapshotStatus.abortIfNotCompleted("snapshot has been aborted");
}
}
}
}
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project crate by crate.
the class SnapshotShardsService method syncShardStatsOnNewMaster.
/**
* Checks if any shards were processed that the new master doesn't know about
*/
private void syncShardStatsOnNewMaster(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
if (snapshotsInProgress == null) {
return;
}
// Clear request deduplicator since we need to send all requests that were potentially not handled by the previous
// master again
remoteFailedRequestDeduplicator.clear();
for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
Map<ShardId, IndexShardSnapshotStatus> localShards = currentSnapshotShards(snapshot.snapshot());
if (localShards != null) {
ImmutableOpenMap<ShardId, ShardSnapshotStatus> masterShards = snapshot.shards();
for (Map.Entry<ShardId, IndexShardSnapshotStatus> localShard : localShards.entrySet()) {
ShardId shardId = localShard.getKey();
ShardSnapshotStatus masterShard = masterShards.get(shardId);
if (masterShard != null && masterShard.state().completed() == false) {
final IndexShardSnapshotStatus.Copy indexShardSnapshotStatus = localShard.getValue().asCopy();
final Stage stage = indexShardSnapshotStatus.getStage();
// Master knows about the shard and thinks it has not completed
if (stage == Stage.DONE) {
// but we think the shard is done - we need to make new master know that the shard is done
LOGGER.debug("[{}] new master thinks the shard [{}] is not completed but the shard is done locally, " + "updating status on the master", snapshot.snapshot(), shardId);
notifySuccessfulSnapshotShard(snapshot.snapshot(), shardId, localShard.getValue().generation());
} else if (stage == Stage.FAILURE) {
// but we think the shard failed - we need to make new master know that the shard failed
LOGGER.debug("[{}] new master thinks the shard [{}] is not completed but the shard failed locally, " + "updating status on master", snapshot.snapshot(), shardId);
notifyFailedSnapshotShard(snapshot.snapshot(), shardId, indexShardSnapshotStatus.getFailure());
}
}
}
}
}
}
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotShardsService method processIndexShardSnapshots.
/**
* Checks if any new shards should be snapshotted on this node
*
* @param event cluster state changed event
*/
private void processIndexShardSnapshots(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
Map<Snapshot, SnapshotShards> survivors = new HashMap<>();
// First, remove snapshots that are no longer there
for (Map.Entry<Snapshot, SnapshotShards> entry : shardSnapshots.entrySet()) {
final Snapshot snapshot = entry.getKey();
if (snapshotsInProgress != null && snapshotsInProgress.snapshot(snapshot) != null) {
survivors.put(entry.getKey(), entry.getValue());
} else {
// state update, which is being processed here
for (IndexShardSnapshotStatus snapshotStatus : entry.getValue().shards.values()) {
if (snapshotStatus.stage() == Stage.INIT || snapshotStatus.stage() == Stage.STARTED) {
snapshotStatus.abort();
}
}
}
}
// For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
// snapshots in the future
Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> newSnapshots = new HashMap<>();
// Now go through all snapshots and update existing or create missing
final String localNodeId = event.state().nodes().getLocalNodeId();
final DiscoveryNode masterNode = event.state().nodes().getMasterNode();
final Map<Snapshot, Map<String, IndexId>> snapshotIndices = new HashMap<>();
if (snapshotsInProgress != null) {
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
snapshotIndices.put(entry.snapshot(), entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())));
if (entry.state() == State.STARTED) {
Map<ShardId, IndexShardSnapshotStatus> startedShards = new HashMap<>();
SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
// Add all new shards to start processing on
if (localNodeId.equals(shard.value.nodeId())) {
if (shard.value.state() == State.INIT && (snapshotShards == null || !snapshotShards.shards.containsKey(shard.key))) {
logger.trace("[{}] - Adding shard to the queue", shard.key);
startedShards.put(shard.key, new IndexShardSnapshotStatus());
}
}
}
if (!startedShards.isEmpty()) {
newSnapshots.put(entry.snapshot(), startedShards);
if (snapshotShards != null) {
// We already saw this snapshot but we need to add more started shards
Map<ShardId, IndexShardSnapshotStatus> shards = new HashMap<>();
// Put all shards that were already running on this node
shards.putAll(snapshotShards.shards);
// Put all newly started shards
shards.putAll(startedShards);
survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(shards)));
} else {
// Brand new snapshot that we haven't seen before
survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(startedShards)));
}
}
} else if (entry.state() == State.ABORTED) {
// Abort all running shards for this snapshot
SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
if (snapshotShards != null) {
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.key);
if (snapshotStatus != null) {
switch(snapshotStatus.stage()) {
case INIT:
case STARTED:
snapshotStatus.abort();
break;
case FINALIZE:
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshot(), shard.key);
break;
case DONE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshot(), shard.key);
updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
break;
case FAILURE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshot(), shard.key);
updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.FAILED, snapshotStatus.failure()), masterNode);
break;
default:
throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
}
}
}
}
}
}
}
// Update the list of snapshots that we saw and tried to started
// If startup of these shards fails later, we don't want to try starting these shards again
shutdownLock.lock();
try {
shardSnapshots = unmodifiableMap(survivors);
if (shardSnapshots.isEmpty()) {
// Notify all waiting threads that no more snapshots
shutdownCondition.signalAll();
}
} finally {
shutdownLock.unlock();
}
// We have new shards to starts
if (newSnapshots.isEmpty() == false) {
Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
for (final Map.Entry<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> entry : newSnapshots.entrySet()) {
Map<String, IndexId> indicesMap = snapshotIndices.get(entry.getKey());
assert indicesMap != null;
for (final Map.Entry<ShardId, IndexShardSnapshotStatus> shardEntry : entry.getValue().entrySet()) {
final ShardId shardId = shardEntry.getKey();
try {
final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id());
final IndexId indexId = indicesMap.get(shardId.getIndexName());
assert indexId != null;
executor.execute(new AbstractRunnable() {
@Override
public void doRun() {
snapshot(indexShard, entry.getKey(), indexId, shardEntry.getValue());
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
}
@Override
public void onFailure(Exception e) {
logger.warn((Supplier<?>) () -> new ParameterizedMessage("[{}] [{}] failed to create snapshot", shardId, entry.getKey()), e);
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
}
});
} catch (Exception e) {
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
}
}
}
}
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotsService method processWaitingShards.
private ImmutableOpenMap<ShardId, ShardSnapshotStatus> processWaitingShards(ImmutableOpenMap<ShardId, ShardSnapshotStatus> snapshotShards, RoutingTable routingTable) {
boolean snapshotChanged = false;
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshotShards) {
ShardSnapshotStatus shardStatus = shardEntry.value;
ShardId shardId = shardEntry.key;
if (shardStatus.state() == State.WAITING) {
IndexRoutingTable indexShardRoutingTable = routingTable.index(shardId.getIndex());
if (indexShardRoutingTable != null) {
IndexShardRoutingTable shardRouting = indexShardRoutingTable.shard(shardId.id());
if (shardRouting != null && shardRouting.primaryShard() != null) {
if (shardRouting.primaryShard().started()) {
// Shard that we were waiting for has started on a node, let's process it
snapshotChanged = true;
logger.trace("starting shard that we were waiting for [{}] on node [{}]", shardId, shardStatus.nodeId());
shards.put(shardId, new ShardSnapshotStatus(shardRouting.primaryShard().currentNodeId()));
continue;
} else if (shardRouting.primaryShard().initializing() || shardRouting.primaryShard().relocating()) {
// Shard that we were waiting for hasn't started yet or still relocating - will continue to wait
shards.put(shardId, shardStatus);
continue;
}
}
}
// Shard that we were waiting for went into unassigned state or disappeared - giving up
snapshotChanged = true;
logger.warn("failing snapshot of shard [{}] on unassigned shard [{}]", shardId, shardStatus.nodeId());
shards.put(shardId, new ShardSnapshotStatus(shardStatus.nodeId(), State.FAILED, "shard is unassigned"));
} else {
shards.put(shardId, shardStatus);
}
}
if (snapshotChanged) {
return shards.build();
} else {
return null;
}
}
use of org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus in project elasticsearch by elastic.
the class SnapshotsService method removedNodesCleanupNeeded.
private boolean removedNodesCleanupNeeded(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
if (snapshotsInProgress == null) {
return false;
}
// Check if we just became the master
boolean newMaster = !event.previousState().nodes().isLocalNodeElectedMaster();
for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
if (newMaster && (snapshot.state() == State.SUCCESS || snapshot.state() == State.INIT)) {
// We just replaced old master and snapshots in intermediate states needs to be cleaned
return true;
}
for (DiscoveryNode node : event.nodesDelta().removedNodes()) {
for (ObjectCursor<ShardSnapshotStatus> shardStatus : snapshot.shards().values()) {
if (!shardStatus.value.state().completed() && node.getId().equals(shardStatus.value.nodeId())) {
// At least one shard was running on the removed node - we need to fail it
return true;
}
}
}
}
return false;
}
Aggregations