Use of org.elasticsearch.snapshots.Snapshot in project crate by crate.
The class BlobStoreRepository, method deleteSnapshot:
@Override
public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, boolean writeShardGens, ActionListener<Void> listener) {
    if (isReadOnly()) {
        listener.onFailure(new RepositoryException(metadata.name(), "cannot delete snapshot from a readonly repository"));
    } else {
        final long latestKnownGen = latestKnownRepoGen.get();
        if (latestKnownGen > repositoryStateId) {
            listener.onFailure(new ConcurrentSnapshotExecutionException(
                new Snapshot(metadata.name(), snapshotId),
                "Another concurrent operation moved repo generation to [" + latestKnownGen
                    + "] but this delete assumed generation [" + repositoryStateId + "]"));
            return;
        }
        try {
            final Map<String, BlobMetadata> rootBlobs = blobContainer().listBlobs();
            final RepositoryData repositoryData = safeRepositoryData(repositoryStateId, rootBlobs);
            // Cache the indices that were found before writing out the new index-N blob so that a stuck master will never
            // delete an index that was created by another master node after writing this index-N blob.
            final Map<String, BlobContainer> foundIndices = blobStore().blobContainer(indicesPath()).children();
            doDeleteShardSnapshots(snapshotId, repositoryStateId, foundIndices, rootBlobs, repositoryData, writeShardGens, listener);
        } catch (Exception ex) {
            listener.onFailure(new RepositoryException(metadata.name(), "failed to delete snapshot [" + snapshotId + "]", ex));
        }
    }
}
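For orientation, here is a hypothetical caller of this method. In production SnapshotsService drives the delete; the snapshot name, the pre-obtained repositoryStateId, and the logging below are illustrative only, and the usual org.elasticsearch imports are assumed to be in scope:

// A minimal, hypothetical caller; 'repository', 'repositoryStateId' and 'LOGGER' are assumed
// to be in scope. The listener completes only after the repository data has been rewritten.
SnapshotId snapshotId = new SnapshotId("my-snapshot", UUIDs.randomBase64UUID());
repository.deleteSnapshot(snapshotId, repositoryStateId, true, ActionListener.wrap(
    ignored -> LOGGER.info("deleted snapshot [{}]", snapshotId),
    e -> LOGGER.warn("failed to delete snapshot [" + snapshotId + "]", e)));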
Use of org.elasticsearch.snapshots.Snapshot in project crate by crate.
The class BlobStoreRepository, method writeUpdatedShardMetadataAndComputeDeletes:
// Updates the shard state metadata for shards of a snapshot that is to be deleted. Also computes the files to be cleaned up.
private void writeUpdatedShardMetadataAndComputeDeletes(SnapshotId snapshotId,
                                                        RepositoryData oldRepositoryData,
                                                        boolean useUUIDs,
                                                        ActionListener<Collection<ShardSnapshotMetaDeleteResult>> onAllShardsCompleted) {
    final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
    final List<IndexId> indices = oldRepositoryData.indicesToUpdateAfterRemovingSnapshot(snapshotId);
    if (indices.isEmpty()) {
        onAllShardsCompleted.onResponse(Collections.emptyList());
        return;
    }
    // Listener that flattens out the delete results for each index
    final ActionListener<Collection<ShardSnapshotMetaDeleteResult>> deleteIndexMetadataListener = new GroupedActionListener<>(
        ActionListener.map(onAllShardsCompleted, res -> res.stream().flatMap(Collection::stream).collect(Collectors.toList())),
        indices.size());
    for (IndexId indexId : indices) {
        final Set<SnapshotId> survivingSnapshots = oldRepositoryData.getSnapshots(indexId).stream()
            .filter(id -> id.equals(snapshotId) == false)
            .collect(Collectors.toSet());
        executor.execute(ActionRunnable.wrap(deleteIndexMetadataListener, deleteIdxMetaListener -> {
            final StepListener<IndexMetadata> snapshotIndexMetadataListener = new StepListener<>();
            try {
                getSnapshotIndexMetadata(snapshotId, indexId, snapshotIndexMetadataListener);
            } catch (Exception ex) {
                LOGGER.warn(() -> new ParameterizedMessage("[{}] [{}] failed to read metadata for index", snapshotId, indexId.getName()), ex);
                // Just invoke the listener without any shard generations to count it down, this index will be cleaned up
                // by the stale data cleanup in the end.
                // TODO: Getting here means repository corruption. We should find a way of dealing with this instead of just ignoring
                // it and letting the cleanup deal with it.
                deleteIdxMetaListener.onResponse(null);
                return;
            }
            snapshotIndexMetadataListener.whenComplete(indexMetadata -> {
                final int shardCount = indexMetadata.getNumberOfShards();
                assert shardCount > 0 : "index did not have positive shard count, got [" + shardCount + "]";
                // Listener for collecting the results of removing the snapshot from each shard's metadata in the current index
                final ActionListener<ShardSnapshotMetaDeleteResult> allShardsListener = new GroupedActionListener<>(deleteIdxMetaListener, shardCount);
                final Index index = indexMetadata.getIndex();
                for (int shardId = 0; shardId < indexMetadata.getNumberOfShards(); shardId++) {
                    final ShardId shard = new ShardId(index, shardId);
                    executor.execute(new AbstractRunnable() {

                        @Override
                        protected void doRun() throws Exception {
                            final BlobContainer shardContainer = shardContainer(indexId, shard);
                            final Set<String> blobs = getShardBlobs(shard, shardContainer);
                            final BlobStoreIndexShardSnapshots blobStoreIndexShardSnapshots;
                            final String newGen;
                            if (useUUIDs) {
                                newGen = UUIDs.randomBase64UUID();
                                blobStoreIndexShardSnapshots = buildBlobStoreIndexShardSnapshots(
                                    blobs, shardContainer, oldRepositoryData.shardGenerations().getShardGen(indexId, shard.getId())).v1();
                            } else {
                                Tuple<BlobStoreIndexShardSnapshots, Long> tuple = buildBlobStoreIndexShardSnapshots(blobs, shardContainer);
                                newGen = Long.toString(tuple.v2() + 1);
                                blobStoreIndexShardSnapshots = tuple.v1();
                            }
                            allShardsListener.onResponse(deleteFromShardSnapshotMeta(
                                survivingSnapshots, indexId, shard, snapshotId, shardContainer, blobs, blobStoreIndexShardSnapshots, newGen));
                        }

                        @Override
                        public void onFailure(Exception ex) {
                            LOGGER.warn(() -> new ParameterizedMessage(
                                "[{}] failed to delete shard data for shard [{}][{}]", snapshotId, indexId.getName(), shard.id()), ex);
                            // Just passing null here to count down the listener instead of failing it, the stale data left behind
                            // here will be retried in the next delete or repository cleanup
                            allShardsListener.onResponse(null);
                        }
                    });
                }
            }, onAllShardsCompleted::onFailure);
        }));
    }
}
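The fan-out above hinges on GroupedActionListener: it fires its delegate exactly once after a fixed number of onResponse calls, and ActionListener.map flattens the nested per-index collections into one list. A standalone sketch of that pattern, with invented worker tasks and an 'executor' and 'LOGGER' assumed to be in scope:

// Standalone sketch of the fan-out/fan-in pattern used above; names are illustrative.
// Each of N workers reports a List<String>; the grouped listener counts down once per
// worker and then delivers the flattened result to 'onAllDone'.
ActionListener<List<String>> onAllDone = ActionListener.wrap(
    flat -> LOGGER.info("collected {} results", flat.size()),
    e -> LOGGER.warn("a worker failed", e));
int workers = 3;
ActionListener<List<String>> grouped = new GroupedActionListener<>(
    ActionListener.map(onAllDone, res -> res.stream().flatMap(Collection::stream).collect(Collectors.toList())),
    workers);
for (int i = 0; i < workers; i++) {
    final int worker = i;
    executor.execute(() -> grouped.onResponse(List.of("result-" + worker)));
}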
Use of org.elasticsearch.snapshots.Snapshot in project crate by crate.
The class BlobStoreRepository, method writeIndexGen:
/**
 * Writing a new index generation is a three-step process.
 * First, the {@link RepositoryMetadata} entry for this repository is set into a pending state by incrementing its
 * pending generation {@code P} while its safe generation {@code N} remains unchanged.
 * Second, the updated {@link RepositoryData} is written to generation {@code P + 1}.
 * Lastly, the {@link RepositoryMetadata} entry for this repository is updated to the new generation {@code P + 1} and thus
 * pending and safe generation are set to the same value marking the end of the update of the repository data.
 *
 * @param repositoryData RepositoryData to write
 * @param expectedGen    expected repository generation at the start of the operation
 * @param writeShardGens whether to write {@link ShardGenerations} to the new {@link RepositoryData} blob
 * @param listener       completion listener
 */
protected void writeIndexGen(RepositoryData repositoryData, long expectedGen, boolean writeShardGens, ActionListener<Void> listener) {
    // cannot write to a read-only repository
    assert isReadOnly() == false;
    final long currentGen = repositoryData.getGenId();
    if (currentGen != expectedGen) {
        // the index file was updated by a concurrent operation, so we were operating on stale
        // repository data
        listener.onFailure(new RepositoryException(metadata.name(),
            "concurrent modification of the index-N file, expected current generation [" + expectedGen
                + "], actual current generation [" + currentGen + "]"));
        return;
    }
    // Step 1: Set repository generation state to the next possible pending generation
    final StepListener<Long> setPendingStep = new StepListener<>();
    clusterService.submitStateUpdateTask("set pending repository generation [" + metadata.name() + "][" + expectedGen + "]",
        new ClusterStateUpdateTask() {

            private long newGen;

            @Override
            public ClusterState execute(ClusterState currentState) {
                final RepositoryMetadata meta = getRepoMetadata(currentState);
                final String repoName = metadata.name();
                final long genInState = meta.generation();
                final boolean uninitializedMeta = meta.generation() == RepositoryData.UNKNOWN_REPO_GEN || bestEffortConsistency;
                if (uninitializedMeta == false && meta.pendingGeneration() != genInState) {
                    LOGGER.info("Trying to write new repository data over unfinished write, repo [{}] is at safe generation [{}] and pending generation [{}]",
                        meta.name(), genInState, meta.pendingGeneration());
                }
                assert expectedGen == RepositoryData.EMPTY_REPO_GEN || uninitializedMeta || expectedGen == meta.generation() :
                    "Expected non-empty generation [" + expectedGen + "] does not match generation tracked in [" + meta + "]";
                // If we run into the empty repo generation for the expected gen, the repo is assumed to have been cleared of
                // all contents by an external process so we reset the safe generation to the empty generation.
                final long safeGeneration = expectedGen == RepositoryData.EMPTY_REPO_GEN
                    ? RepositoryData.EMPTY_REPO_GEN
                    : (uninitializedMeta ? expectedGen : genInState);
                // Regardless of whether or not the safe generation has been reset, the pending generation always increments so that
                // even if a repository has been manually cleared of all contents we will never reuse the same repository generation.
                // This is motivated by the consistency behavior the S3 based blob repository implementation has to support which does
                // not offer any consistency guarantees when it comes to overwriting the same blob name with different content.
                final long nextPendingGen = metadata.pendingGeneration() + 1;
                newGen = uninitializedMeta ? Math.max(expectedGen + 1, nextPendingGen) : nextPendingGen;
                assert newGen > latestKnownRepoGen.get() : "Attempted new generation [" + newGen
                    + "] must be larger than latest known generation [" + latestKnownRepoGen.get() + "]";
                return ClusterState.builder(currentState)
                    .metadata(Metadata.builder(currentState.getMetadata())
                        .putCustom(RepositoriesMetadata.TYPE,
                            currentState.metadata().<RepositoriesMetadata>custom(RepositoriesMetadata.TYPE)
                                .withUpdatedGeneration(repoName, safeGeneration, newGen))
                        .build())
                    .build();
            }

            @Override
            public void onFailure(String source, Exception e) {
                listener.onFailure(new RepositoryException(metadata.name(), "Failed to execute cluster state update [" + source + "]", e));
            }

            @Override
            public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                setPendingStep.onResponse(newGen);
            }
        });

    final StepListener<RepositoryData> filterRepositoryDataStep = new StepListener<>();

    // Step 2: Write new index-N blob to repository and update index.latest
    setPendingStep.whenComplete(newGen -> threadPool().executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> {
        // BwC logic: Load snapshot version information if any snapshot is missing a version in RepositoryData so that the new
        // RepositoryData contains a version for every snapshot
        final List<SnapshotId> snapshotIdsWithoutVersion = repositoryData.getSnapshotIds().stream()
            .filter(snapshotId -> repositoryData.getVersion(snapshotId) == null)
            .collect(Collectors.toList());
        if (snapshotIdsWithoutVersion.isEmpty() == false) {
            final Map<SnapshotId, Version> updatedVersionMap = new ConcurrentHashMap<>();
            final GroupedActionListener<Void> loadAllVersionsListener = new GroupedActionListener<>(
                ActionListener.runAfter(new ActionListener<Collection<Void>>() {

                    @Override
                    public void onResponse(Collection<Void> voids) {
                        LOGGER.info("Successfully loaded all snapshots' version information for {} from snapshot metadata",
                            AllocationService.firstListElementsToCommaDelimitedString(
                                snapshotIdsWithoutVersion, SnapshotId::toString, LOGGER.isDebugEnabled()));
                    }

                    @Override
                    public void onFailure(Exception e) {
                        LOGGER.warn("Failure when trying to load missing version information from snapshot metadata", e);
                    }
                }, () -> filterRepositoryDataStep.onResponse(repositoryData.withVersions(updatedVersionMap))),
                snapshotIdsWithoutVersion.size());
            for (SnapshotId snapshotId : snapshotIdsWithoutVersion) {
                threadPool().executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(loadAllVersionsListener, () -> {
                    ActionListener<SnapshotInfo> snapshotInfoListener = ActionListener.delegateFailure(
                        loadAllVersionsListener,
                        (delegate, snapshotInfo) -> {
                            updatedVersionMap.put(snapshotId, snapshotInfo.version());
                            delegate.onResponse(null);
                        });
                    getSnapshotInfo(snapshotId, snapshotInfoListener);
                }));
            }
        } else {
            filterRepositoryDataStep.onResponse(repositoryData);
        }
    })), listener::onFailure);

    filterRepositoryDataStep.whenComplete(filteredRepositoryData -> {
        final long newGen = setPendingStep.result();
        if (latestKnownRepoGen.get() >= newGen) {
            throw new IllegalArgumentException("Tried writing generation [" + newGen
                + "] but repository is at least at generation [" + latestKnownRepoGen.get() + "] already");
        }
        // write the index file
        final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen);
        LOGGER.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob);
        writeAtomic(indexBlob, BytesReference.bytes(
            filteredRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), writeShardGens)), true);
        // write the current generation to the index-latest file
        final BytesReference genBytes;
        try (BytesStreamOutput bStream = new BytesStreamOutput()) {
            bStream.writeLong(newGen);
            genBytes = bStream.bytes();
        }
        LOGGER.debug("Repository [{}] updating index.latest with generation [{}]", metadata.name(), newGen);
        writeAtomic(INDEX_LATEST_BLOB, genBytes, false);
        // Step 3: Update CS to reflect new repository generation.
        clusterService.submitStateUpdateTask("set safe repository generation [" + metadata.name() + "][" + newGen + "]",
            new ClusterStateUpdateTask() {

                @Override
                public ClusterState execute(ClusterState currentState) {
                    final RepositoryMetadata meta = getRepoMetadata(currentState);
                    if (meta.generation() != expectedGen) {
                        throw new IllegalStateException("Tried to update repo generation to [" + newGen
                            + "] but saw unexpected generation in state [" + meta + "]");
                    }
                    if (meta.pendingGeneration() != newGen) {
                        throw new IllegalStateException("Tried to update from unexpected pending repo generation ["
                            + meta.pendingGeneration() + "] after write to generation [" + newGen + "]");
                    }
                    return ClusterState.builder(currentState)
                        .metadata(Metadata.builder(currentState.getMetadata())
                            .putCustom(RepositoriesMetadata.TYPE,
                                currentState.metadata().<RepositoriesMetadata>custom(RepositoriesMetadata.TYPE)
                                    .withUpdatedGeneration(metadata.name(), newGen, newGen))
                            .build())
                        .build();
                }

                @Override
                public void onFailure(String source, Exception e) {
                    listener.onFailure(new RepositoryException(metadata.name(), "Failed to execute cluster state update [" + source + "]", e));
                }

                @Override
                public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
                    threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(listener, () -> {
                        // Delete all now outdated index files up to 1000 blobs back from the new generation.
                        // If there are more than 1000 dangling index-N blobs, the cleanup functionality on repo delete will take care of them.
                        // Deleting one older than the current expectedGen is done for BwC reasons as older versions used to keep
                        // two index-N blobs around.
                        final List<String> oldIndexN = LongStream.range(Math.max(Math.max(expectedGen - 1, 0), newGen - 1000), newGen)
                            .mapToObj(gen -> INDEX_FILE_PREFIX + gen)
                            .collect(Collectors.toList());
                        try {
                            blobContainer().deleteBlobsIgnoringIfNotExists(oldIndexN);
                        } catch (IOException e) {
                            LOGGER.warn("Failed to clean up old index blobs {}", oldIndexN);
                        }
                    }));
                }
            });
    }, listener::onFailure);
}
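To isolate the three-step protocol described in the Javadoc, here is a deliberately simplified, hypothetical model of the pending/safe generation bookkeeping. It elides the cluster state updates and blob writes that the real method funnels these transitions through; only the invariant matters:

// Simplified, hypothetical model of the pending/safe generation handshake.
// Invariant: pending >= safe; pending > safe signals an unfinished (possibly crashed) write,
// which is exactly what the LOGGER.info branch in Step 1 above detects.
final class RepoGenerations {
    private long safe = -1L;    // N: last fully committed generation (RepositoryData.EMPTY_REPO_GEN)
    private long pending = -1L; // P: highest generation ever attempted

    // Step 1: claim the next generation before touching the repository.
    synchronized long startWrite() {
        return ++pending;
    }

    // Step 2 happens in between: write the index-(P+1) blob to the repository.

    // Step 3: after the blob is durably written, mark the generation safe again.
    synchronized void finishWrite(long gen) {
        assert gen == pending : "finished write [" + gen + "] does not match pending [" + pending + "]";
        safe = gen; // pending and safe agree again; the update is complete
    }
}

Because the pending generation is bumped even when a write never completes, a generation number is never reused, which is what makes the scheme safe on stores (such as S3) that give no consistency guarantees when the same blob name is overwritten with different content.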
Use of org.elasticsearch.snapshots.Snapshot in project crate by crate.
The class NodeVersionAllocationDeciderTests, method testMessages:
public void testMessages() {
    Metadata metadata = Metadata.builder()
        .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
        .build();
    RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build();
    RoutingNode newNode = new RoutingNode("newNode", newNode("newNode", Version.CURRENT));
    RoutingNode oldNode = new RoutingNode("oldNode", newNode("oldNode", VersionUtils.getPreviousVersion()));
    final ClusterName clusterName = ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY);
    ClusterState clusterState = ClusterState.builder(clusterName)
        .metadata(metadata)
        .routingTable(initialRoutingTable)
        .nodes(DiscoveryNodes.builder().add(newNode.node()).add(oldNode.node()))
        .build();
    final ShardId shardId = clusterState.routingTable().index("test").shard(0).getShardId();
    final ShardRouting primaryShard = clusterState.routingTable().shardRoutingTable(shardId).primaryShard();
    final ShardRouting replicaShard = clusterState.routingTable().shardRoutingTable(shardId).replicaShards().get(0);
    RoutingAllocation routingAllocation = new RoutingAllocation(null, clusterState.getRoutingNodes(), clusterState, null, 0);
    routingAllocation.debugDecision(true);

    final NodeVersionAllocationDecider allocationDecider = new NodeVersionAllocationDecider();
    Decision decision = allocationDecider.canAllocate(primaryShard, newNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.YES));
    assertThat(decision.getExplanation(), is("the primary shard is new or already existed on the node"));

    decision = allocationDecider.canAllocate(ShardRoutingHelper.initialize(primaryShard, "oldNode"), newNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.YES));
    assertThat(decision.getExplanation(), is("can relocate primary shard from a node with version ["
        + oldNode.node().getVersion() + "] to a node with equal-or-newer version [" + newNode.node().getVersion() + "]"));

    decision = allocationDecider.canAllocate(ShardRoutingHelper.initialize(primaryShard, "newNode"), oldNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.NO));
    assertThat(decision.getExplanation(), is("cannot relocate primary shard from a node with version ["
        + newNode.node().getVersion() + "] to a node with older version [" + oldNode.node().getVersion() + "]"));

    final SnapshotRecoverySource newVersionSnapshot = new SnapshotRecoverySource(
        UUIDs.randomBase64UUID(),
        new Snapshot("rep1", new SnapshotId("snp1", UUIDs.randomBase64UUID())),
        newNode.node().getVersion(),
        "test");
    final SnapshotRecoverySource oldVersionSnapshot = new SnapshotRecoverySource(
        UUIDs.randomBase64UUID(),
        new Snapshot("rep1", new SnapshotId("snp1", UUIDs.randomBase64UUID())),
        oldNode.node().getVersion(),
        "test");

    decision = allocationDecider.canAllocate(ShardRoutingHelper.newWithRestoreSource(primaryShard, newVersionSnapshot), oldNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.NO));
    assertThat(decision.getExplanation(), is("node version [" + oldNode.node().getVersion()
        + "] is older than the snapshot version [" + newNode.node().getVersion() + "]"));

    decision = allocationDecider.canAllocate(ShardRoutingHelper.newWithRestoreSource(primaryShard, oldVersionSnapshot), newNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.YES));
    assertThat(decision.getExplanation(), is("node version [" + newNode.node().getVersion()
        + "] is the same or newer than snapshot version [" + oldNode.node().getVersion() + "]"));

    final RoutingChangesObserver routingChangesObserver = new RoutingChangesObserver.AbstractRoutingChangesObserver();
    final RoutingNodes routingNodes = new RoutingNodes(clusterState, false);
    final ShardRouting startedPrimary = routingNodes.startShard(
        logger, routingNodes.initializeShard(primaryShard, "newNode", null, 0, routingChangesObserver), routingChangesObserver);
    routingAllocation = new RoutingAllocation(null, routingNodes, clusterState, null, 0);
    routingAllocation.debugDecision(true);

    decision = allocationDecider.canAllocate(replicaShard, oldNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.NO));
    assertThat(decision.getExplanation(), is("cannot allocate replica shard to a node with version ["
        + oldNode.node().getVersion() + "] since this is older than the primary version [" + newNode.node().getVersion() + "]"));

    routingNodes.startShard(logger, routingNodes.relocateShard(startedPrimary, "oldNode", 0, routingChangesObserver).v2(), routingChangesObserver);
    routingAllocation = new RoutingAllocation(null, routingNodes, clusterState, null, 0);
    routingAllocation.debugDecision(true);

    decision = allocationDecider.canAllocate(replicaShard, newNode, routingAllocation);
    assertThat(decision.type(), is(Decision.Type.YES));
    assertThat(decision.getExplanation(), is("can allocate replica shard to a node with version ["
        + newNode.node().getVersion() + "] since this is equal-or-newer than the primary version [" + oldNode.node().getVersion() + "]"));
}
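The snapshot-recovery assertions in this test reduce to a single monotonicity rule, restated here as a minimal sketch (this is not the decider's actual code, just a distillation of the behavior it asserts):

// Hypothetical restatement of the rule the snapshot assertions exercise: a shard restored
// from a snapshot may only be allocated to a node whose version is the same as or newer
// than the version of the node that took the snapshot.
static boolean canRestoreSnapshotTo(Version nodeVersion, Version snapshotVersion) {
    return nodeVersion.onOrAfter(snapshotVersion);
}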