use of org.elasticsearch.repositories.ShardGenerations in project crate by crate.
the class BlobStoreRepository method finalizeSnapshot.
/**
* {@inheritDoc}
*/
@Override
public void finalizeSnapshot(final SnapshotId snapshotId, final ShardGenerations shardGenerations, final long startTime, final String failure, final int totalShards, final List<SnapshotShardFailure> shardFailures, final long repositoryStateId, final boolean includeGlobalState, final Metadata clusterMetadata, boolean writeShardGens, final ActionListener<SnapshotInfo> listener) {
final Collection<IndexId> indices = shardGenerations.indices();
// Once we are done writing the updated index-N blob we remove the now unreferenced index-${uuid} blobs in each shard
// directory if all nodes are at least at version SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION
// If there are older version nodes in the cluster, we don't need to run this cleanup as it will have already happened
// when writing the index-${N} to each shard directory.
final Consumer<Exception> onUpdateFailure = e -> listener.onFailure(new SnapshotException(metadata.name(), snapshotId, "failed to update snapshot in repository", e));
final ActionListener<SnapshotInfo> allMetaListener = new GroupedActionListener<>(ActionListener.wrap(snapshotInfos -> {
assert snapshotInfos.size() == 1 : "Should have only received a single SnapshotInfo but received " + snapshotInfos;
final SnapshotInfo snapshotInfo = snapshotInfos.iterator().next();
getRepositoryData(ActionListener.wrap(existingRepositoryData -> {
final RepositoryData updatedRepositoryData = existingRepositoryData.addSnapshot(snapshotId, snapshotInfo.state(), Version.CURRENT, shardGenerations);
writeIndexGen(updatedRepositoryData, repositoryStateId, writeShardGens, ActionListener.wrap(v -> {
if (writeShardGens) {
cleanupOldShardGens(existingRepositoryData, updatedRepositoryData);
}
listener.onResponse(snapshotInfo);
}, onUpdateFailure));
}, onUpdateFailure));
}, onUpdateFailure), 2 + indices.size());
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
// We ignore all FileAlreadyExistsException when writing metadata since otherwise a master failover while in this method will
// mean that no snap-${uuid}.dat blob is ever written for this snapshot. This is safe because any updated version of the
// index or global metadata will be compatible with the segments written in this snapshot as well.
// Failing on an already existing index-${repoGeneration} below ensures that the index.latest blob is not updated in a way
// that decrements the generation it points at
// Write Global Metadata
executor.execute(ActionRunnable.run(allMetaListener, () -> globalMetadataFormat.write(clusterMetadata, blobContainer(), snapshotId.getUUID(), false)));
// write the index metadata for each index in the snapshot
for (IndexId index : indices) {
executor.execute(ActionRunnable.run(allMetaListener, () -> indexMetadataFormat.write(clusterMetadata.index(index.getName()), indexContainer(index), snapshotId.getUUID(), false)));
}
executor.execute(ActionRunnable.supply(allMetaListener, () -> {
final SnapshotInfo snapshotInfo = new SnapshotInfo(snapshotId, indices.stream().map(IndexId::getName).collect(Collectors.toList()), startTime, failure, threadPool.absoluteTimeInMillis(), totalShards, shardFailures, includeGlobalState);
snapshotFormat.write(snapshotInfo, blobContainer(), snapshotId.getUUID(), false);
return snapshotInfo;
}));
}
use of org.elasticsearch.repositories.ShardGenerations in project crate by crate.
the class SnapshotsService method shards.
/**
* Calculates the list of shards that should be included into the current snapshot
*
* @param clusterState cluster state
* @param snapshot SnapshotsInProgress Entry
* @return list of shard to be included into current snapshot
*/
private static ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shards(ClusterState clusterState, SnapshotsInProgress.Entry snapshot, RepositoryData repositoryData) {
ImmutableOpenMap.Builder<ShardId, SnapshotsInProgress.ShardSnapshotStatus> builder = ImmutableOpenMap.builder();
Metadata metadata = clusterState.metadata();
final ShardGenerations shardGenerations = repositoryData.shardGenerations();
for (IndexId index : snapshot.indices()) {
final String indexName = index.getName();
final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false;
IndexMetadata indexMetadata = metadata.index(indexName);
if (indexMetadata == null) {
// The index was deleted before we managed to start the snapshot - mark it as missing.
builder.put(new ShardId(indexName, IndexMetadata.INDEX_UUID_NA_VALUE, 0), new SnapshotsInProgress.ShardSnapshotStatus(null, ShardState.MISSING, "missing index", null));
} else {
IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(indexName);
for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
ShardId shardId = new ShardId(indexMetadata.getIndex(), i);
final String shardRepoGeneration;
if (snapshot.useShardGenerations()) {
if (isNewIndex) {
assert shardGenerations.getShardGen(index, shardId.getId()) == null : "Found shard generation for new index [" + index + "]";
shardRepoGeneration = ShardGenerations.NEW_SHARD_GEN;
} else {
shardRepoGeneration = shardGenerations.getShardGen(index, shardId.getId());
}
} else {
shardRepoGeneration = null;
}
if (indexRoutingTable != null) {
ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
if (primary == null || !primary.assignedToNode()) {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", shardRepoGeneration));
} else if (primary.relocating() || primary.initializing()) {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardRepoGeneration));
} else if (!primary.started()) {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), ShardState.MISSING, "primary shard hasn't been started yet", shardRepoGeneration));
} else {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), shardRepoGeneration));
}
} else {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, ShardState.MISSING, "missing routing table"));
}
}
}
}
return builder.build();
}
use of org.elasticsearch.repositories.ShardGenerations in project crate by crate.
the class SnapshotsService method endSnapshot.
/**
* Finalizes the shard in repository and then removes it from cluster state
* <p>
* This is non-blocking method that runs on a thread from SNAPSHOT thread pool
*
* @param entry SnapshotsInProgress.Entry current snapshot in progress
* @param metadata Metadata cluster metadata
*/
private void endSnapshot(SnapshotsInProgress.Entry entry, Metadata metadata) {
if (endingSnapshots.add(entry.snapshot()) == false) {
return;
}
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new AbstractRunnable() {
@Override
protected void doRun() {
final Snapshot snapshot = entry.snapshot();
final Repository repository = repositoriesService.repository(snapshot.getRepository());
final String failure = entry.failure();
LOGGER.trace("[{}] finalizing snapshot in repository, state: [{}], failure[{}]", snapshot, entry.state(), failure);
ArrayList<SnapshotShardFailure> shardFailures = new ArrayList<>();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardStatus : entry.shards()) {
ShardId shardId = shardStatus.key;
ShardSnapshotStatus status = shardStatus.value;
if (status.state().failed()) {
shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, status.reason()));
}
}
final ShardGenerations shardGenerations = buildGenerations(entry, metadata);
repository.finalizeSnapshot(snapshot.getSnapshotId(), shardGenerations, entry.startTime(), failure, entry.partial() ? shardGenerations.totalShards() : entry.shards().size(), unmodifiableList(shardFailures), entry.repositoryStateId(), entry.includeGlobalState(), metadataForSnapshot(entry, metadata), entry.useShardGenerations(), ActionListener.wrap(snapshotInfo -> {
removeSnapshotFromClusterState(snapshot, snapshotInfo, null);
LOGGER.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state());
}, this::onFailure));
}
@Override
public void onFailure(final Exception e) {
Snapshot snapshot = entry.snapshot();
if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) {
// Failure due to not being master any more, don't try to remove snapshot from cluster state the next master
// will try ending this snapshot again
LOGGER.debug(() -> new ParameterizedMessage("[{}] failed to update cluster state during snapshot finalization", snapshot), e);
endingSnapshots.remove(snapshot);
} else {
LOGGER.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e);
removeSnapshotFromClusterState(snapshot, null, e);
}
}
});
}
use of org.elasticsearch.repositories.ShardGenerations in project crate by crate.
the class SnapshotsService method buildGenerations.
private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, Metadata metadata) {
ShardGenerations.Builder builder = ShardGenerations.builder();
final Map<String, IndexId> indexLookup = new HashMap<>();
snapshot.indices().forEach(idx -> indexLookup.put(idx.getName(), idx));
snapshot.shards().forEach(c -> {
if (metadata.index(c.key.getIndex()) == null) {
assert snapshot.partial() : "Index [" + c.key.getIndex() + "] was deleted during a snapshot but snapshot was not partial.";
return;
}
final IndexId indexId = indexLookup.get(c.key.getIndexName());
if (indexId != null) {
builder.put(indexId, c.key.id(), c.value.generation());
}
});
return builder.build();
}
use of org.elasticsearch.repositories.ShardGenerations in project crate by crate.
the class BlobStoreRepository method writeIndexGen.
/**
* Writing a new index generation is a three step process.
* First, the {@link RepositoryMetadata} entry for this repository is set into a pending state by incrementing its
* pending generation {@code P} while its safe generation {@code N} remains unchanged.
* Second, the updated {@link RepositoryData} is written to generation {@code P + 1}.
* Lastly, the {@link RepositoryMetadata} entry for this repository is updated to the new generation {@code P + 1} and thus
* pending and safe generation are set to the same value marking the end of the update of the repository data.
*
* @param repositoryData RepositoryData to write
* @param expectedGen expected repository generation at the start of the operation
* @param writeShardGens whether to write {@link ShardGenerations} to the new {@link RepositoryData} blob
* @param listener completion listener
*/
protected void writeIndexGen(RepositoryData repositoryData, long expectedGen, boolean writeShardGens, ActionListener<Void> listener) {
// can not write to a read only repository
assert isReadOnly() == false;
final long currentGen = repositoryData.getGenId();
if (currentGen != expectedGen) {
// the index file was updated by a concurrent operation, so we were operating on stale
// repository data
listener.onFailure(new RepositoryException(metadata.name(), "concurrent modification of the index-N file, expected current generation [" + expectedGen + "], actual current generation [" + currentGen + "]"));
return;
}
// Step 1: Set repository generation state to the next possible pending generation
final StepListener<Long> setPendingStep = new StepListener<>();
clusterService.submitStateUpdateTask("set pending repository generation [" + metadata.name() + "][" + expectedGen + "]", new ClusterStateUpdateTask() {
private long newGen;
@Override
public ClusterState execute(ClusterState currentState) {
final RepositoryMetadata meta = getRepoMetadata(currentState);
final String repoName = metadata.name();
final long genInState = meta.generation();
final boolean uninitializedMeta = meta.generation() == RepositoryData.UNKNOWN_REPO_GEN || bestEffortConsistency;
if (uninitializedMeta == false && meta.pendingGeneration() != genInState) {
LOGGER.info("Trying to write new repository data over unfinished write, repo [{}] is at " + "safe generation [{}] and pending generation [{}]", meta.name(), genInState, meta.pendingGeneration());
}
assert expectedGen == RepositoryData.EMPTY_REPO_GEN || uninitializedMeta || expectedGen == meta.generation() : "Expected non-empty generation [" + expectedGen + "] does not match generation tracked in [" + meta + "]";
// If we run into the empty repo generation for the expected gen, the repo is assumed to have been cleared of
// all contents by an external process so we reset the safe generation to the empty generation.
final long safeGeneration = expectedGen == RepositoryData.EMPTY_REPO_GEN ? RepositoryData.EMPTY_REPO_GEN : (uninitializedMeta ? expectedGen : genInState);
// Regardless of whether or not the safe generation has been reset, the pending generation always increments so that
// even if a repository has been manually cleared of all contents we will never reuse the same repository generation.
// This is motivated by the consistency behavior the S3 based blob repository implementation has to support which does
// not offer any consistency guarantees when it comes to overwriting the same blob name with different content.
final long nextPendingGen = metadata.pendingGeneration() + 1;
newGen = uninitializedMeta ? Math.max(expectedGen + 1, nextPendingGen) : nextPendingGen;
assert newGen > latestKnownRepoGen.get() : "Attempted new generation [" + newGen + "] must be larger than latest known generation [" + latestKnownRepoGen.get() + "]";
return ClusterState.builder(currentState).metadata(Metadata.builder(currentState.getMetadata()).putCustom(RepositoriesMetadata.TYPE, currentState.metadata().<RepositoriesMetadata>custom(RepositoriesMetadata.TYPE).withUpdatedGeneration(repoName, safeGeneration, newGen)).build()).build();
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(new RepositoryException(metadata.name(), "Failed to execute cluster state update [" + source + "]", e));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
setPendingStep.onResponse(newGen);
}
});
final StepListener<RepositoryData> filterRepositoryDataStep = new StepListener<>();
// Step 2: Write new index-N blob to repository and update index.latest
setPendingStep.whenComplete(newGen -> threadPool().executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> {
// BwC logic: Load snapshot version information if any snapshot is missing a version in RepositoryData so that the new
// RepositoryData contains a version for every snapshot
final List<SnapshotId> snapshotIdsWithoutVersion = repositoryData.getSnapshotIds().stream().filter(snapshotId -> repositoryData.getVersion(snapshotId) == null).collect(Collectors.toList());
if (snapshotIdsWithoutVersion.isEmpty() == false) {
final Map<SnapshotId, Version> updatedVersionMap = new ConcurrentHashMap<>();
final GroupedActionListener<Void> loadAllVersionsListener = new GroupedActionListener<>(ActionListener.runAfter(new ActionListener<Collection<Void>>() {
@Override
public void onResponse(Collection<Void> voids) {
LOGGER.info("Successfully loaded all snapshot's version information for {} from snapshot metadata", AllocationService.firstListElementsToCommaDelimitedString(snapshotIdsWithoutVersion, SnapshotId::toString, LOGGER.isDebugEnabled()));
}
@Override
public void onFailure(Exception e) {
LOGGER.warn("Failure when trying to load missing version information from snapshot metadata", e);
}
}, () -> filterRepositoryDataStep.onResponse(repositoryData.withVersions(updatedVersionMap))), snapshotIdsWithoutVersion.size());
for (SnapshotId snapshotId : snapshotIdsWithoutVersion) {
threadPool().executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(loadAllVersionsListener, () -> {
ActionListener<SnapshotInfo> snapshotInfoListener = ActionListener.delegateFailure(loadAllVersionsListener, (delegate, snapshotInfo) -> {
updatedVersionMap.put(snapshotId, snapshotInfo.version());
delegate.onResponse(null);
});
getSnapshotInfo(snapshotId, snapshotInfoListener);
}));
}
} else {
filterRepositoryDataStep.onResponse(repositoryData);
}
})), listener::onFailure);
filterRepositoryDataStep.whenComplete(filteredRepositoryData -> {
final long newGen = setPendingStep.result();
if (latestKnownRepoGen.get() >= newGen) {
throw new IllegalArgumentException("Tried writing generation [" + newGen + "] but repository is at least at generation [" + latestKnownRepoGen.get() + "] already");
}
// write the index file
final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen);
LOGGER.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob);
writeAtomic(indexBlob, BytesReference.bytes(filteredRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), writeShardGens)), true);
// write the current generation to the index-latest file
final BytesReference genBytes;
try (BytesStreamOutput bStream = new BytesStreamOutput()) {
bStream.writeLong(newGen);
genBytes = bStream.bytes();
}
LOGGER.debug("Repository [{}] updating index.latest with generation [{}]", metadata.name(), newGen);
writeAtomic(INDEX_LATEST_BLOB, genBytes, false);
// Step 3: Update CS to reflect new repository generation.
clusterService.submitStateUpdateTask("set safe repository generation [" + metadata.name() + "][" + newGen + "]", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
final RepositoryMetadata meta = getRepoMetadata(currentState);
if (meta.generation() != expectedGen) {
throw new IllegalStateException("Tried to update repo generation to [" + newGen + "] but saw unexpected generation in state [" + meta + "]");
}
if (meta.pendingGeneration() != newGen) {
throw new IllegalStateException("Tried to update from unexpected pending repo generation [" + meta.pendingGeneration() + "] after write to generation [" + newGen + "]");
}
return ClusterState.builder(currentState).metadata(Metadata.builder(currentState.getMetadata()).putCustom(RepositoriesMetadata.TYPE, currentState.metadata().<RepositoriesMetadata>custom(RepositoriesMetadata.TYPE).withUpdatedGeneration(metadata.name(), newGen, newGen)).build()).build();
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(new RepositoryException(metadata.name(), "Failed to execute cluster state update [" + source + "]", e));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(listener, () -> {
// Delete all now outdated index files up to 1000 blobs back from the new generation.
// If there are more than 1000 dangling index-N cleanup functionality on repo delete will take care of them.
// Deleting one older than the current expectedGen is done for BwC reasons as older versions used to keep
// two index-N blobs around.
final List<String> oldIndexN = LongStream.range(Math.max(Math.max(expectedGen - 1, 0), newGen - 1000), newGen).mapToObj(gen -> INDEX_FILE_PREFIX + gen).collect(Collectors.toList());
try {
blobContainer().deleteBlobsIgnoringIfNotExists(oldIndexN);
} catch (IOException e) {
LOGGER.warn("Failed to clean up old index blobs {}", oldIndexN);
}
}));
}
});
}, listener::onFailure);
}
Aggregations