Use of org.opensearch.snapshots.SnapshotException in project OpenSearch by opensearch-project.
The class TransportGetSnapshotsAction, method snapshots().
/**
* Returns a list of snapshots from repository sorted by snapshot creation date
*
* @param snapshotsInProgress snapshots in progress in the cluster state
* @param repositoryName repository name
* @param snapshotIds snapshots for which to fetch snapshot information
* @param ignoreUnavailable if true, snapshots that could not be read will only be logged with a warning,
* if false, they will throw an error
* @return list of snapshots
*/
private List<SnapshotInfo> snapshots(
    @Nullable SnapshotsInProgress snapshotsInProgress,
    String repositoryName,
    List<SnapshotId> snapshotIds,
    boolean ignoreUnavailable
) {
    final Set<SnapshotInfo> snapshotSet = new HashSet<>();
    final Set<SnapshotId> snapshotIdsToIterate = new HashSet<>(snapshotIds);
    // first, look at the snapshots in progress
    final List<SnapshotsInProgress.Entry> entries = SnapshotsService.currentSnapshots(
        snapshotsInProgress,
        repositoryName,
        snapshotIdsToIterate.stream().map(SnapshotId::getName).collect(Collectors.toList())
    );
    for (SnapshotsInProgress.Entry entry : entries) {
        snapshotSet.add(new SnapshotInfo(entry));
        snapshotIdsToIterate.remove(entry.snapshot().getSnapshotId());
    }
    // then, look in the repository
    final Repository repository = repositoriesService.repository(repositoryName);
    for (SnapshotId snapshotId : snapshotIdsToIterate) {
        try {
            snapshotSet.add(repository.getSnapshotInfo(snapshotId));
        } catch (Exception ex) {
            if (ignoreUnavailable) {
                logger.warn(() -> new ParameterizedMessage("failed to get snapshot [{}]", snapshotId), ex);
            } else {
                if (ex instanceof SnapshotException) {
                    throw ex;
                }
                throw new SnapshotException(repositoryName, snapshotId, "Snapshot could not be read", ex);
            }
        }
    }
    final ArrayList<SnapshotInfo> snapshotList = new ArrayList<>(snapshotSet);
    CollectionUtil.timSort(snapshotList);
    return unmodifiableList(snapshotList);
}
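The catch block above shows the usual contract around SnapshotException: if the failure already is one, it is rethrown untouched; anything else is wrapped so the repository name and snapshot id travel with the error. A minimal sketch of that wrap-or-rethrow pattern, assuming the org.opensearch.snapshots classes are on the classpath; the Callable parameter and the helper name readOrWrap are illustrative, not part of OpenSearch:

import java.util.concurrent.Callable;

import org.opensearch.snapshots.SnapshotException;
import org.opensearch.snapshots.SnapshotId;
import org.opensearch.snapshots.SnapshotInfo;

// Minimal sketch: propagate an existing SnapshotException unchanged, wrap any
// other failure so that the repository name and snapshot id are attached.
static SnapshotInfo readOrWrap(String repositoryName, SnapshotId snapshotId, Callable<SnapshotInfo> reader) {
    try {
        return reader.call();
    } catch (SnapshotException ex) {
        throw ex; // already carries the snapshot context
    } catch (Exception ex) {
        throw new SnapshotException(repositoryName, snapshotId, "Snapshot could not be read", ex);
    }
}

In the method above, repository.getSnapshotInfo(snapshotId) plays the role of the reader.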
Use of org.opensearch.snapshots.SnapshotException in project OpenSearch by opensearch-project.
The class ExceptionSerializationTests, method testSnapshotException().
public void testSnapshotException() throws IOException {
    final Snapshot snapshot = new Snapshot("repo", new SnapshotId("snap", UUIDs.randomBase64UUID()));
    SnapshotException ex = serialize(new SnapshotException(snapshot, "no such snapshot", new NullPointerException()));
    assertEquals(ex.getRepositoryName(), snapshot.getRepository());
    assertEquals(ex.getSnapshotName(), snapshot.getSnapshotId().getName());
    assertEquals(ex.getMessage(), "[" + snapshot + "] no such snapshot");
    assertTrue(ex.getCause() instanceof NullPointerException);

    ex = serialize(new SnapshotException(null, "no such snapshot", new NullPointerException()));
    assertNull(ex.getRepositoryName());
    assertNull(ex.getSnapshotName());
    assertEquals(ex.getMessage(), "[_na] no such snapshot");
    assertTrue(ex.getCause() instanceof NullPointerException);
}
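The serialize(...) call round-trips the exception through the stream wire format before the assertions run. A rough sketch of such a round-trip, assuming OpenSearch's BytesStreamOutput/StreamInput with writeException/readException; the real helper in ExceptionSerializationTests is more involved (it also randomizes the wire version), so this is only an approximation:

// Rough sketch of an exception round-trip through the stream wire format.
static <T extends Exception> T roundTrip(T exception) throws IOException {
    try (BytesStreamOutput out = new BytesStreamOutput()) {
        out.writeException(exception);
        try (StreamInput in = out.bytes().streamInput()) {
            return in.readException();
        }
    }
}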
Use of org.opensearch.snapshots.SnapshotException in project OpenSearch by opensearch-project.
The class SnapshotDisruptionIT, method testDisruptionAfterFinalization().
public void testDisruptionAfterFinalization() throws Exception {
    final String idxName = "test";
    internalCluster().startMasterOnlyNodes(3);
    final String dataNode = internalCluster().startDataOnlyNode();
    ensureStableCluster(4);
    createRandomIndex(idxName);
    createRepository("test-repo", "fs");
    final String masterNode1 = internalCluster().getMasterName();
    NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.UNRESPONSIVE);
    internalCluster().setDisruptionScheme(networkDisruption);
    ClusterService clusterService = internalCluster().clusterService(masterNode1);
    CountDownLatch disruptionStarted = new CountDownLatch(1);
    clusterService.addListener(new ClusterStateListener() {
        @Override
        public void clusterChanged(ClusterChangedEvent event) {
            SnapshotsInProgress snapshots = event.state().custom(SnapshotsInProgress.TYPE);
            if (snapshots != null && snapshots.entries().size() > 0) {
                final SnapshotsInProgress.Entry snapshotEntry = snapshots.entries().get(0);
                if (snapshotEntry.state() == SnapshotsInProgress.State.SUCCESS) {
                    final RepositoriesMetadata repoMeta = event.state().metadata().custom(RepositoriesMetadata.TYPE);
                    final RepositoryMetadata metadata = repoMeta.repository("test-repo");
                    if (metadata.pendingGeneration() > snapshotEntry.repositoryStateId()) {
                        logger.info("--> starting disruption");
                        networkDisruption.startDisrupting();
                        clusterService.removeListener(this);
                        disruptionStarted.countDown();
                    }
                }
            }
        }
    });
    final String snapshot = "test-snap";
    logger.info("--> starting snapshot");
    ActionFuture<CreateSnapshotResponse> future = client(masterNode1).admin()
        .cluster()
        .prepareCreateSnapshot("test-repo", snapshot)
        .setWaitForCompletion(true)
        .setIndices(idxName)
        .execute();
    logger.info("--> waiting for disruption to start");
    assertTrue(disruptionStarted.await(1, TimeUnit.MINUTES));
    awaitNoMoreRunningOperations(dataNode);
    logger.info("--> verify that snapshot was successful or no longer exist");
    assertBusy(() -> {
        try {
            assertSnapshotExists("test-repo", snapshot);
        } catch (SnapshotMissingException exception) {
            logger.info("--> done verifying, snapshot doesn't exist");
        }
    }, 1, TimeUnit.MINUTES);
    logger.info("--> stopping disrupting");
    networkDisruption.stopDisrupting();
    ensureStableCluster(4, masterNode1);
    logger.info("--> done");
    try {
        future.get();
        fail("Should have failed because the node disconnected from the cluster during snapshot finalization");
    } catch (Exception ex) {
        final SnapshotException sne = (SnapshotException) ExceptionsHelper.unwrap(ex, SnapshotException.class);
        assertNotNull(sne);
        assertThat(
            sne.getMessage(),
            either(endsWith(" Failed to update cluster state during snapshot finalization")).or(endsWith(" no longer master"))
        );
        assertThat(sne.getSnapshotName(), is(snapshot));
    }
    awaitNoMoreRunningOperations(dataNode);
}
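The final catch block above is the idiomatic way to get at a SnapshotException when a snapshot future fails: the transport layer may wrap it, so ExceptionsHelper.unwrap digs it out. A compact sketch of that pattern on its own, assuming a future and logger like the ones in the test; the log message is illustrative:

// Sketch: extract a SnapshotException from whatever wrapper the future throws,
// as the assertion block above does with ExceptionsHelper.unwrap.
try {
    future.get();
} catch (Exception ex) {
    final SnapshotException sne = (SnapshotException) ExceptionsHelper.unwrap(ex, SnapshotException.class);
    if (sne != null) {
        logger.warn("snapshot [{}] in repository [{}] failed: {}",
            sne.getSnapshotName(), sne.getRepositoryName(), sne.getMessage());
    }
}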
Use of org.opensearch.snapshots.SnapshotException in project OpenSearch by opensearch-project.
The class SnapshotDisruptionIT, method testMasterFailOverDuringShardSnapshots().
public void testMasterFailOverDuringShardSnapshots() throws Exception {
    internalCluster().startMasterOnlyNodes(3);
    final String dataNode = internalCluster().startDataOnlyNode();
    ensureStableCluster(4);
    final String repoName = "test-repo";
    createRepository(repoName, "mock");
    final String indexName = "index-one";
    createIndex(indexName);
    client().prepareIndex(indexName).setSource("foo", "bar").get();
    blockDataNode(repoName, dataNode);
    logger.info("--> create snapshot via master node client");
    final ActionFuture<CreateSnapshotResponse> snapshotResponse = internalCluster().masterClient()
        .admin()
        .cluster()
        .prepareCreateSnapshot(repoName, "test-snap")
        .setWaitForCompletion(true)
        .execute();
    waitForBlock(dataNode, repoName, TimeValue.timeValueSeconds(30L));
    final NetworkDisruption networkDisruption = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
    internalCluster().setDisruptionScheme(networkDisruption);
    networkDisruption.startDisrupting();
    ensureStableCluster(3, dataNode);
    unblockNode(repoName, dataNode);
    networkDisruption.stopDisrupting();
    awaitNoMoreRunningOperations(dataNode);
    logger.info("--> make sure isolated master responds to snapshot request");
    final SnapshotException sne = expectThrows(
        SnapshotException.class,
        () -> snapshotResponse.actionGet(TimeValue.timeValueSeconds(30L))
    );
    assertThat(sne.getMessage(), endsWith("no longer master"));
}
Use of org.opensearch.snapshots.SnapshotException in project OpenSearch by opensearch-project.
The class BlobStoreRepository, method finalizeSnapshot().
@Override
public void finalizeSnapshot(
    final ShardGenerations shardGenerations,
    final long repositoryStateId,
    final Metadata clusterMetadata,
    SnapshotInfo snapshotInfo,
    Version repositoryMetaVersion,
    Function<ClusterState, ClusterState> stateTransformer,
    final ActionListener<RepositoryData> listener
) {
    assert repositoryStateId > RepositoryData.UNKNOWN_REPO_GEN
        : "Must finalize based on a valid repository generation but received [" + repositoryStateId + "]";
    final Collection<IndexId> indices = shardGenerations.indices();
    final SnapshotId snapshotId = snapshotInfo.snapshotId();
    // Once we are done writing the updated index-N blob we remove the now unreferenced index-${uuid} blobs in each shard
    // directory if all nodes are at least at version SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION
    // If there are older version nodes in the cluster, we don't need to run this cleanup as it will have already happened
    // when writing the index-${N} to each shard directory.
    final boolean writeShardGens = SnapshotsService.useShardGenerations(repositoryMetaVersion);
    final Consumer<Exception> onUpdateFailure = e -> listener.onFailure(
        new SnapshotException(metadata.name(), snapshotId, "failed to update snapshot in repository", e)
    );
    final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
    final boolean writeIndexGens = SnapshotsService.useIndexGenerations(repositoryMetaVersion);
    final StepListener<RepositoryData> repoDataListener = new StepListener<>();
    getRepositoryData(repoDataListener);
    repoDataListener.whenComplete(existingRepositoryData -> {
        final Map<IndexId, String> indexMetas;
        final Map<String, String> indexMetaIdentifiers;
        if (writeIndexGens) {
            indexMetaIdentifiers = ConcurrentCollections.newConcurrentMap();
            indexMetas = ConcurrentCollections.newConcurrentMap();
        } else {
            indexMetas = null;
            indexMetaIdentifiers = null;
        }
        final ActionListener<Void> allMetaListener = new GroupedActionListener<>(ActionListener.wrap(v -> {
            final RepositoryData updatedRepositoryData = existingRepositoryData.addSnapshot(
                snapshotId,
                snapshotInfo.state(),
                Version.CURRENT,
                shardGenerations,
                indexMetas,
                indexMetaIdentifiers
            );
            writeIndexGen(updatedRepositoryData, repositoryStateId, repositoryMetaVersion, stateTransformer, ActionListener.wrap(newRepoData -> {
                if (writeShardGens) {
                    cleanupOldShardGens(existingRepositoryData, updatedRepositoryData);
                }
                listener.onResponse(newRepoData);
            }, onUpdateFailure));
        }, onUpdateFailure), 2 + indices.size());
        // We ignore all FileAlreadyExistsException when writing metadata since otherwise a master failover while in this method will
        // mean that no snap-${uuid}.dat blob is ever written for this snapshot. This is safe because any updated version of the
        // index or global metadata will be compatible with the segments written in this snapshot as well.
        // Failing on an already existing index-${repoGeneration} below ensures that the index.latest blob is not updated in a way
        // that decrements the generation it points at
        // Write Global MetaData
        executor.execute(ActionRunnable.run(allMetaListener,
            () -> GLOBAL_METADATA_FORMAT.write(clusterMetadata, blobContainer(), snapshotId.getUUID(), compress)));
        // write the index metadata for each index in the snapshot
        for (IndexId index : indices) {
            executor.execute(ActionRunnable.run(allMetaListener, () -> {
                final IndexMetadata indexMetaData = clusterMetadata.index(index.getName());
                if (writeIndexGens) {
                    final String identifiers = IndexMetaDataGenerations.buildUniqueIdentifier(indexMetaData);
                    String metaUUID = existingRepositoryData.indexMetaDataGenerations().getIndexMetaBlobId(identifiers);
                    if (metaUUID == null) {
                        // We don't yet have this version of the metadata so we write it
                        metaUUID = UUIDs.base64UUID();
                        INDEX_METADATA_FORMAT.write(indexMetaData, indexContainer(index), metaUUID, compress);
                        indexMetaIdentifiers.put(identifiers, metaUUID);
                    }
                    indexMetas.put(index, identifiers);
                } else {
                    INDEX_METADATA_FORMAT.write(clusterMetadata.index(index.getName()), indexContainer(index), snapshotId.getUUID(), compress);
                }
            }));
        }
        executor.execute(ActionRunnable.run(allMetaListener,
            () -> SNAPSHOT_FORMAT.write(snapshotInfo, blobContainer(), snapshotId.getUUID(), compress)));
    }, onUpdateFailure);
}
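Two pieces of the method above are worth reading in isolation. Every failure path funnels through onUpdateFailure, which wraps the cause in a SnapshotException naming the repository and snapshot before failing the listener. And allMetaListener is a GroupedActionListener sized 2 + indices.size(): one global-metadata write, one metadata write per index, and one SnapshotInfo (snap-${uuid}.dat) write, with the final repository update running only once all of them complete. A stripped-down, hypothetical sketch of that fan-out/fan-in shape; the write/update helper names are placeholders, not BlobStoreRepository APIs:

// Hypothetical, simplified version of the fan-out in finalizeSnapshot: a grouped
// listener waits for the global metadata write, one write per index, and the
// SnapshotInfo write, then runs the final step exactly once; any failure is
// reported through onUpdateFailure as a SnapshotException.
final Consumer<Exception> onUpdateFailure = e -> listener.onFailure(
    new SnapshotException(metadata.name(), snapshotId, "failed to update snapshot in repository", e));
final ActionListener<Void> allMetaListener = new GroupedActionListener<>(
    ActionListener.wrap(ignored -> updateRepositoryGeneration(), onUpdateFailure), // placeholder final step
    2 + indices.size());
executor.execute(ActionRunnable.run(allMetaListener, this::writeGlobalMetadata));           // placeholder
for (IndexId index : indices) {
    executor.execute(ActionRunnable.run(allMetaListener, () -> writeIndexMetadata(index))); // placeholder
}
executor.execute(ActionRunnable.run(allMetaListener, this::writeSnapshotInfo));             // placeholder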