Search in sources :

Example 1 with SnapshotIndexShardStatus

use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus in project OpenSearch by opensearch-project.

the class SharedClusterSnapshotRestoreIT method testDataFileFailureDuringSnapshot.

/**
 * Snapshots an index into a "mock" repository configured with
 * {@code random_data_file_io_exception_rate = 0.3} and verifies that whatever the outcome is,
 * it is reported consistently.
 * <ul>
 *   <li>If every shard succeeded, the repository failure count must be zero.</li>
 *   <li>Otherwise the create-snapshot response, the stored snapshot info and the snapshot
 *       status API must all agree on the failed / successful shard counts.</li>
 * </ul>
 *
 * @throws Exception on failure
 */
public void testDataFileFailureDuringSnapshot() throws Exception {
    disableRepoConsistencyCheck("This test intentionally leaves a broken repository");
    final Settings.Builder repositorySettings = Settings.builder()
        .put("location", randomRepoPath())
        .put("random", randomAlphaOfLength(10))
        .put("random_data_file_io_exception_rate", 0.3);
    createRepository("test-repo", "mock", repositorySettings);
    createIndexWithRandomDocs("test-idx", 100);

    logger.info("--> snapshot");
    final CreateSnapshotResponse creationResponse = clusterAdmin().prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(true)
        .setIndices("test-idx")
        .get();
    final SnapshotInfo reportedInfo = creationResponse.getSnapshotInfo();

    // Happy path: all shards made it, so the repository must not have recorded a single failure.
    if (reportedInfo.totalShards() == reportedInfo.successfulShards()) {
        logger.info("--> no failures");
        assertThat(getFailureCount("test-repo"), equalTo(0L));
        return;
    }

    logger.info("--> some failures");
    assertThat(getFailureCount("test-repo"), greaterThan(0L));
    assertThat(reportedInfo.shardFailures().size(), greaterThan(0));
    for (SnapshotShardFailure failure : reportedInfo.shardFailures()) {
        assertThat(failure.nodeId(), notNullValue());
        assertThat(failure.index(), equalTo("test-idx"));
    }

    // The snapshot as stored in the repository must be marked PARTIAL.
    final SnapshotInfo storedInfo = getSnapshot("test-repo", "test-snap");
    assertThat(storedInfo.state(), equalTo(SnapshotState.PARTIAL));
    assertThat(storedInfo.shardFailures().size(), greaterThan(0));
    assertThat(storedInfo.totalShards(), greaterThan(storedInfo.successfulShards()));

    // The status API must expose the very same failures.
    final SnapshotsStatusResponse statusResponse = clusterAdmin().prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap")
        .get();
    assertThat(statusResponse.getSnapshots().size(), equalTo(1));
    final SnapshotStatus status = statusResponse.getSnapshots().get(0);
    assertThat(status.getIndices().size(), equalTo(1));
    final SnapshotIndexStatus testIndexStatus = status.getIndices().get("test-idx");
    assertThat(testIndexStatus, notNullValue());
    assertThat(testIndexStatus.getShardsStats().getFailedShards(), equalTo(storedInfo.failedShards()));
    assertThat(testIndexStatus.getShardsStats().getDoneShards(), equalTo(storedInfo.successfulShards()));
    assertThat(testIndexStatus.getShards().size(), equalTo(storedInfo.totalShards()));

    // A failure description must be present exactly on the shards whose stage is FAILURE.
    int failedShardCount = 0;
    for (SnapshotIndexShardStatus shard : testIndexStatus.getShards().values()) {
        if (shard.getStage() != SnapshotIndexShardStage.FAILURE) {
            assertThat(shard.getFailure(), nullValue());
        } else {
            assertThat(shard.getFailure(), notNullValue());
            failedShardCount++;
        }
    }
    assertThat(testIndexStatus.getShardsStats().getFailedShards(), equalTo(failedShardCount));
}
Also used : SnapshotsStatusResponse(org.opensearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse) SnapshotStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus) CreateSnapshotResponse(org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse) SnapshotIndexStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus) SnapshotIndexShardStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus)

Example 2 with SnapshotIndexShardStatus

use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus in project OpenSearch by opensearch-project.

the class SnapshotStatusApisIT method testCorrectCountsForDoneShards.

/**
 * Tests the following sequence of steps:
 * 1. Start snapshot of two shards (both located on separate data nodes).
 * 2. Have one of the shards snapshot completely and the other block
 * 3. Restart the data node that completed its shard snapshot
 * 4. Make sure that snapshot status APIs show correct file-counts and -sizes
 * 5. Unblock the repository, let the first snapshot succeed, then start a second snapshot
 *    while the other data node is blocked
 * 6. Make sure the shard of the first index reports the same totals in both snapshots and
 *    zero incremental files and bytes in the second one
 *
 * @throws Exception on failure
 */
public void testCorrectCountsForDoneShards() throws Exception {
    final String indexOne = "index-1";
    final String indexTwo = "index-2";
    final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
    final String dataNodeOne = dataNodes.get(0);
    final String dataNodeTwo = dataNodes.get(1);
    // One index per data node, one document each.
    // NOTE(review): singleShardOneNode presumably pins a single-shard index to the given node — confirm.
    createIndex(indexOne, singleShardOneNode(dataNodeOne));
    index(indexOne, "_doc", "some_doc_id", "foo", "bar");
    createIndex(indexTwo, singleShardOneNode(dataNodeTwo));
    index(indexTwo, "_doc", "some_doc_id", "foo", "bar");
    final String repoName = "test-repo";
    createRepository(repoName, "mock");
    // Block the first data node before the snapshot starts, so only indexTwo's shard is expected to finish.
    blockDataNode(repoName, dataNodeOne);
    final String snapshotOne = "snap-1";
    // restarting a data node below so using a cluster-manager client here
    final ActionFuture<CreateSnapshotResponse> responseSnapshotOne = internalCluster().masterClient().admin().cluster().prepareCreateSnapshot(repoName, snapshotOne).setWaitForCompletion(true).execute();
    // Poll (up to 30 seconds) until the snapshot is STARTED and indexTwo's shard is DONE with non-zero totals.
    assertBusy(() -> {
        final SnapshotStatus snapshotStatusOne = getSnapshotStatus(repoName, snapshotOne);
        assertThat(snapshotStatusOne.getState(), is(SnapshotsInProgress.State.STARTED));
        final SnapshotIndexShardStatus snapshotShardState = stateFirstShard(snapshotStatusOne, indexTwo);
        assertThat(snapshotShardState.getStage(), is(SnapshotIndexShardStage.DONE));
        assertThat(snapshotShardState.getStats().getTotalFileCount(), greaterThan(0));
        assertThat(snapshotShardState.getStats().getTotalSize(), greaterThan(0L));
    }, 30L, TimeUnit.SECONDS);
    // Remember the totals reported before the restart so they can be compared afterwards.
    final SnapshotStats snapshotShardStats = stateFirstShard(getSnapshotStatus(repoName, snapshotOne), indexTwo).getStats();
    final int totalFiles = snapshotShardStats.getTotalFileCount();
    final long totalFileSize = snapshotShardStats.getTotalSize();
    // Restart the node whose shard snapshot already completed.
    internalCluster().restartNode(dataNodeTwo);
    // The completed shard must still be DONE and report the same file count and size as before the restart.
    final SnapshotIndexShardStatus snapshotShardStateAfterNodeRestart = stateFirstShard(getSnapshotStatus(repoName, snapshotOne), indexTwo);
    assertThat(snapshotShardStateAfterNodeRestart.getStage(), is(SnapshotIndexShardStage.DONE));
    assertThat(snapshotShardStateAfterNodeRestart.getStats().getTotalFileCount(), equalTo(totalFiles));
    assertThat(snapshotShardStateAfterNodeRestart.getStats().getTotalSize(), equalTo(totalFileSize));
    // Release the block and wait for the first snapshot to finish successfully.
    unblockAllDataNodes(repoName);
    assertThat(responseSnapshotOne.get().getSnapshotInfo().state(), is(SnapshotState.SUCCESS));
    // indexing another document to the second index so it will do writes during the snapshot and we can block on those writes
    index(indexTwo, "_doc", "some_other_doc_id", "foo", "other_bar");
    blockDataNode(repoName, dataNodeTwo);
    final String snapshotTwo = "snap-2";
    final ActionFuture<CreateSnapshotResponse> responseSnapshotTwo = client().admin().cluster().prepareCreateSnapshot(repoName, snapshotTwo).setWaitForCompletion(true).execute();
    waitForBlock(dataNodeTwo, repoName, TimeValue.timeValueSeconds(30L));
    // Compare indexOne's shard across both snapshots: it received no new documents after snap-1.
    assertBusy(() -> {
        final SnapshotStatus snapshotStatusOne = getSnapshotStatus(repoName, snapshotOne);
        final SnapshotStatus snapshotStatusTwo = getSnapshotStatus(repoName, snapshotTwo);
        final SnapshotIndexShardStatus snapshotShardStateOne = stateFirstShard(snapshotStatusOne, indexOne);
        final SnapshotIndexShardStatus snapshotShardStateTwo = stateFirstShard(snapshotStatusTwo, indexOne);
        assertThat(snapshotShardStateOne.getStage(), is(SnapshotIndexShardStage.DONE));
        assertThat(snapshotShardStateTwo.getStage(), is(SnapshotIndexShardStage.DONE));
        final int totalFilesShardOne = snapshotShardStateOne.getStats().getTotalFileCount();
        final long totalSizeShardOne = snapshotShardStateOne.getStats().getTotalSize();
        assertThat(totalFilesShardOne, greaterThan(0));
        assertThat(totalSizeShardOne, greaterThan(0L));
        // Totals must match between the two snapshots ...
        assertThat(totalFilesShardOne, equalTo(snapshotShardStateTwo.getStats().getTotalFileCount()));
        assertThat(totalSizeShardOne, equalTo(snapshotShardStateTwo.getStats().getTotalSize()));
        // ... and the second snapshot must report no incremental files or bytes for this shard.
        assertThat(snapshotShardStateTwo.getStats().getIncrementalFileCount(), equalTo(0));
        assertThat(snapshotShardStateTwo.getStats().getIncrementalSize(), equalTo(0L));
    }, 30L, TimeUnit.SECONDS);
    // Release the block and wait for the second snapshot to finish successfully.
    unblockAllDataNodes(repoName);
    assertThat(responseSnapshotTwo.get().getSnapshotInfo().state(), is(SnapshotState.SUCCESS));
}
Also used : SnapshotStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus) CreateSnapshotResponse(org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse) SnapshotStats(org.opensearch.action.admin.cluster.snapshots.status.SnapshotStats) SnapshotIndexShardStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus)

Example 3 with SnapshotIndexShardStatus

use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus in project OpenSearch by opensearch-project.

the class SharedClusterSnapshotRestoreIT method testSnapshotStatus.

/**
 * Exercises the snapshot status and get-snapshots APIs across the lifetime of one snapshot:
 * while it is blocked mid-flight (queried with and without a repository name), after it has
 * completed, and for snapshot names that do not exist (with and without
 * {@code ignoreUnavailable}).
 *
 * @throws Exception on failure
 */
public void testSnapshotStatus() throws Exception {
    final Client pinnedClient = client();
    final Settings.Builder repositorySettings = Settings.builder()
        .put("location", randomRepoPath())
        .put("random", randomAlphaOfLength(10))
        .put("wait_after_unblock", 200);
    createRepository("test-repo", "mock", repositorySettings);

    // Index spread over 2 nodes without replicas, so that every node holds a primary.
    assertAcked(prepareCreate("test-idx", 2, Settings.builder().put("number_of_replicas", 0)));
    indexRandomDocs("test-idx", 100);

    // Block one of the nodes, then kick off the snapshot without waiting for it.
    final String blockedNodeName = blockNodeWithIndex("test-repo", "test-idx");
    logger.info("--> snapshot");
    pinnedClient.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(false)
        .setIncludeGlobalState(false)
        .setIndices("test-idx")
        .get();
    logger.info("--> waiting for block to kick in");
    waitForBlock(blockedNodeName, "test-repo", TimeValue.timeValueSeconds(60));

    logger.info("--> execution was blocked on node [{}], checking snapshot status with specified repository and snapshot", blockedNodeName);
    final SnapshotsStatusResponse repoScopedResponse = pinnedClient.admin().cluster().prepareSnapshotStatus("test-repo").get();
    assertThat(repoScopedResponse.getSnapshots().size(), equalTo(1));
    final SnapshotStatus repoScopedStatus = repoScopedResponse.getSnapshots().get(0);
    assertThat(repoScopedStatus.getState(), equalTo(State.STARTED));
    assertThat(repoScopedStatus.includeGlobalState(), equalTo(false));
    // The block hit during a data write, hence at least one shard snapshot has to be in STARTED stage.
    assertThat(repoScopedStatus.getShardsStats().getStartedShards(), greaterThan(0));
    for (SnapshotIndexShardStatus shard : repoScopedStatus.getIndices().get("test-idx")) {
        if (shard.getStage() != SnapshotIndexShardStage.STARTED) {
            continue;
        }
        assertThat(shard.getNodeId(), notNullValue());
    }

    logger.info("--> checking snapshot status for all currently running and snapshot with empty repository");
    final SnapshotsStatusResponse unscopedResponse = pinnedClient.admin().cluster().prepareSnapshotStatus().get();
    assertThat(unscopedResponse.getSnapshots().size(), equalTo(1));
    final SnapshotStatus unscopedStatus = unscopedResponse.getSnapshots().get(0);
    assertThat(unscopedStatus.getState(), equalTo(State.STARTED));
    assertThat(unscopedStatus.includeGlobalState(), equalTo(false));
    // Same expectation as above: the blocked write keeps at least one shard in STARTED stage.
    assertThat(unscopedStatus.getShardsStats().getStartedShards(), greaterThan(0));
    for (SnapshotIndexShardStatus shard : unscopedStatus.getIndices().get("test-idx")) {
        if (shard.getStage() != SnapshotIndexShardStage.STARTED) {
            continue;
        }
        assertThat(shard.getNodeId(), notNullValue());
    }

    logger.info("--> checking that _current returns the currently running snapshot");
    final GetSnapshotsResponse currentSnapshots = pinnedClient.admin().cluster()
        .prepareGetSnapshots("test-repo")
        .setCurrentSnapshot()
        .get();
    assertThat(currentSnapshots.getSnapshots().size(), equalTo(1));
    final SnapshotInfo runningInfo = currentSnapshots.getSnapshots().get(0);
    assertThat(runningInfo.state(), equalTo(SnapshotState.IN_PROGRESS));

    logger.info("--> unblocking blocked node");
    unblockNode("test-repo", blockedNodeName);
    final SnapshotInfo completedInfo = waitForCompletion("test-repo", "test-snap", TimeValue.timeValueSeconds(600));
    logger.info("Number of failed shards [{}]", completedInfo.shardFailures().size());
    logger.info("--> done");

    logger.info("--> checking snapshot status again after snapshot is done");
    final SnapshotsStatusResponse finishedResponse = pinnedClient.admin().cluster()
        .prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap")
        .get();
    final SnapshotStatus finishedStatus = finishedResponse.getSnapshots().get(0);
    assertThat(finishedStatus.getIndices().size(), equalTo(1));
    assertThat(finishedStatus.includeGlobalState(), equalTo(false));
    final SnapshotIndexStatus testIndexStatus = finishedStatus.getIndices().get("test-idx");
    assertThat(testIndexStatus, notNullValue());
    assertThat(testIndexStatus.getShardsStats().getInitializingShards(), equalTo(0));
    assertThat(testIndexStatus.getShardsStats().getFailedShards(), equalTo(completedInfo.failedShards()));
    assertThat(testIndexStatus.getShardsStats().getDoneShards(), equalTo(completedInfo.successfulShards()));
    assertThat(testIndexStatus.getShards().size(), equalTo(completedInfo.totalShards()));

    logger.info("--> checking snapshot status after it is done with empty repository");
    final SnapshotsStatusResponse idleResponse = pinnedClient.admin().cluster().prepareSnapshotStatus().get();
    assertThat(idleResponse.getSnapshots().size(), equalTo(0));

    logger.info("--> checking that _current no longer returns the snapshot");
    final GetSnapshotsResponse currentAfterCompletion = pinnedClient.admin().cluster()
        .prepareGetSnapshots("test-repo")
        .addSnapshots("_current")
        .get();
    assertThat(currentAfterCompletion.getSnapshots().isEmpty(), equalTo(true));

    // Unknown snapshot name with ignoreUnavailable left at false on the request: the call must throw.
    final SnapshotMissingException missing = expectThrows(
        SnapshotMissingException.class,
        () -> pinnedClient.admin().cluster().prepareSnapshotStatus("test-repo").addSnapshots("test-snap-doesnt-exist").get()
    );
    assertEquals("[test-repo:test-snap-doesnt-exist] is missing", missing.getMessage());

    // Unknown snapshot name with ignoreUnavailable = true: no exception, just an empty result.
    final SnapshotsStatusResponse ignoredMissingResponse = pinnedClient.admin().cluster()
        .prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap-doesnt-exist")
        .setIgnoreUnavailable(true)
        .get();
    assertTrue(ignoredMissingResponse.getSnapshots().isEmpty());

    // Known plus unknown snapshot name with ignoreUnavailable = true: only the known one comes back.
    final SnapshotsStatusResponse mixedResponse = pinnedClient.admin().cluster()
        .prepareSnapshotStatus("test-repo")
        .addSnapshots("test-snap", "test-snap-doesnt-exist")
        .setIgnoreUnavailable(true)
        .get();
    assertEquals(1, mixedResponse.getSnapshots().size());
    assertEquals("test-snap", mixedResponse.getSnapshots().get(0).getSnapshot().getSnapshotId().getName());
}
Also used : SnapshotsStatusResponse(org.opensearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse) GetSnapshotsResponse(org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse) SnapshotStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus) SnapshotIndexShardStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus) SnapshotIndexStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus) Matchers.containsString(org.hamcrest.Matchers.containsString) Client(org.opensearch.client.Client)

Example 4 with SnapshotIndexShardStatus

use of org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus in project OpenSearch by opensearch-project.

the class SharedClusterSnapshotRestoreIT method testSnapshotMoreThanOnce.

/**
 * Takes three consecutive snapshots of the same single-shard index and checks the number of
 * files each one reports as processed:
 * <ol>
 *   <li>first snapshot after a force-merge to one segment: more than one file per shard;</li>
 *   <li>second snapshot with no changes in between: zero files;</li>
 *   <li>third snapshot after deleting a document: more than two files.</li>
 * </ol>
 *
 * @throws InterruptedException if the test thread is interrupted while waiting
 */
public void testSnapshotMoreThanOnce() throws InterruptedException {
    Client client = client();
    createRepository("test-repo", "fs");
    // only one shard
    final Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build();
    assertAcked(prepareCreate("test").setSettings(indexSettings));
    ensureGreen();
    indexRandomDocs("test", randomIntBetween(10, 100));
    // Merge down to a single segment and flush so the first snapshot sees a stable set of files.
    assertNoFailures(client().admin().indices().prepareForceMerge("test").setFlush(true).setMaxNumSegments(1).get());
    createSnapshot("test-repo", "test", Collections.singletonList("test"));
    assertThat(getSnapshot("test-repo", "test").state(), equalTo(SnapshotState.SUCCESS));
    {
        // First snapshot: every shard has to process more than one file.
        SnapshotStatus snapshotStatus = client.admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test").get().getSnapshots().get(0);
        List<SnapshotIndexShardStatus> shards = snapshotStatus.getShards();
        for (SnapshotIndexShardStatus status : shards) {
            assertThat(status.getStats().getProcessedFileCount(), greaterThan(1));
        }
    }
    createSnapshot("test-repo", "test-1", Collections.singletonList("test"));
    assertThat(getSnapshot("test-repo", "test-1").state(), equalTo(SnapshotState.SUCCESS));
    {
        // Nothing changed since the previous snapshot, so no file should be processed again.
        SnapshotStatus snapshotStatus = client.admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-1").get().getSnapshots().get(0);
        List<SnapshotIndexShardStatus> shards = snapshotStatus.getShards();
        for (SnapshotIndexShardStatus status : shards) {
            assertThat(status.getStats().getProcessedFileCount(), equalTo(0));
        }
    }
    client().prepareDelete("test", "1").get();
    createSnapshot("test-repo", "test-2", Collections.singletonList("test"));
    assertThat(getSnapshot("test-repo", "test-2").state(), equalTo(SnapshotState.SUCCESS));
    {
        SnapshotStatus snapshotStatus = client.admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-2").get().getSnapshots().get(0);
        // The index settings used to be fetched here into a local that was never read;
        // that dead request has been removed.
        List<SnapshotIndexShardStatus> shards = snapshotStatus.getShards();
        for (SnapshotIndexShardStatus status : shards) {
            // we flush before the snapshot such that we have to process the segments_N files plus the .del file
            // soft-delete generates DV files.
            assertThat(status.getStats().getProcessedFileCount(), greaterThan(2));
        }
    }
}
Also used : SnapshotStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus) List(java.util.List) ArrayList(java.util.ArrayList) SnapshotIndexShardStatus(org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus) Client(org.opensearch.client.Client) Settings(org.opensearch.common.settings.Settings)

Aggregations

SnapshotIndexShardStatus (org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexShardStatus): 4, SnapshotStatus (org.opensearch.action.admin.cluster.snapshots.status.SnapshotStatus): 4, CreateSnapshotResponse (org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse): 2, SnapshotIndexStatus (org.opensearch.action.admin.cluster.snapshots.status.SnapshotIndexStatus): 2, SnapshotsStatusResponse (org.opensearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse): 2, Client (org.opensearch.client.Client): 2, ArrayList (java.util.ArrayList): 1, List (java.util.List): 1, Matchers.containsString (org.hamcrest.Matchers.containsString): 1, GetSnapshotsResponse (org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse): 1, SnapshotStats (org.opensearch.action.admin.cluster.snapshots.status.SnapshotStats): 1, Settings (org.opensearch.common.settings.Settings): 1