
Example 1 with RestartCallback

Use of org.elasticsearch.test.InternalTestCluster.RestartCallback in the elasticsearch project by elastic.
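
Every example below passes a RestartCallback to internalCluster().restartNode(...) or internalCluster().fullRestart(...) and overrides some combination of its hooks. The sketch below is not taken from the repository; it is a hypothetical test (the method name testRestartCallbackSketch and the index name are invented) that simply gathers the three hooks the real examples override, assuming the same ESIntegTestCase context in which those tests run. The comments describe the behavior only as the examples below exercise it.

public void testRestartCallbackSketch() throws Exception {
    internalCluster().startNodes(2);
    createIndex("test");
    ensureGreen();
    internalCluster().fullRestart(new RestartCallback() {

        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            // runs while the node is stopped; the returned Settings are applied when the node
            // is started again (Examples 3 and 4 use this to set gateway.recover_after_nodes)
            return Settings.EMPTY;
        }

        @Override
        public boolean clearData(String nodeName) {
            // returning true wipes the node's data directory before it comes back
            // (Example 3 clears only the first node this way); false keeps the data
            return false;
        }

        @Override
        public void doAfterNodes(int numNodes, Client activeClient) throws Exception {
            // during a full restart this runs with a client to a node that is still reachable;
            // numNodes counts nodes handled so far (Example 2 indexes a document when exactly
            // one node is down)
        }
    });
    ensureGreen();
}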

From the class RecoveryFromGatewayIT, method testReuseInFileBasedPeerRecovery.

public void testReuseInFileBasedPeerRecovery() throws Exception {
    internalCluster().startMasterOnlyNode();
    final String primaryNode = internalCluster().startDataOnlyNode(nodeSettings(0));
    // create the index with one primary shard and one replica
    client(primaryNode).admin().indices().prepareCreate("test").setSettings(Settings.builder().put("number_of_shards", 1).put("number_of_replicas", 1)).get();
    logger.info("--> indexing docs");
    for (int i = 0; i < randomIntBetween(1, 1024); i++) {
        client(primaryNode).prepareIndex("test", "type").setSource("field", "value").execute().actionGet();
    }
    client(primaryNode).admin().indices().prepareFlush("test").setForce(true).get();
    // start the replica node; we do this after indexing so a file-based recovery is triggered to ensure the files are identical
    final String replicaNode = internalCluster().startDataOnlyNode(nodeSettings(1));
    ensureGreen();
    final RecoveryResponse initialRecoveryResponse = client().admin().indices().prepareRecoveries("test").get();
    final Set<String> files = new HashSet<>();
    for (final RecoveryState recoveryState : initialRecoveryResponse.shardRecoveryStates().get("test")) {
        if (recoveryState.getTargetNode().getName().equals(replicaNode)) {
            for (final RecoveryState.File file : recoveryState.getIndex().fileDetails()) {
                files.add(file.name());
            }
            break;
        }
    }
    logger.info("--> restart replica node");
    internalCluster().restartNode(replicaNode, new RestartCallback() {

        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            // index some more documents; we expect to reuse the files that already exist on the replica
            for (int i = 0; i < randomIntBetween(1, 1024); i++) {
                client(primaryNode).prepareIndex("test", "type").setSource("field", "value").execute().actionGet();
            }
            // prevent a sequence-number-based recovery from being possible
            client(primaryNode).admin().indices().prepareFlush("test").setForce(true).get();
            return super.onNodeStopped(nodeName);
        }
    });
    ensureGreen();
    final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries("test").get();
    for (final RecoveryState recoveryState : recoveryResponse.shardRecoveryStates().get("test")) {
        long recovered = 0;
        long reused = 0;
        int filesRecovered = 0;
        int filesReused = 0;
        for (final RecoveryState.File file : recoveryState.getIndex().fileDetails()) {
            if (files.contains(file.name()) == false) {
                recovered += file.length();
                filesRecovered++;
            } else {
                reused += file.length();
                filesReused++;
            }
        }
        if (recoveryState.getPrimary()) {
            assertThat(recoveryState.getIndex().recoveredBytes(), equalTo(0L));
            assertThat(recoveryState.getIndex().reusedBytes(), equalTo(recoveryState.getIndex().totalBytes()));
            assertThat(recoveryState.getIndex().recoveredFileCount(), equalTo(0));
            assertThat(recoveryState.getIndex().reusedFileCount(), equalTo(recoveryState.getIndex().totalFileCount()));
        } else {
            logger.info("--> replica shard {} recovered from {} to {}, recovered {}, reuse {}", recoveryState.getShardId().getId(), recoveryState.getSourceNode().getName(), recoveryState.getTargetNode().getName(), recoveryState.getIndex().recoveredBytes(), recoveryState.getIndex().reusedBytes());
            assertThat("bytes should have been recovered", recoveryState.getIndex().recoveredBytes(), equalTo(recovered));
            assertThat("data should have been reused", recoveryState.getIndex().reusedBytes(), greaterThan(0L));
            // we have to recover the segments file since we commit the translog ID on engine startup
            assertThat("all existing files should be reused, byte count mismatch", recoveryState.getIndex().reusedBytes(), equalTo(reused));
            assertThat(recoveryState.getIndex().reusedBytes(), equalTo(recoveryState.getIndex().totalBytes() - recovered));
            assertThat("the segment from the last round of indexing should be recovered", recoveryState.getIndex().recoveredFileCount(), equalTo(filesRecovered));
            assertThat("all existing files should be reused, file count mismatch", recoveryState.getIndex().reusedFileCount(), equalTo(filesReused));
            assertThat(recoveryState.getIndex().reusedFileCount(), equalTo(recoveryState.getIndex().totalFileCount() - filesRecovered));
            assertThat("> 0 files should be reused", recoveryState.getIndex().reusedFileCount(), greaterThan(0));
            assertThat("no translog ops should be recovered", recoveryState.getTranslog().recoveredOperations(), equalTo(0));
        }
    }
}
Also used: RestartCallback (org.elasticsearch.test.InternalTestCluster.RestartCallback), RecoveryState (org.elasticsearch.indices.recovery.RecoveryState), Settings (org.elasticsearch.common.settings.Settings), RecoveryResponse (org.elasticsearch.action.admin.indices.recovery.RecoveryResponse), HashSet (java.util.HashSet)

Example 2 with RestartCallback

Use of org.elasticsearch.test.InternalTestCluster.RestartCallback in the elasticsearch project by elastic.

From the class QuorumGatewayIT, method testQuorumRecovery.

public void testQuorumRecovery() throws Exception {
    logger.info("--> starting 3 nodes");
    // we are shutting down nodes - make sure we don't have 2 clusters if we test network
    internalCluster().startNodes(3);
    createIndex("test");
    ensureGreen();
    final NumShards test = getNumShards("test");
    logger.info("--> indexing...");
    client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject().field("field", "value1").endObject()).get();
    //We don't check for failures in the flush response: if we do we might get the following:
    // FlushNotAllowedEngineException[[test][1] recovery is in progress, flush [COMMIT_TRANSLOG] is not allowed]
    flush();
    client().prepareIndex("test", "type1", "2").setSource(jsonBuilder().startObject().field("field", "value2").endObject()).get();
    refresh();
    for (int i = 0; i < 10; i++) {
        assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).get(), 2L);
    }
    logger.info("--> restart all nodes");
    internalCluster().fullRestart(new RestartCallback() {

        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            return null;
        }

        @Override
        public void doAfterNodes(int numNodes, final Client activeClient) throws Exception {
            if (numNodes == 1) {
                assertTrue(awaitBusy(() -> {
                    logger.info("--> running cluster_health (wait for the shards to startup)");
                    ClusterHealthResponse clusterHealth = activeClient.admin().cluster().health(clusterHealthRequest().waitForYellowStatus().waitForNodes("2").waitForActiveShards(test.numPrimaries * 2)).actionGet();
                    logger.info("--> done cluster_health, status {}", clusterHealth.getStatus());
                    return (!clusterHealth.isTimedOut()) && clusterHealth.getStatus() == ClusterHealthStatus.YELLOW;
                }, 30, TimeUnit.SECONDS));
                logger.info("--> one node is closed -- index 1 document into the remaining nodes");
                activeClient.prepareIndex("test", "type1", "3").setSource(jsonBuilder().startObject().field("field", "value3").endObject()).get();
                assertNoFailures(activeClient.admin().indices().prepareRefresh().get());
                for (int i = 0; i < 10; i++) {
                    assertHitCount(activeClient.prepareSearch().setSize(0).setQuery(matchAllQuery()).get(), 3L);
                }
            }
        }
    });
    logger.info("--> all nodes are started back, verifying we got the latest version");
    logger.info("--> running cluster_health (wait for the shards to startup)");
    ensureGreen();
    for (int i = 0; i < 10; i++) {
        assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).get(), 3L);
    }
}
Also used: ClusterHealthResponse (org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse), RestartCallback (org.elasticsearch.test.InternalTestCluster.RestartCallback), Client (org.elasticsearch.client.Client), Settings (org.elasticsearch.common.settings.Settings)

Example 3 with RestartCallback

Use of org.elasticsearch.test.InternalTestCluster.RestartCallback in the elasticsearch project by elastic.

From the class RecoveryFromGatewayIT, method testTwoNodeFirstNodeCleared.

public void testTwoNodeFirstNodeCleared() throws Exception {
    final String firstNode = internalCluster().startNode();
    internalCluster().startNode();
    client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject().field("field", "value1").endObject()).execute().actionGet();
    flush();
    client().prepareIndex("test", "type1", "2").setSource(jsonBuilder().startObject().field("field", "value2").endObject()).execute().actionGet();
    refresh();
    logger.info("Running Cluster Health (wait for the shards to startup)");
    ensureGreen();
    for (int i = 0; i < 10; i++) {
        assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).execute().actionGet(), 2);
    }
    Map<String, long[]> primaryTerms = assertAndCapturePrimaryTerms(null);
    internalCluster().fullRestart(new RestartCallback() {

        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            return Settings.builder().put("gateway.recover_after_nodes", 2).build();
        }

        @Override
        public boolean clearData(String nodeName) {
            return firstNode.equals(nodeName);
        }
    });
    logger.info("Running Cluster Health (wait for the shards to startup)");
    ensureGreen();
    primaryTerms = assertAndCapturePrimaryTerms(primaryTerms);
    for (int i = 0; i < 10; i++) {
        assertHitCount(client().prepareSearch().setSize(0).setQuery(matchAllQuery()).execute().actionGet(), 2);
    }
}
Also used: RestartCallback (org.elasticsearch.test.InternalTestCluster.RestartCallback), Settings (org.elasticsearch.common.settings.Settings)

Example 4 with RestartCallback

Use of org.elasticsearch.test.InternalTestCluster.RestartCallback in the elasticsearch project by elastic.

From the class RecoveryFromGatewayIT, method testStartedShardFoundIfStateNotYetProcessed.

public void testStartedShardFoundIfStateNotYetProcessed() throws Exception {
    // a node may need to report its on-disk shards before it has processed the initial recovered cluster state from the master
    final String nodeName = internalCluster().startNode();
    assertAcked(prepareCreate("test").setSettings(SETTING_NUMBER_OF_SHARDS, 1));
    final Index index = resolveIndex("test");
    final ShardId shardId = new ShardId(index, 0);
    index("test", "type", "1");
    flush("test");
    final boolean corrupt = randomBoolean();
    internalCluster().fullRestart(new RestartCallback() {

        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            // make sure state is not recovered
            return Settings.builder().put(GatewayService.RECOVER_AFTER_NODES_SETTING.getKey(), 2).build();
        }
    });
    if (corrupt) {
        for (Path path : internalCluster().getInstance(NodeEnvironment.class, nodeName).availableShardPaths(shardId)) {
            final Path indexPath = path.resolve(ShardPath.INDEX_FOLDER_NAME);
            if (Files.exists(indexPath)) {
                // multi data path might only have one path in use
                try (DirectoryStream<Path> stream = Files.newDirectoryStream(indexPath)) {
                    for (Path item : stream) {
                        if (item.getFileName().toString().startsWith("segments_")) {
                            logger.debug("--> deleting [{}]", item);
                            Files.delete(item);
                        }
                    }
                }
            }
        }
    }
    DiscoveryNode node = internalCluster().getInstance(ClusterService.class, nodeName).localNode();
    TransportNodesListGatewayStartedShards.NodesGatewayStartedShards response;
    response = internalCluster().getInstance(TransportNodesListGatewayStartedShards.class).execute(new TransportNodesListGatewayStartedShards.Request(shardId, new DiscoveryNode[] { node })).get();
    assertThat(response.getNodes(), hasSize(1));
    assertThat(response.getNodes().get(0).allocationId(), notNullValue());
    if (corrupt) {
        assertThat(response.getNodes().get(0).storeException(), notNullValue());
    } else {
        assertThat(response.getNodes().get(0).storeException(), nullValue());
    }
    // start another node so cluster consistency checks won't time out due to the lack of state
    internalCluster().startNode();
}
Also used: Path (java.nio.file.Path), ShardPath (org.elasticsearch.index.shard.ShardPath), DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode), NodeEnvironment (org.elasticsearch.env.NodeEnvironment), Index (org.elasticsearch.index.Index), RestartCallback (org.elasticsearch.test.InternalTestCluster.RestartCallback), ShardId (org.elasticsearch.index.shard.ShardId), ClusterService (org.elasticsearch.cluster.service.ClusterService), Settings (org.elasticsearch.common.settings.Settings)

Example 5 with RestartCallback

Use of org.elasticsearch.test.InternalTestCluster.RestartCallback in the elasticsearch project by elastic.

From the class MetaDataWriteDataNodesIT, method testMetaWrittenWhenIndexIsClosedAndMetaUpdated.

public void testMetaWrittenWhenIndexIsClosedAndMetaUpdated() throws Exception {
    String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY);
    final String dataNode = internalCluster().startDataOnlyNode(Settings.EMPTY);
    final String index = "index";
    assertAcked(prepareCreate(index).setSettings(Settings.builder().put("index.number_of_replicas", 0)));
    logger.info("--> wait for green index");
    ensureGreen();
    logger.info("--> wait for meta state written for index");
    assertIndexInMetaState(dataNode, index);
    assertIndexInMetaState(masterNode, index);
    logger.info("--> close index");
    client().admin().indices().prepareClose(index).get();
    // verify that the cluster state reflects the closed index
    ClusterStateResponse clusterStateResponse = client().admin().cluster().prepareState().get();
    assertThat(clusterStateResponse.getState().getMetaData().index(index).getState().name(), equalTo(IndexMetaData.State.CLOSE.name()));
    // update the mapping; this should cause new metadata to be written even though the index is closed
    client().admin().indices().preparePutMapping(index).setType("doc").setSource(jsonBuilder().startObject().startObject("properties").startObject("integer_field").field("type", "integer").endObject().endObject().endObject()).get();
    GetMappingsResponse getMappingsResponse = client().admin().indices().prepareGetMappings(index).addTypes("doc").get();
    assertNotNull(((LinkedHashMap) (getMappingsResponse.getMappings().get(index).get("doc").getSourceAsMap().get("properties"))).get("integer_field"));
    // make sure it was also written on the data node although the index is closed
    ImmutableOpenMap<String, IndexMetaData> indicesMetaData = getIndicesMetaDataOnNode(dataNode);
    assertNotNull(((LinkedHashMap) (indicesMetaData.get(index).getMappings().get("doc").getSourceAsMap().get("properties"))).get("integer_field"));
    assertThat(indicesMetaData.get(index).getState(), equalTo(IndexMetaData.State.CLOSE));
    /* Try the same and see if this also works if node was just restarted.
         * Each node holds an array of indices it knows of and checks if it should
         * write new meta data by looking up in this array. We need it because if an
         * index is closed it will not appear in the shard routing and we therefore
         * need to keep track of what we wrote before. However, when the node is
         * restarted this array is empty and we have to fill it before we decide
         * what we write. This is why we explicitly test for it.
         */
    internalCluster().restartNode(dataNode, new RestartCallback());
    client().admin().indices().preparePutMapping(index).setType("doc").setSource(jsonBuilder().startObject().startObject("properties").startObject("float_field").field("type", "float").endObject().endObject().endObject()).get();
    getMappingsResponse = client().admin().indices().prepareGetMappings(index).addTypes("doc").get();
    assertNotNull(((LinkedHashMap) (getMappingsResponse.getMappings().get(index).get("doc").getSourceAsMap().get("properties"))).get("float_field"));
    // make sure it was also written on the data node although the index is closed
    indicesMetaData = getIndicesMetaDataOnNode(dataNode);
    assertNotNull(((LinkedHashMap) (indicesMetaData.get(index).getMappings().get("doc").getSourceAsMap().get("properties"))).get("float_field"));
    assertThat(indicesMetaData.get(index).getState(), equalTo(IndexMetaData.State.CLOSE));
    // finally, check that metadata is also written when the index is opened again
    assertAcked(client().admin().indices().prepareOpen(index).get());
    // make sure index is fully initialized and nothing is changed anymore
    ensureGreen();
    indicesMetaData = getIndicesMetaDataOnNode(dataNode);
    assertThat(indicesMetaData.get(index).getState(), equalTo(IndexMetaData.State.OPEN));
}
Also used: ClusterStateResponse (org.elasticsearch.action.admin.cluster.state.ClusterStateResponse), RestartCallback (org.elasticsearch.test.InternalTestCluster.RestartCallback), GetMappingsResponse (org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse), IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)

Aggregations

RestartCallback (org.elasticsearch.test.InternalTestCluster.RestartCallback): 7 usages
Settings (org.elasticsearch.common.settings.Settings): 5 usages
Path (java.nio.file.Path): 2 usages
Client (org.elasticsearch.client.Client): 2 usages
NodeEnvironment (org.elasticsearch.env.NodeEnvironment): 2 usages
Matchers.containsString (org.hamcrest.Matchers.containsString): 2 usages
IOException (java.io.IOException): 1 usage
HashSet (java.util.HashSet): 1 usage
ElasticsearchException (org.elasticsearch.ElasticsearchException): 1 usage
ClusterHealthResponse (org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse): 1 usage
ClusterStateResponse (org.elasticsearch.action.admin.cluster.state.ClusterStateResponse): 1 usage
GetMappingsResponse (org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse): 1 usage
RecoveryResponse (org.elasticsearch.action.admin.indices.recovery.RecoveryResponse): 1 usage
IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData): 1 usage
DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode): 1 usage
ClusterService (org.elasticsearch.cluster.service.ClusterService): 1 usage
Index (org.elasticsearch.index.Index): 1 usage
MapperParsingException (org.elasticsearch.index.mapper.MapperParsingException): 1 usage
ShardId (org.elasticsearch.index.shard.ShardId): 1 usage
ShardPath (org.elasticsearch.index.shard.ShardPath): 1 usage