Search in sources :

Example 1 with ServiceDisruptionScheme

use of org.elasticsearch.test.disruption.ServiceDisruptionScheme in project elasticsearch by elastic.

The class DiscoveryWithServiceDisruptionsIT, method testElectMasterWithLatestVersion.

/**
 * Starts a three-node cluster, forces two complete master elections by isolating every node,
 * and checks that an index created in between (while the first elected master was partitioned
 * away from the other nodes) is still present in the cluster state afterwards.
 *
 * @throws Exception if any cluster operation or assertion helper throws
 */
public void testElectMasterWithLatestVersion() throws Exception {
    configureCluster(3, null, 2);
    final Set<String> nodeNames = new HashSet<>(internalCluster().startNodes(3));
    ensureStableCluster(3);

    // Scheme that disconnects every node from every other node.
    final NetworkDisruption.IsolateAllNodes everyNodeAlone = new NetworkDisruption.IsolateAllNodes(nodeNames);
    ServiceDisruptionScheme fullIsolation = new NetworkDisruption(everyNodeAlone, new NetworkDisconnect());
    internalCluster().setDisruptionScheme(fullIsolation);

    logger.info("--> forcing a complete election to make sure \"preferred\" master is elected");
    fullIsolation.startDisrupting();
    for (String nodeName : nodeNames) {
        assertNoMaster(nodeName);
    }
    internalCluster().clearDisruptionScheme();
    ensureStableCluster(3);

    // The master elected after the full isolation must have the smallest node id of all nodes.
    final String preferredMasterName = internalCluster().getMasterName();
    final DiscoveryNode preferredMaster = internalCluster().clusterService(preferredMasterName).localNode();
    for (String nodeName : nodeNames) {
        final DiscoveryNode candidate = internalCluster().clusterService(nodeName).localNode();
        assertThat(candidate.getId(), greaterThanOrEqualTo(preferredMaster.getId()));
    }
    logger.info("--> preferred master is {}", preferredMaster);

    // Partition the preferred master away from the remaining nodes.
    final Set<String> otherNodes = new HashSet<>(nodeNames);
    otherNodes.remove(preferredMasterName);
    final NetworkDisruption.TwoPartitions masterVersusRest =
        new NetworkDisruption.TwoPartitions(Collections.singleton(preferredMasterName), otherNodes);
    final ServiceDisruptionScheme preferredMasterCutOff = new NetworkDisruption(masterVersusRest, new NetworkDisconnect());
    internalCluster().setDisruptionScheme(preferredMasterCutOff);
    preferredMasterCutOff.startDisrupting();

    // Create the index through one of the nodes that cannot reach the preferred master.
    assertAcked(client(randomFrom(otherNodes)).admin().indices().prepareCreate("test")
        .setSettings(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1, INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0));

    // Swap the partition for a full isolation again, then let the cluster re-form.
    internalCluster().clearDisruptionScheme(false);
    internalCluster().setDisruptionScheme(fullIsolation);
    logger.info("--> forcing a complete election again");
    fullIsolation.startDisrupting();
    for (String nodeName : nodeNames) {
        assertNoMaster(nodeName);
    }
    fullIsolation.stopDisrupting();

    final ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
    if (!clusterState.metaData().hasIndex("test")) {
        fail("index 'test' was lost. current cluster state: " + clusterState);
    }
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) HashSet(java.util.HashSet)

Example 2 with ServiceDisruptionScheme

use of org.elasticsearch.test.disruption.ServiceDisruptionScheme in project elasticsearch by elastic.

The class DiscoveryWithServiceDisruptionsIT, method testAckedIndexing.

/**
 * Test that we do not lose documents whose indexing request was successful, under a randomly selected disruption scheme.
 * We also collect &amp; report the type of indexing failures that occur.
 * <p>
 * This test is a superset of tests run in the Jepsen test suite, with the exception of versioned updates.
 * <p>
 * One indexer thread is started per node. Each indexer is throttled by its own {@link Semaphore}: the test
 * thread releases N permits, every acquired permit results in exactly one indexing attempt, and every attempt
 * (successful or not) counts down the current {@link CountDownLatch}. Ids of acknowledged documents are recorded
 * and, after each disruption round has healed, every recorded id must be retrievable through every node.
 *
 * @throws Exception if a cluster operation, an assertion helper, or waiting on a latch/thread throws
 */
@TestLogging("_root:DEBUG,org.elasticsearch.action.bulk:TRACE,org.elasticsearch.action.get:TRACE,discovery:TRACE," + "org.elasticsearch.cluster.service:TRACE,org.elasticsearch.indices.recovery:TRACE," + "org.elasticsearch.indices.cluster:TRACE,org.elasticsearch.index.shard:TRACE")
public void testAckedIndexing() throws Exception {
    // Request timeout: 1 second, except 5 seconds when TEST_NIGHTLY is set and rarely() fires.
    final int seconds = !(TEST_NIGHTLY && rarely()) ? 1 : 5;
    final String timeout = seconds + "s";
    final List<String> nodes = startCluster(rarely() ? 5 : 3);
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2)).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2)).put(IndexSettings.INDEX_SEQ_NO_CHECKPOINT_SYNC_INTERVAL.getKey(), randomBoolean() ? "5s" : "200ms")));
    ensureGreen();
    ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
    logger.info("disruption scheme [{}] added", disruptionScheme);
    // id -> node sent.
    final ConcurrentHashMap<String, String> ackedDocs = new ConcurrentHashMap<>();
    final AtomicBoolean stop = new AtomicBoolean(false);
    List<Thread> indexers = new ArrayList<>(nodes.size());
    List<Semaphore> semaphores = new ArrayList<>(nodes.size());
    final AtomicInteger idGenerator = new AtomicInteger(0);
    // Holds the latch of the current indexing round; replaced by the test thread before permits are released.
    final AtomicReference<CountDownLatch> countDownLatchRef = new AtomicReference<>();
    // Indexing failures seen by the indexers; only reported (at debug level) in the finally block below.
    final List<Exception> exceptedExceptions = Collections.synchronizedList(new ArrayList<>());
    logger.info("starting indexers");
    try {
        for (final String node : nodes) {
            // Starts with zero permits, so the indexer idles until the test thread releases some.
            final Semaphore semaphore = new Semaphore(0);
            semaphores.add(semaphore);
            final Client client = client(node);
            final String name = "indexer_" + indexers.size();
            final int numPrimaries = getNumShards("test").numPrimaries;
            Thread thread = new Thread(() -> {
                while (!stop.get()) {
                    String id = null;
                    try {
                        // Bounded wait so the stop flag is re-checked even if no permit ever arrives.
                        if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
                            continue;
                        }
                        logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
                        try {
                            id = Integer.toString(idGenerator.incrementAndGet());
                            // Only used for the trace message below.
                            int shard = Math.floorMod(Murmur3HashFunction.hash(id), numPrimaries);
                            logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
                            IndexResponse response = client.prepareIndex("test", "type", id).setSource("{}", XContentType.JSON).setTimeout(timeout).get(timeout);
                            assertEquals(DocWriteResponse.Result.CREATED, response.getResult());
                            // Record the id only after the response was checked, i.e. the write was acknowledged.
                            ackedDocs.put(id, node);
                            logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
                        } catch (ElasticsearchException e) {
                            exceptedExceptions.add(e);
                            final String docId = id;
                            logger.trace((Supplier<?>) () -> new ParameterizedMessage("[{}] failed id [{}] through node [{}]", name, docId, node), e);
                        } finally {
                            // One countdown per consumed permit, regardless of the outcome of the attempt.
                            countDownLatchRef.get().countDown();
                            logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
                        }
                    } catch (InterruptedException e) {
                        // fine - semaphore interrupt
                    } catch (AssertionError | Exception e) {
                        // Logged only: this runs on a background thread and the loop simply continues.
                        logger.info((Supplier<?>) () -> new ParameterizedMessage("unexpected exception in background thread of [{}]", node), e);
                    }
                }
            });
            thread.setName(name);
            thread.start();
            indexers.add(thread);
        }
        int docsPerIndexer = randomInt(3);
        logger.info("indexing {} docs per indexer before partition", docsPerIndexer);
        // The latch must be in place before permits are released, because indexers read it after each attempt.
        countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
        for (Semaphore semaphore : semaphores) {
            semaphore.release(docsPerIndexer);
        }
        assertTrue(countDownLatchRef.get().await(1, TimeUnit.MINUTES));
        for (int iter = 1 + randomInt(2); iter > 0; iter--) {
            logger.info("starting disruptions & indexing (iteration [{}])", iter);
            disruptionScheme.startDisrupting();
            docsPerIndexer = 1 + randomInt(5);
            logger.info("indexing {} docs per indexer during partition", docsPerIndexer);
            countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
            Collections.shuffle(semaphores, random());
            for (Semaphore semaphore : semaphores) {
                // All permits of the previous round must have been consumed.
                assertThat(semaphore.availablePermits(), equalTo(0));
                semaphore.release(docsPerIndexer);
            }
            logger.info("waiting for indexing requests to complete");
            // Budget: one request timeout per document, plus two seconds of slack.
            assertTrue(countDownLatchRef.get().await(docsPerIndexer * seconds * 1000 + 2000, TimeUnit.MILLISECONDS));
            logger.info("stopping disruption");
            disruptionScheme.stopDisrupting();
            for (String node : internalCluster().getNodeNames()) {
                ensureStableCluster(nodes.size(), TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()), true, node);
            }
            ensureGreen("test");
            logger.info("validating successful docs");
            assertBusy(() -> {
                for (String node : nodes) {
                    try {
                        logger.debug("validating through node [{}] ([{}] acked docs)", node, ackedDocs.size());
                        for (String id : ackedDocs.keySet()) {
                            assertTrue("doc [" + id + "] indexed via node [" + ackedDocs.get(id) + "] not found", client(node).prepareGet("test", "type", id).setPreference("_local").get().isExists());
                        }
                    } catch (AssertionError | NoShardAvailableActionException e) {
                        // Re-wrap so the failure names the node that was queried; the original is kept as cause.
                        throw new AssertionError(e.getMessage() + " (checked via node [" + node + "])", e);
                    }
                }
            }, 30, TimeUnit.SECONDS);
            logger.info("done validating (iteration [{}])", iter);
        }
    } finally {
        if (exceptedExceptions.size() > 0) {
            StringBuilder sb = new StringBuilder();
            for (Exception e : exceptedExceptions) {
                sb.append("\n").append(e.getMessage());
            }
            logger.debug("Indexing exceptions during disruption: {}", sb);
        }
        logger.info("shutting down indexers");
        // Set the flag before interrupting so an indexer woken from tryAcquire leaves its loop.
        stop.set(true);
        for (Thread indexer : indexers) {
            indexer.interrupt();
            indexer.join(60000);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) Semaphore(java.util.concurrent.Semaphore) ElasticsearchException(org.elasticsearch.ElasticsearchException) Supplier(org.apache.logging.log4j.util.Supplier) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Client(org.elasticsearch.client.Client) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ElasticsearchException(org.elasticsearch.ElasticsearchException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IndexResponse(org.elasticsearch.action.index.IndexResponse) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) TestLogging(org.elasticsearch.test.junit.annotations.TestLogging)

Example 3 with ServiceDisruptionScheme

use of org.elasticsearch.test.disruption.ServiceDisruptionScheme in project elasticsearch by elastic.

The class DiscoveryWithServiceDisruptionsIT, method addRandomDisruptionScheme.

/**
 * Builds a randomly chosen disruption scheme, registers it through {@code setDisruptionScheme}
 * and hands it back to the caller.
 * <p>
 * A set of disrupted links (two partitions or a bridge) and a link disruption type
 * (unresponsive, disconnect or delay) are always drawn first; when {@code rarely()} fires they are
 * discarded in favour of a {@code SlowClusterStateProcessing} scheme.
 *
 * @return the scheme that was registered
 */
private ServiceDisruptionScheme addRandomDisruptionScheme() {
    // TODO: add partial partitions
    final DisruptedLinks links = randomBoolean()
        ? TwoPartitions.random(random(), internalCluster().getNodeNames())
        : Bridge.random(random(), internalCluster().getNodeNames());

    final int typeChoice = randomInt(2);
    final NetworkLinkDisruptionType linkDisruption;
    if (typeChoice == 0) {
        linkDisruption = new NetworkUnresponsive();
    } else if (typeChoice == 1) {
        linkDisruption = new NetworkDisconnect();
    } else if (typeChoice == 2) {
        linkDisruption = NetworkDelay.random(random());
    } else {
        throw new IllegalArgumentException();
    }

    final ServiceDisruptionScheme chosen = rarely()
        ? new SlowClusterStateProcessing(random())
        : new NetworkDisruption(links, linkDisruption);
    setDisruptionScheme(chosen);
    return chosen;
}
Also used : SlowClusterStateProcessing(org.elasticsearch.test.disruption.SlowClusterStateProcessing) NetworkLinkDisruptionType(org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType) DisruptedLinks(org.elasticsearch.test.disruption.NetworkDisruption.DisruptedLinks) NetworkUnresponsive(org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)

Aggregations

ServiceDisruptionScheme (org.elasticsearch.test.disruption.ServiceDisruptionScheme)3 NetworkDisruption (org.elasticsearch.test.disruption.NetworkDisruption)2 NetworkDisconnect (org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 ExecutionException (java.util.concurrent.ExecutionException)1 Semaphore (java.util.concurrent.Semaphore)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)1 Supplier (org.apache.logging.log4j.util.Supplier)1 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)1 ElasticsearchException (org.elasticsearch.ElasticsearchException)1 NoShardAvailableActionException (org.elasticsearch.action.NoShardAvailableActionException)1 IndexResponse (org.elasticsearch.action.index.IndexResponse)1 Client (org.elasticsearch.client.Client)1