Search in sources :

Example 26 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class IndexShardTests method testRelocatedShardCanNotBeRevivedConcurrently.

/**
 * Races completing a relocation ({@code shard.relocated("test")}) against cancelling it
 * (re-applying the original, non-relocating routing entry) and verifies that exactly one of
 * the two operations wins while the other one fails with an {@link IllegalIndexShardStateException}.
 */
public void testRelocatedShardCanNotBeRevivedConcurrently() throws IOException, InterruptedException, BrokenBarrierException {
    final IndexShard shard = newStartedShard(true);
    final ShardRouting routingBeforeRelocation = shard.routingEntry();
    shard.updateRoutingEntry(ShardRoutingHelper.relocate(routingBeforeRelocation, "other_node"));

    // three parties: the relocating thread, the cancelling thread and this (main) thread
    final CyclicBarrier startGate = new CyclicBarrier(3);
    final AtomicReference<Exception> relocationFailure = new AtomicReference<>();
    final AtomicReference<Exception> cancelFailure = new AtomicReference<>();

    final AbstractRunnable relocateTask = new AbstractRunnable() {

        @Override
        protected void doRun() throws Exception {
            startGate.await();
            shard.relocated("test");
        }

        @Override
        public void onFailure(Exception e) {
            relocationFailure.set(e);
        }
    };
    final AbstractRunnable cancelTask = new AbstractRunnable() {

        @Override
        protected void doRun() throws Exception {
            startGate.await();
            shard.updateRoutingEntry(routingBeforeRelocation);
        }

        @Override
        public void onFailure(Exception e) {
            cancelFailure.set(e);
        }
    };

    final Thread relocationThread = new Thread(relocateTask);
    final Thread cancellingThread = new Thread(cancelTask);
    relocationThread.start();
    cancellingThread.start();

    // release both workers at once, then wait for both to finish
    startGate.await();
    relocationThread.join();
    cancellingThread.join();

    if (shard.state() != IndexShardState.RELOCATED) {
        // the cancellation won: the relocation must have been rejected
        logger.debug("shard relocation was cancelled");
        assertThat(relocationFailure.get(), instanceOf(IllegalIndexShardStateException.class));
        assertThat("current routing:" + shard.routingEntry(), shard.routingEntry().relocating(), equalTo(false));
        assertThat(cancelFailure.get(), nullValue());
    } else {
        // the relocation won: the cancellation must have been rejected
        logger.debug("shard was relocated successfully");
        assertThat(cancelFailure.get(), instanceOf(IllegalIndexShardStateException.class));
        assertThat("current routing:" + shard.routingEntry(), shard.routingEntry().relocating(), equalTo(true));
        assertThat(relocationFailure.get(), nullValue());
    }
    closeShards(shard);
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) AtomicReference(java.util.concurrent.atomic.AtomicReference) TestShardRouting(org.elasticsearch.cluster.routing.TestShardRouting) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) EngineException(org.elasticsearch.index.engine.EngineException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ExecutionException(java.util.concurrent.ExecutionException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) CyclicBarrier(java.util.concurrent.CyclicBarrier)

Example 27 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class TranslogTests method testConcurrentWriteViewsAndSnapshot.

/**
 * Tests that concurrent readers and writes maintain view and snapshot semantics.
 *
 * A random number of writer threads keep adding operations to the translog while a random number of
 * reader threads repeatedly open views, take snapshots and verify that every operation recorded as
 * written after the view was opened is returned by the snapshot. The main thread commits the translog
 * roughly every {@code flushEveryOps} written operations to simulate flushes.
 *
 * @throws Throwable the first error collected from a background thread (with the remaining ones
 *                   attached as suppressed), or any failure of the main loop
 */
public void testConcurrentWriteViewsAndSnapshot() throws Throwable {
    final Thread[] writers = new Thread[randomIntBetween(1, 10)];
    final Thread[] readers = new Thread[randomIntBetween(1, 10)];
    final int flushEveryOps = randomIntBetween(5, 100);
    // used to notify main thread that so many operations have been written so it can simulate a flush
    final AtomicReference<CountDownLatch> writtenOpsLatch = new AtomicReference<>(new CountDownLatch(0));
    final AtomicLong idGenerator = new AtomicLong();
    // all writers, all readers and the main thread start together
    final CyclicBarrier barrier = new CyclicBarrier(writers.length + readers.length + 1);
    // a map of all written ops and their returned location.
    final Map<Translog.Operation, Translog.Location> writtenOps = ConcurrentCollections.newConcurrentMap();
    // a signal for all threads to stop
    final AtomicBoolean run = new AtomicBoolean(true);
    // any errors on threads
    final List<Exception> errors = new CopyOnWriteArrayList<>();
    logger.debug("using [{}] readers. [{}] writers. flushing every ~[{}] ops.", readers.length, writers.length, flushEveryOps);
    for (int i = 0; i < writers.length; i++) {
        final String threadName = "writer_" + i;
        final int threadId = i;
        writers[i] = new Thread(new AbstractRunnable() {

            @Override
            public void doRun() throws BrokenBarrierException, InterruptedException, IOException {
                barrier.await();
                int counter = 0;
                while (run.get()) {
                    long id = idGenerator.incrementAndGet();
                    final Translog.Operation op;
                    // cycle through all operation types based on the generated id
                    final Translog.Operation.Type type = Translog.Operation.Type.values()[((int) (id % Translog.Operation.Type.values().length))];
                    switch(type) {
                        case CREATE:
                        case INDEX:
                            op = new Translog.Index("type", "" + id, new byte[] { (byte) id });
                            break;
                        case DELETE:
                            op = new Translog.Delete(newUid("" + id));
                            break;
                        case NO_OP:
                            op = new Translog.NoOp(id, id, Long.toString(id));
                            break;
                        default:
                            throw new AssertionError("unsupported operation type [" + type + "]");
                    }
                    Translog.Location location = translog.add(op);
                    Translog.Location existing = writtenOps.put(op, location);
                    if (existing != null) {
                        // report the previously registered location, not the one that was just written
                        // NOTE(review): fail() raises an AssertionError, which presumably is not routed
                        // through onFailure(Exception) - confirm how such failures surface
                        fail("duplicate op [" + op + "], old entry at " + existing);
                    }
                    // only one writer syncs any given op
                    if (id % writers.length == threadId) {
                        translog.ensureSynced(location);
                    }
                    writtenOpsLatch.get().countDown();
                    counter++;
                }
                logger.debug("--> [{}] done. wrote [{}] ops.", threadName, counter);
            }

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> writer [{}] had an error", threadName), e);
                errors.add(e);
            }
        }, threadName);
        writers[i].start();
    }
    for (int i = 0; i < readers.length; i++) {
        final String threadId = "reader_" + i;
        readers[i] = new Thread(new AbstractRunnable() {

            // the view currently held by this reader (null until the first call to newView())
            Translog.View view = null;

            // the ops that were already recorded as written when the current view was opened
            Set<Translog.Operation> writtenOpsAtView;

            @Override
            public void onFailure(Exception e) {
                logger.error((Supplier<?>) () -> new ParameterizedMessage("--> reader [{}] had an error", threadId), e);
                errors.add(e);
                try {
                    closeView();
                } catch (IOException inner) {
                    inner.addSuppressed(e);
                    logger.error("unexpected error while closing view, after failure", inner);
                }
            }

            void closeView() throws IOException {
                if (view != null) {
                    view.close();
                }
            }

            void newView() throws IOException {
                closeView();
                view = translog.newView();
                // captures the currently written ops so we know what to expect from the view
                writtenOpsAtView = new HashSet<>(writtenOps.keySet());
                logger.debug("--> [{}] opened view from [{}]", threadId, view.minTranslogGeneration());
            }

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                int iter = 0;
                while (run.get()) {
                    // open a fresh view every 10 iterations
                    if (iter++ % 10 == 0) {
                        newView();
                    }
                    // captures all ops that were written since the view was created (with a small caveat, see below)
                    // these are what we expect the snapshot to return (and potentially some more).
                    Set<Translog.Operation> expectedOps = new HashSet<>(writtenOps.keySet());
                    expectedOps.removeAll(writtenOpsAtView);
                    Translog.Snapshot snapshot = view.snapshot();
                    Translog.Operation op;
                    while ((op = snapshot.next()) != null) {
                        expectedOps.remove(op);
                    }
                    if (expectedOps.isEmpty() == false) {
                        StringBuilder missed = new StringBuilder("missed ").append(expectedOps.size()).append(" operations");
                        boolean failed = false;
                        for (Translog.Operation expectedOp : expectedOps) {
                            final Translog.Location loc = writtenOps.get(expectedOp);
                            if (loc.generation < view.minTranslogGeneration()) {
                                // writtenOpsAtView is captured after the view is opened, so an op written to an older
                                // generation may not yet be in writtenOpsAtView, meaning we erroneously expect it
                                continue;
                            }
                            failed = true;
                            missed.append("\n --> [").append(expectedOp).append("] written at ").append(loc);
                        }
                        if (failed) {
                            fail(missed.toString());
                        }
                    }
                    // slow down things a bit and spread out testing..
                    writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS);
                }
                closeView();
                logger.debug("--> [{}] done. tested [{}] snapshots", threadId, iter);
            }
        }, threadId);
        readers[i].start();
    }
    barrier.await();
    try {
        for (int iterations = scaledRandomIntBetween(10, 200); iterations > 0 && errors.isEmpty(); iterations--) {
            // wait until flushEveryOps more operations were written (or an error was reported), then commit
            writtenOpsLatch.set(new CountDownLatch(flushEveryOps));
            while (writtenOpsLatch.get().await(200, TimeUnit.MILLISECONDS) == false) {
                if (errors.size() > 0) {
                    break;
                }
            }
            translog.commit();
        }
    } finally {
        run.set(false);
        logger.debug("--> waiting for threads to stop");
        for (Thread thread : writers) {
            thread.join();
        }
        for (Thread thread : readers) {
            thread.join();
        }
        if (errors.size() > 0) {
            // rethrow the first background error and attach all others as suppressed
            Throwable e = errors.get(0);
            for (Throwable suppress : errors.subList(1, errors.size())) {
                e.addSuppressed(suppress);
            }
            throw e;
        }
        logger.info("--> test done. total ops written [{}]", writtenOps.size());
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) Set(java.util.Set) HashSet(java.util.HashSet) Matchers.hasToString(org.hamcrest.Matchers.hasToString) Matchers.containsString(org.hamcrest.Matchers.containsString) Location(org.elasticsearch.index.translog.Translog.Location) HashSet(java.util.HashSet) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) AlreadyClosedException(org.apache.lucene.store.AlreadyClosedException) EOFException(java.io.EOFException) InvalidPathException(java.nio.file.InvalidPathException) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) CyclicBarrier(java.util.concurrent.CyclicBarrier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AtomicLong(java.util.concurrent.atomic.AtomicLong) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Location(org.elasticsearch.index.translog.Translog.Location) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 28 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class LongGCDisruption method startDisrupting.

/**
 * Starts the disruption by stopping the threads of the disrupted node.
 *
 * The stopping itself is done on a dedicated background thread which keeps calling
 * {@code stopNodeThreads(suspendedThreads)} until it returns {@code false}. The calling thread waits for
 * that thread for at most {@code getStoppingTimeoutInMillis()} milliseconds. If block detection is
 * supported, a second background thread is started that periodically dumps all threads and reports
 * threads outside the disrupted node that wait on a lock owned by a disrupted node thread.
 *
 * If anything fails, block detection is stopped, the suspended threads are resumed and the
 * disruption is reset so that it can be started again.
 *
 * @throws IllegalStateException if a disruption is already in progress
 * @throws RuntimeException if the caller is interrupted while waiting, if the stopping thread reported
 *                          an error, or if stopping the threads did not finish within the timeout
 */
@Override
public synchronized void startDisrupting() {
    if (suspendedThreads == null) {
        boolean success = false;
        try {
            suspendedThreads = ConcurrentHashMap.newKeySet();
            final String currentThreadName = Thread.currentThread().getName();
            assert isDisruptedNodeThread(currentThreadName) == false : "current thread match pattern. thread name: " + currentThreadName + ", node: " + disruptedNode;
            // we spawn a background thread to protect against deadlock which can happen
            // if there are shared resources between caller thread and suspended threads
            // see unsafeClasses to how to avoid that
            final AtomicReference<Exception> stoppingError = new AtomicReference<>();
            final Thread stoppingThread = new Thread(new AbstractRunnable() {

                @Override
                public void onFailure(Exception e) {
                    // remember the failure so the calling thread can rethrow it after the join
                    stoppingError.set(e);
                }

                @Override
                protected void doRun() throws Exception {
                    // keep trying to stop threads, until no new threads are discovered.
                    while (stopNodeThreads(suspendedThreads)) {
                        // the caller interrupts this thread to signal that it should give up
                        if (Thread.interrupted()) {
                            return;
                        }
                    }
                }
            });
            stoppingThread.setName(currentThreadName + "[LongGCDisruption][threadStopper]");
            stoppingThread.start();
            try {
                // bounded wait: the stopping thread may still be alive when this returns
                stoppingThread.join(getStoppingTimeoutInMillis());
            } catch (InterruptedException e) {
                // best effort to signal stopping
                stoppingThread.interrupt();
                throw new RuntimeException(e);
            }
            if (stoppingError.get() != null) {
                throw new RuntimeException("unknown error while stopping threads", stoppingError.get());
            }
            if (stoppingThread.isAlive()) {
                logger.warn("failed to stop node [{}]'s threads within [{}] millis. Stopping thread stack trace:\n {}", disruptedNode, getStoppingTimeoutInMillis(), stackTrace(stoppingThread.getStackTrace()));
                // best effort;
                stoppingThread.interrupt();
                throw new RuntimeException("stopping node threads took too long");
            }
            // block detection checks whether threads outside the disrupted node are blocked on a lock
            // that is owned by one of the threads that was suspended
            if (isBlockDetectionSupported()) {
                blockDetectionThread = new Thread(new AbstractRunnable() {

                    @Override
                    public void onFailure(Exception e) {
                        // an InterruptedException is tolerated here; anything else is unexpected
                        if (e instanceof InterruptedException == false) {
                            throw new AssertionError("unexpected exception in blockDetectionThread", e);
                        }
                    }

                    @Override
                    protected void doRun() throws Exception {
                        while (Thread.currentThread().isInterrupted() == false) {
                            ThreadInfo[] threadInfos = threadBean.dumpAllThreads(true, true);
                            for (ThreadInfo threadInfo : threadInfos) {
                                // a thread that is not part of the disrupted node but waits on a lock owned by a disrupted node thread
                                if (isDisruptedNodeThread(threadInfo.getThreadName()) == false && threadInfo.getLockOwnerName() != null && isDisruptedNodeThread(threadInfo.getLockOwnerName())) {
                                    // find ThreadInfo object of the blocking thread (if available)
                                    ThreadInfo blockingThreadInfo = null;
                                    for (ThreadInfo otherThreadInfo : threadInfos) {
                                        if (otherThreadInfo.getThreadId() == threadInfo.getLockOwnerId()) {
                                            blockingThreadInfo = otherThreadInfo;
                                            break;
                                        }
                                    }
                                    onBlockDetected(threadInfo, blockingThreadInfo);
                                }
                            }
                            Thread.sleep(getBlockDetectionIntervalInMillis());
                        }
                    }
                });
                blockDetectionThread.setName(currentThreadName + "[LongGCDisruption][blockDetection]");
                blockDetectionThread.start();
            }
            success = true;
        } finally {
            if (success == false) {
                stopBlockDetection();
                // resume threads if failed
                resumeThreads(suspendedThreads);
                // reset so that a later call can start a new disruption
                suspendedThreads = null;
            }
        }
    } else {
        throw new IllegalStateException("can't disrupt twice, call stopDisrupting() first");
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) AtomicReference(java.util.concurrent.atomic.AtomicReference) ThreadInfo(java.lang.management.ThreadInfo)

Example 29 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class LocalCheckpointTrackerTests method testConcurrentReplica.

/**
 * Spreads the sequence numbers {@code [0, maxOps)} randomly over several threads which mark them as
 * completed concurrently, except for one randomly chosen "unfinished" sequence number. Verifies the
 * max sequence number and the checkpoint before and after that last sequence number is completed.
 */
public void testConcurrentReplica() throws InterruptedException {
    final Thread[] workers = new Thread[randomIntBetween(2, 5)];
    final int workerCount = workers.length;
    final int opsPerThread = randomIntBetween(10, 20);
    final int maxOps = opsPerThread * workerCount;
    // the upper bound keeps the last seqNo out of the pick, so it is always indexed (simplifies maxSeq checks)
    final long unFinishedSeq = randomIntBetween(0, maxOps - 2);

    // hand out the sequence numbers: every thread but the last gets a random share, the last gets the remainder
    final Set<Integer> unassigned = IntStream.range(0, maxOps).boxed().collect(Collectors.toSet());
    final Integer[][] seqNoPerThread = new Integer[workerCount][];
    final int lastWorker = workerCount - 1;
    for (int w = 0; w < lastWorker; w++) {
        final int drawn = randomIntBetween(opsPerThread - 4, opsPerThread + 4);
        final int size = Math.min(unassigned.size(), drawn);
        final Integer[] share = randomSubsetOf(size, unassigned).toArray(new Integer[size]);
        seqNoPerThread[w] = share;
        unassigned.removeAll(Arrays.asList(share));
    }
    seqNoPerThread[lastWorker] = unassigned.toArray(new Integer[unassigned.size()]);
    logger.info("--> will run [{}] threads, maxOps [{}], unfinished seq no [{}]", workerCount, maxOps, unFinishedSeq);

    final CyclicBarrier barrier = new CyclicBarrier(workerCount);
    for (int w = 0; w < workerCount; w++) {
        final int threadId = w;
        final AbstractRunnable task = new AbstractRunnable() {

            @Override
            protected void doRun() throws Exception {
                barrier.await();
                for (int seqNo : seqNoPerThread[threadId]) {
                    if (seqNo == unFinishedSeq) {
                        // left for the main thread to complete at the very end
                        continue;
                    }
                    tracker.markSeqNoAsCompleted(seqNo);
                    logger.info("[t{}] completed [{}]", threadId, seqNo);
                }
            }

            @Override
            public void onFailure(Exception e) {
                throw new ElasticsearchException("failure in background thread", e);
            }
        };
        workers[w] = new Thread(task, "testConcurrentReplica_" + threadId);
        workers[w].start();
    }
    for (Thread worker : workers) {
        worker.join();
    }

    // everything but the unfinished seqNo is done: the checkpoint stops right before it
    assertThat(tracker.getMaxSeqNo(), equalTo(maxOps - 1L));
    assertThat(tracker.getCheckpoint(), equalTo(unFinishedSeq - 1L));
    tracker.markSeqNoAsCompleted(unFinishedSeq);
    assertThat(tracker.getCheckpoint(), equalTo(maxOps - 1L));
    assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / SMALL_CHUNK_SIZE) * SMALL_CHUNK_SIZE));
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) ElasticsearchException(org.elasticsearch.ElasticsearchException) ElasticsearchException(org.elasticsearch.ElasticsearchException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) CyclicBarrier(java.util.concurrent.CyclicBarrier)

Example 30 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class MockTransportService method addUnresponsiveRule.

/**
 * Adds a rule that simulates an unresponsive node: while the rule is active, requests are not sent
 * immediately but held back and connection attempts are delayed or fail.
 * <ul>
 *   <li>Connecting sleeps for the remaining delay and then connects, or, when the remaining delay is at
 *       least the default TCP connect timeout, sleeps for that timeout and fails with a
 *       {@link ConnectTransportException}.</li>
 *   <li>Sending clones the request and schedules it to be sent once the delay has passed; held-back
 *       requests are also run when the rule is cleared. Each cloned request is sent at most once.</li>
 * </ul>
 *
 * @param transportAddress the address the rule is registered for
 * @param duration the amount of time to delay sending and connecting.
 */
public void addUnresponsiveRule(TransportAddress transportAddress, final TimeValue duration) {
    final long startTime = System.currentTimeMillis();
    addDelegate(transportAddress, new ClearableTransport(original) {

        private final Queue<Runnable> requestsToSendWhenCleared = new LinkedBlockingDeque<Runnable>();

        private boolean cleared = false;

        // time left until the rule expires; zero or negative once the duration has passed
        TimeValue getDelay() {
            final long elapsedMillis = System.currentTimeMillis() - startTime;
            return new TimeValue(duration.millis() - elapsedMillis);
        }

        @Override
        public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile, CheckedBiConsumer<Connection, ConnectionProfile, IOException> connectionValidator) throws ConnectTransportException {
            if (original.nodeConnected(node)) {
                // connecting to an already connected node is a no-op
                return;
            }
            final TimeValue remaining = getDelay();
            if (remaining.millis() <= 0) {
                // the rule has expired, connect normally
                original.connectToNode(node, connectionProfile, connectionValidator);
                return;
            }
            // TODO: Replace with proper setting
            final TimeValue connectingTimeout = NetworkService.TcpSettings.TCP_CONNECT_TIMEOUT.getDefault(Settings.EMPTY);
            try {
                if (remaining.millis() >= connectingTimeout.millis()) {
                    // the node stays unresponsive for longer than a connect attempt would wait
                    Thread.sleep(connectingTimeout.millis());
                    throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
                }
                Thread.sleep(remaining.millis());
                original.connectToNode(node, connectionProfile, connectionValidator);
            } catch (InterruptedException e) {
                throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
            }
        }

        @Override
        protected void sendRequest(Connection connection, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException {
            // delayed sending - even if larger than the request timeout, to simulate a potential late response from target node
            final TimeValue remaining = getDelay();
            if (remaining.millis() <= 0) {
                connection.sendRequest(requestId, action, request, options);
                return;
            }
            // poor man's request cloning: serialize the request and read it back into a fresh instance
            RequestHandlerRegistry registry = MockTransportService.this.getRequestHandler(action);
            BytesStreamOutput serialized = new BytesStreamOutput();
            request.writeTo(serialized);
            final TransportRequest clonedRequest = registry.newRequest();
            clonedRequest.readFrom(serialized.bytes().streamInput());
            Runnable runnable = new AbstractRunnable() {

                // the same runnable is both queued and scheduled; this makes sure it only sends once
                AtomicBoolean requestSent = new AtomicBoolean();

                @Override
                protected void doRun() throws IOException {
                    if (requestSent.compareAndSet(false, true) == false) {
                        return;
                    }
                    connection.sendRequest(requestId, action, clonedRequest, options);
                }

                @Override
                public void onFailure(Exception e) {
                    logger.debug("failed to send delayed request", e);
                }
            };
            // store the request to send it once the rule is cleared.
            synchronized (this) {
                if (cleared == false) {
                    requestsToSendWhenCleared.add(runnable);
                    threadPool.schedule(remaining, ThreadPool.Names.GENERIC, runnable);
                } else {
                    runnable.run();
                }
            }
        }

        @Override
        public void clearRule() {
            synchronized (this) {
                assert cleared == false;
                cleared = true;
                // flush everything that was held back while the rule was active
                for (Runnable pending : requestsToSendWhenCleared) {
                    pending.run();
                }
            }
        }
    });
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) LinkedBlockingDeque(java.util.concurrent.LinkedBlockingDeque) TransportRequest(org.elasticsearch.transport.TransportRequest) ConnectionProfile(org.elasticsearch.transport.ConnectionProfile) IOException(java.io.IOException) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) TransportException(org.elasticsearch.transport.TransportException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) RequestHandlerRegistry(org.elasticsearch.transport.RequestHandlerRegistry) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) TransportRequestOptions(org.elasticsearch.transport.TransportRequestOptions) TimeValue(org.elasticsearch.common.unit.TimeValue)

Aggregations

AbstractRunnable (org.elasticsearch.common.util.concurrent.AbstractRunnable)33 IOException (java.io.IOException)19 ExecutionException (java.util.concurrent.ExecutionException)11 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)10 BrokenBarrierException (java.util.concurrent.BrokenBarrierException)9 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)9 CountDownLatch (java.util.concurrent.CountDownLatch)8 CyclicBarrier (java.util.concurrent.CyclicBarrier)8 AtomicReference (java.util.concurrent.atomic.AtomicReference)8 TimeValue (org.elasticsearch.common.unit.TimeValue)8 ElasticsearchException (org.elasticsearch.ElasticsearchException)7 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)6 Supplier (org.apache.logging.log4j.util.Supplier)6 EsRejectedExecutionException (org.elasticsearch.common.util.concurrent.EsRejectedExecutionException)6 TestThreadPool (org.elasticsearch.threadpool.TestThreadPool)5 UnknownHostException (java.net.UnknownHostException)4 ArrayList (java.util.ArrayList)4 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)4 ClusterState (org.elasticsearch.cluster.ClusterState)4 NotMasterException (org.elasticsearch.cluster.NotMasterException)4