Search in sources :

Example 21 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class GatewayService method performStateRecovery.

/**
 * Kicks off gateway state recovery, either right away or after {@code recoverAfterTime} has elapsed.
 * <p>
 * In both cases the recovery runs on the generic thread pool inside an {@link AbstractRunnable}, so an
 * exception thrown by {@code gateway.performStateRecovery} is logged and handed to the recovery listener
 * instead of escaping the worker thread unnoticed.
 *
 * @param enforceRecoverAfterTime if {@code true} and {@code recoverAfterTime} is set, recovery is delayed by
 *                                {@code recoverAfterTime}; otherwise it is started immediately
 * @param reason                  text appended to the log line emitted when recovery is delayed
 */
private void performStateRecovery(boolean enforceRecoverAfterTime, String reason) {
    final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener();
    if (enforceRecoverAfterTime && recoverAfterTime != null) {
        // only the first caller schedules the delayed recovery
        if (scheduledRecovery.compareAndSet(false, true)) {
            logger.info("delaying initial state recovery for [{}]. {}", recoverAfterTime, reason);
            threadPool.schedule(recoverAfterTime, ThreadPool.Names.GENERIC, new AbstractRunnable() {

                @Override
                public void onFailure(Exception e) {
                    logger.warn("Recovery failed", e);
                    // same failure path as the immediate recovery below: the listener is responsible for
                    // resetting `recovered`, it is deliberately not reset here
                    recoveryListener.onFailure("state recovery failed: " + e.getMessage());
                }

                @Override
                protected void doRun() throws Exception {
                    // recovery may already have been started through the immediate path while we were waiting
                    if (recovered.compareAndSet(false, true)) {
                        logger.info("recover_after_time [{}] elapsed. performing state recovery...", recoverAfterTime);
                        gateway.performStateRecovery(recoveryListener);
                    }
                }
            });
        }
    } else {
        if (recovered.compareAndSet(false, true)) {
            threadPool.generic().execute(new AbstractRunnable() {

                @Override
                public void onFailure(Exception e) {
                    logger.warn("Recovery failed", e);
                    // we reset `recovered` in the listener; don't reset it here, otherwise there might be a race
                    // that resets it to false while a new recovery is already running
                    recoveryListener.onFailure("state recovery failed: " + e.getMessage());
                }

                @Override
                protected void doRun() throws Exception {
                    gateway.performStateRecovery(recoveryListener);
                }
            });
        }
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable)

Example 22 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class SnapshotShardsService method processIndexShardSnapshots.

/**
 * Checks if any new shards should be snapshotted on this node.
 * <p>
 * Reconciles the locally tracked {@code shardSnapshots} with the {@link SnapshotsInProgress} of the new
 * cluster state in four steps:
 * <ol>
 *   <li>aborts local shard snapshots (INIT/STARTED stage) whose snapshot is no longer listed,</li>
 *   <li>for STARTED entries, collects shards assigned to the local node that are in INIT state and not yet
 *       tracked; for ABORTED entries, aborts or re-reports the tracked shards depending on their stage,</li>
 *   <li>publishes the new tracking map under {@code shutdownLock} and signals {@code shutdownCondition}
 *       when it is empty,</li>
 *   <li>submits one task per newly collected shard to the {@code SNAPSHOT} thread pool executor.</li>
 * </ol>
 *
 * @param event cluster state changed event
 */
private void processIndexShardSnapshots(ClusterChangedEvent event) {
    SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
    // Snapshots that stay tracked after this event; replaces shardSnapshots further down
    Map<Snapshot, SnapshotShards> survivors = new HashMap<>();
    // First, remove snapshots that are no longer there
    for (Map.Entry<Snapshot, SnapshotShards> entry : shardSnapshots.entrySet()) {
        final Snapshot snapshot = entry.getKey();
        if (snapshotsInProgress != null && snapshotsInProgress.snapshot(snapshot) != null) {
            survivors.put(entry.getKey(), entry.getValue());
        } else {
            // The snapshot is absent from the cluster state update which is being processed here:
            // abort any of its shard snapshots that are still in the INIT or STARTED stage
            for (IndexShardSnapshotStatus snapshotStatus : entry.getValue().shards.values()) {
                if (snapshotStatus.stage() == Stage.INIT || snapshotStatus.stage() == Stage.STARTED) {
                    snapshotStatus.abort();
                }
            }
        }
    }
    // For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
    // snapshots in the future
    Map<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> newSnapshots = new HashMap<>();
    // Now go through all snapshots and update existing or create missing
    final String localNodeId = event.state().nodes().getLocalNodeId();
    final DiscoveryNode masterNode = event.state().nodes().getMasterNode();
    // Per snapshot: index name -> IndexId, looked up again when the shard tasks are submitted
    final Map<Snapshot, Map<String, IndexId>> snapshotIndices = new HashMap<>();
    if (snapshotsInProgress != null) {
        for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
            snapshotIndices.put(entry.snapshot(), entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())));
            if (entry.state() == State.STARTED) {
                Map<ShardId, IndexShardSnapshotStatus> startedShards = new HashMap<>();
                SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
                for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                    // Add all new shards to start processing on
                    if (localNodeId.equals(shard.value.nodeId())) {
                        // Only shards in INIT state that are not tracked locally yet are queued
                        if (shard.value.state() == State.INIT && (snapshotShards == null || !snapshotShards.shards.containsKey(shard.key))) {
                            logger.trace("[{}] - Adding shard to the queue", shard.key);
                            startedShards.put(shard.key, new IndexShardSnapshotStatus());
                        }
                    }
                }
                if (!startedShards.isEmpty()) {
                    newSnapshots.put(entry.snapshot(), startedShards);
                    if (snapshotShards != null) {
                        // We already saw this snapshot but we need to add more started shards
                        Map<ShardId, IndexShardSnapshotStatus> shards = new HashMap<>();
                        // Put all shards that were already running on this node
                        shards.putAll(snapshotShards.shards);
                        // Put all newly started shards
                        shards.putAll(startedShards);
                        survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(shards)));
                    } else {
                        // Brand new snapshot that we haven't seen before
                        survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(startedShards)));
                    }
                }
            } else if (entry.state() == State.ABORTED) {
                // Abort all running shards for this snapshot
                SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
                if (snapshotShards != null) {
                    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shard : entry.shards()) {
                        IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.key);
                        if (snapshotStatus != null) {
                            switch(snapshotStatus.stage()) {
                                case INIT:
                                case STARTED:
                                    snapshotStatus.abort();
                                    break;
                                case FINALIZE:
                                    logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshot(), shard.key);
                                    break;
                                case DONE:
                                    // Already finished locally: re-send the SUCCESS status instead of aborting
                                    logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshot(), shard.key);
                                    updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
                                    break;
                                case FAILURE:
                                    // Already failed locally: re-send the FAILED status together with the recorded failure
                                    logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshot(), shard.key);
                                    updateIndexShardSnapshotStatus(entry.snapshot(), shard.key, new ShardSnapshotStatus(localNodeId, State.FAILED, snapshotStatus.failure()), masterNode);
                                    break;
                                default:
                                    throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
                            }
                        }
                    }
                }
            }
        }
    }
    // Update the list of snapshots that we saw and tried to start.
    // If startup of these shards fails later, we don't want to try starting these shards again
    shutdownLock.lock();
    try {
        shardSnapshots = unmodifiableMap(survivors);
        if (shardSnapshots.isEmpty()) {
            // Notify all waiting threads that there are no more snapshots
            shutdownCondition.signalAll();
        }
    } finally {
        shutdownLock.unlock();
    }
    // We have new shards to start
    if (newSnapshots.isEmpty() == false) {
        Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
        for (final Map.Entry<Snapshot, Map<ShardId, IndexShardSnapshotStatus>> entry : newSnapshots.entrySet()) {
            Map<String, IndexId> indicesMap = snapshotIndices.get(entry.getKey());
            assert indicesMap != null;
            for (final Map.Entry<ShardId, IndexShardSnapshotStatus> shardEntry : entry.getValue().entrySet()) {
                final ShardId shardId = shardEntry.getKey();
                try {
                    // NOTE(review): getShardOrNull may presumably return null; the value is passed to snapshot(...) unchecked
                    final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id());
                    final IndexId indexId = indicesMap.get(shardId.getIndexName());
                    assert indexId != null;
                    executor.execute(new AbstractRunnable() {

                        @Override
                        public void doRun() {
                            // Snapshot the shard, then report SUCCESS for it
                            snapshot(indexShard, entry.getKey(), indexId, shardEntry.getValue());
                            updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.SUCCESS), masterNode);
                        }

                        @Override
                        public void onFailure(Exception e) {
                            // Log the failure and report FAILED with the detailed exception message
                            logger.warn((Supplier<?>) () -> new ParameterizedMessage("[{}] [{}] failed to create snapshot", shardId, entry.getKey()), e);
                            updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
                        }
                    });
                } catch (Exception e) {
                    // Resolving the shard or submitting the task failed: report the shard as FAILED
                    updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, State.FAILED, ExceptionsHelper.detailedMessage(e)), masterNode);
                }
            }
        }
    }
}
Also used : IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) ShardId(org.elasticsearch.index.shard.ShardId) Executor(java.util.concurrent.Executor) ClusterStateTaskExecutor(org.elasticsearch.cluster.ClusterStateTaskExecutor) Supplier(org.apache.logging.log4j.util.Supplier) IndexId(org.elasticsearch.repositories.IndexId) IndexShard(org.elasticsearch.index.shard.IndexShard) IndexShardSnapshotFailedException(org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException) SnapshotFailedEngineException(org.elasticsearch.index.engine.SnapshotFailedEngineException) IOException(java.io.IOException) SnapshotsInProgress(org.elasticsearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) Map(java.util.Map) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) HashMap(java.util.HashMap) Collections.emptyMap(java.util.Collections.emptyMap) Collections.unmodifiableMap(java.util.Collections.unmodifiableMap)

Example 23 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class WorkingBulkByScrollTaskTests method testDelayAndRethrottle.

/**
 * Rethrottles a delayed request over and over from several threads and verifies that the delayed command
 * is never executed more than once.
 */
public void testDelayAndRethrottle() throws IOException, InterruptedException {
    List<Throwable> failures = new CopyOnWriteArrayList<>();
    AtomicBoolean commandRan = new AtomicBoolean();
    int rethrottlerCount = between(1, 10);
    CyclicBarrier allRethrottlersFinished = new CyclicBarrier(rethrottlerCount);
    // The full delay is never actually waited out: every rethrottle ratchets the remaining delay down by a
    // random amount.
    float initialRequestsPerSecond = (float) randomDoubleBetween(1, 10000, true);
    task.rethrottle(initialRequestsPerSecond);
    TimeValue delayCeiling = timeValueSeconds(between(1, 5));
    assertThat(delayCeiling.nanos(), greaterThanOrEqualTo(0L));
    int batchSizeForMaxDelay = (int) (delayCeiling.seconds() * initialRequestsPerSecond);
    // Thread pool that checks every scheduled delay stays within [0, delayCeiling]
    ThreadPool threadPool = new TestThreadPool(getTestName()) {

        @Override
        public ScheduledFuture<?> schedule(TimeValue delay, String name, Runnable command) {
            assertThat(delay.nanos(), both(greaterThanOrEqualTo(0L)).and(lessThanOrEqualTo(delayCeiling.nanos())));
            return super.schedule(delay, name, command);
        }
    };
    try {
        AbstractRunnable delayedCommand = new AbstractRunnable() {

            @Override
            public void onFailure(Exception e) {
                failures.add(e);
            }

            @Override
            protected void doRun() throws Exception {
                // getAndSet returns the previous value, so true means an earlier run already happened
                if (commandRan.getAndSet(true)) {
                    throw new RuntimeException("Ran twice oh no!");
                }
            }
        };
        task.delayPrepareBulkRequest(threadPool, timeValueNanos(System.nanoTime()), batchSizeForMaxDelay, delayedCommand);
        // Rethrottle on a random number of threads, one of which is this thread.
        Runnable rethrottler = () -> {
            try {
                int rethrottles = 0;
                for (; !commandRan.get(); rethrottles++) {
                    task.rethrottle((float) randomDoubleBetween(0, initialRequestsPerSecond * 2, true));
                }
                logger.info("Rethrottled [{}] times", rethrottles);
                allRethrottlersFinished.await();
            } catch (Exception e) {
                failures.add(e);
            }
        };
        // All but one rethrottler run on the generic pool; the last one runs on the calling thread
        int backgroundRethrottlers = rethrottlerCount - 1;
        while (backgroundRethrottlers-- > 0) {
            threadPool.generic().execute(rethrottler);
        }
        rethrottler.run();
    } finally {
        // Other threads should finish up quickly as they are checking the same AtomicBoolean.
        threadPool.shutdown();
        threadPool.awaitTermination(10, TimeUnit.SECONDS);
    }
    assertThat(failures, empty());
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) ThreadPool(org.elasticsearch.threadpool.ThreadPool) TestThreadPool(org.elasticsearch.threadpool.TestThreadPool) TestThreadPool(org.elasticsearch.threadpool.TestThreadPool) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) CyclicBarrier(java.util.concurrent.CyclicBarrier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) TimeValue(org.elasticsearch.common.unit.TimeValue) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 24 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class ListenableActionFutureTests method testListenerIsCallableFromNetworkThreads.

/**
 * Completes a listenable future from a thread whose name carries the mock transport thread prefix and
 * checks that the registered listener is invoked without recording an error.
 */
public void testListenerIsCallableFromNetworkThreads() throws Throwable {
    ThreadPool threadPool = new TestThreadPool("testListenerIsCallableFromNetworkThreads");
    try {
        final Object expectedResponse = new Object();
        final AtomicReference<Throwable> failure = new AtomicReference<>();
        final CountDownLatch listenerNotified = new CountDownLatch(1);
        final PlainListenableActionFuture<Object> future = new PlainListenableActionFuture<>(threadPool);
        future.addListener(new ActionListener<Object>() {

            @Override
            public void onFailure(Exception e) {
                failure.set(e);
                listenerNotified.countDown();
            }

            @Override
            public void onResponse(Object o) {
                listenerNotified.countDown();
            }
        });
        // Work executed on the fake network thread: complete the future with the response
        final AbstractRunnable completeFuture = new AbstractRunnable() {

            @Override
            protected void doRun() throws Exception {
                future.onResponse(expectedResponse);
            }

            @Override
            public void onFailure(Exception e) {
                failure.set(e);
                listenerNotified.countDown();
            }
        };
        final String networkThreadName = Transports.TEST_MOCK_TRANSPORT_THREAD_PREFIX + "_testListenerIsCallableFromNetworkThread";
        final Thread networkThread = new Thread(completeFuture, networkThreadName);
        networkThread.start();
        networkThread.join();
        listenerNotified.await();
        final Throwable recorded = failure.get();
        if (recorded != null) {
            throw recorded;
        }
    } finally {
        ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS);
    }
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) TestThreadPool(org.elasticsearch.threadpool.TestThreadPool) ThreadPool(org.elasticsearch.threadpool.ThreadPool) AtomicReference(java.util.concurrent.atomic.AtomicReference) TestThreadPool(org.elasticsearch.threadpool.TestThreadPool) CountDownLatch(java.util.concurrent.CountDownLatch)

Example 25 with AbstractRunnable

use of org.elasticsearch.common.util.concurrent.AbstractRunnable in project elasticsearch by elastic.

the class NodeJoinControllerTests method testSimpleMasterElectionWithoutRequiredJoins.

/**
 * With zero required joins, waiting to be elected as master must complete on its own; joins attempted
 * before the election context is started must fail with {@link NotMasterException}.
 */
public void testSimpleMasterElectionWithoutRequiredJoins() throws InterruptedException, ExecutionException {
    // Start from a cluster state that has no elected master
    DiscoveryNodes.Builder nodesWithoutMaster = DiscoveryNodes.builder(clusterService.state().nodes()).masterNodeId(null);
    setState(clusterService, ClusterState.builder(clusterService.state()).nodes(nodesWithoutMaster));
    int nextNodeId = 0;
    final int requiredJoins = 0;
    logger.debug("--> using requiredJoins [{}]", requiredJoins);
    // initial (failing) joins shouldn't count
    int failingJoins = randomInt(5);
    while (failingJoins > 0) {
        try {
            joinNode(newNode(nextNodeId++));
            fail("failed to fail node join when not a master");
        } catch (ExecutionException e) {
            assertThat(e.getCause(), instanceOf(NotMasterException.class));
        }
        failingJoins--;
    }
    nodeJoinController.startElectionContext();
    final SimpleFuture electionFuture = new SimpleFuture("master election");
    // Callback that transfers the election outcome onto electionFuture
    final NodeJoinController.ElectionCallback electionCallback = new NodeJoinController.ElectionCallback() {

        @Override
        public void onFailure(Throwable t) {
            logger.error("unexpected error while waiting to be elected as master", t);
            electionFuture.markAsFailed(t);
        }

        @Override
        public void onElectedAsMaster(ClusterState state) {
            assertThat("callback called with elected as master, but state disagrees", state.nodes().isLocalNodeElectedMaster(), equalTo(true));
            electionFuture.markAsDone();
        }
    };
    final AbstractRunnable waitForElection = new AbstractRunnable() {

        @Override
        protected void doRun() throws Exception {
            nodeJoinController.waitToBeElectedAsMaster(requiredJoins, TimeValue.timeValueHours(30), electionCallback);
        }

        @Override
        public void onFailure(Exception e) {
            logger.error("unexpected error from waitToBeElectedAsMaster", e);
            electionFuture.markAsFailed(e);
        }
    };
    final Thread masterElection = new Thread(waitForElection);
    masterElection.start();
    logger.debug("--> requiredJoins is set to 0. verifying election finished");
    electionFuture.get();
}
Also used : AbstractRunnable(org.elasticsearch.common.util.concurrent.AbstractRunnable) ClusterState(org.elasticsearch.cluster.ClusterState) ExecutionException(java.util.concurrent.ExecutionException) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes) NotMasterException(org.elasticsearch.cluster.NotMasterException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

AbstractRunnable (org.elasticsearch.common.util.concurrent.AbstractRunnable)33 IOException (java.io.IOException)19 ExecutionException (java.util.concurrent.ExecutionException)11 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)10 BrokenBarrierException (java.util.concurrent.BrokenBarrierException)9 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)9 CountDownLatch (java.util.concurrent.CountDownLatch)8 CyclicBarrier (java.util.concurrent.CyclicBarrier)8 AtomicReference (java.util.concurrent.atomic.AtomicReference)8 TimeValue (org.elasticsearch.common.unit.TimeValue)8 ElasticsearchException (org.elasticsearch.ElasticsearchException)7 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)6 Supplier (org.apache.logging.log4j.util.Supplier)6 EsRejectedExecutionException (org.elasticsearch.common.util.concurrent.EsRejectedExecutionException)6 TestThreadPool (org.elasticsearch.threadpool.TestThreadPool)5 UnknownHostException (java.net.UnknownHostException)4 ArrayList (java.util.ArrayList)4 AlreadyClosedException (org.apache.lucene.store.AlreadyClosedException)4 ClusterState (org.elasticsearch.cluster.ClusterState)4 NotMasterException (org.elasticsearch.cluster.NotMasterException)4