Search in sources :

Example 1 with LocalClusterUpdateTask

use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.

the class SlowClusterStateProcessing method interruptClusterStateProcessing.

/**
 * Delays cluster state updates on the currently disrupted node for about {@code duration} by
 * submitting an {@code IMMEDIATE}-priority task whose {@code execute} simply sleeps, then waits
 * for that task to finish.
 *
 * @param duration how long the submitted task should sleep
 * @return {@code false} if no disrupted node is set or no {@link ClusterService} instance is
 *         available for it; {@code true} once the submitted task has completed or failed
 * @throws InterruptedException if the calling thread is interrupted again while it is waiting
 *         for the task to wind down after a first interruption
 */
private boolean interruptClusterStateProcessing(final TimeValue duration) throws InterruptedException {
    // snapshot the field so the null check and later uses see the same value
    final String targetNode = disruptedNode;
    if (targetNode == null) {
        return false;
    }
    logger.info("delaying cluster state updates on node [{}] for [{}]", targetNode, duration);
    final ClusterService targetService = cluster.getInstance(ClusterService.class, targetNode);
    if (targetService == null) {
        return false;
    }
    final CountDownLatch taskFinished = new CountDownLatch(1);
    final AtomicBoolean cancelled = new AtomicBoolean(false);
    targetService.submitStateUpdateTask("service_disruption_delay", new LocalClusterUpdateTask(Priority.IMMEDIATE) {

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
            // sleep in 200ms slices so a cancellation request is noticed promptly
            long remainingSlices = duration.millis() / 200;
            while (remainingSlices > 0 && !cancelled.get()) {
                Thread.sleep(200);
                remainingSlices--;
            }
            // sleep the sub-slice remainder unless we were asked to stop
            if (!cancelled.get()) {
                Thread.sleep(duration.millis() % 200);
            }
            taskFinished.countDown();
            return unchanged();
        }

        @Override
        public void onFailure(String source, Exception e) {
            // release the waiting caller even if the task never ran to completion
            taskFinished.countDown();
        }
    });
    try {
        taskFinished.await();
    } catch (InterruptedException e) {
        // ask the sleeping task to stop, then wait once more: we really want the
        // cluster state thread to be freed up when stopping the disruption
        cancelled.set(true);
        taskFinished.await();
    }
    return true;
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterService(org.elasticsearch.cluster.service.ClusterService) LocalClusterUpdateTask(org.elasticsearch.cluster.LocalClusterUpdateTask) CountDownLatch(java.util.concurrent.CountDownLatch)

Example 2 with LocalClusterUpdateTask

use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.

the class ZenDiscovery method processNextPendingClusterState.

/**
 * Submits an {@code URGENT}-priority local cluster state update task that takes the next
 * pending cluster state from {@code publishClusterState.pendingStatesQueue()} and applies it.
 *
 * @param reason text embedded in the task source string
 *               ({@code "zen-disco-receive(from master [" + reason + "])"})
 */
void processNextPendingClusterState(String reason) {
    clusterService.submitStateUpdateTask("zen-disco-receive(from master [" + reason + "])", new LocalClusterUpdateTask(Priority.URGENT) {

        // the state picked in execute(); shared with onFailure/clusterStateProcessed so they can
        // report the outcome back to the pending states queue. Stays null when nothing was pending.
        ClusterState newClusterState = null;

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) {
            newClusterState = publishClusterState.pendingStatesQueue().getNextClusterStateToProcess();
            // all pending states have been processed
            if (newClusterState == null) {
                return unchanged();
            }
            assert newClusterState.nodes().getMasterNode() != null : "received a cluster state without a master";
            assert !newClusterState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock()) : "received a cluster state with a master block";
            // the local node is currently elected master but received a state naming a master:
            // delegate the conflict to handleAnotherMaster
            if (currentState.nodes().isLocalNodeElectedMaster()) {
                return handleAnotherMaster(currentState, newClusterState.nodes().getMasterNode(), newClusterState.version(), "via a new cluster state");
            }
            // leave the current state untouched when the helper says the incoming one must not be applied
            if (shouldIgnoreOrRejectNewClusterState(logger, currentState, newClusterState)) {
                return unchanged();
            }
            if (currentState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock())) {
                // its a fresh update from the master as we transition from a start of not having a master to having one
                logger.debug("got first state from fresh master [{}]", newClusterState.nodes().getMasterNodeId());
                // adopt the incoming state as-is, without the object-reuse pass below
                return newState(newClusterState);
            }
            // some optimizations to make sure we keep old objects where possible
            ClusterState.Builder builder = ClusterState.builder(newClusterState);
            // if the routing table did not change, use the original one
            if (newClusterState.routingTable().version() == currentState.routingTable().version()) {
                builder.routingTable(currentState.routingTable());
            }
            // same for metadata
            if (newClusterState.metaData().version() == currentState.metaData().version()) {
                builder.metaData(currentState.metaData());
            } else {
                // if its not the same version, only copy over new indices or ones that changed the version
                MetaData.Builder metaDataBuilder = MetaData.builder(newClusterState.metaData()).removeAllIndices();
                for (IndexMetaData indexMetaData : newClusterState.metaData()) {
                    IndexMetaData currentIndexMetaData = currentState.metaData().index(indexMetaData.getIndex());
                    // reuse the existing instance only when it is the same index (UUID) at the same version
                    if (currentIndexMetaData != null && currentIndexMetaData.isSameUUID(indexMetaData.getIndexUUID()) && currentIndexMetaData.getVersion() == indexMetaData.getVersion()) {
                        // safe to reuse
                        metaDataBuilder.put(currentIndexMetaData, false);
                    } else {
                        metaDataBuilder.put(indexMetaData, false);
                    }
                }
                builder.metaData(metaDataBuilder);
            }
            return newState(builder.build());
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
            // only report back to the queue if execute() actually dequeued a state
            if (newClusterState != null) {
                try {
                    publishClusterState.pendingStatesQueue().markAsFailed(newClusterState, e);
                } catch (Exception inner) {
                    // keep the original failure attached to the secondary one before logging it
                    inner.addSuppressed(e);
                    logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected exception while failing [{}]", source), inner);
                }
            }
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            try {
                if (newClusterState != null) {
                    // check to see that we monitor the correct master of the cluster
                    if (masterFD.masterNode() == null || !masterFD.masterNode().equals(newClusterState.nodes().getMasterNode())) {
                        masterFD.restart(newClusterState.nodes().getMasterNode(), "new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]");
                    }
                    publishClusterState.pendingStatesQueue().markAsProcessed(newClusterState);
                }
            } catch (Exception e) {
                // route post-processing errors through the same failure handling as execute() errors
                onFailure(source, e);
            }
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) LocalClusterUpdateTask(org.elasticsearch.cluster.LocalClusterUpdateTask) Supplier(org.apache.logging.log4j.util.Supplier) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ElasticsearchException(org.elasticsearch.ElasticsearchException) TransportException(org.elasticsearch.transport.TransportException) NotMasterException(org.elasticsearch.cluster.NotMasterException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData)

Example 3 with LocalClusterUpdateTask

use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.

the class ZenDiscovery method innerJoinCluster.

/**
 * Body of a join thread. Looks for a master via {@code findMaster()} for as long as this thread
 * is the active join thread, then either waits to be elected (when the local node was picked)
 * or sends a join request to the elected master and finalizes the join through a cluster state
 * update task. On failure a new join thread is started via {@code joinThreadControl}.
 */
private void innerJoinCluster() {
    final Thread joinThread = Thread.currentThread();
    nodeJoinController.startElectionContext();
    DiscoveryNode electedMaster = null;
    while (electedMaster == null && joinThreadControl.joinThreadActive(joinThread)) {
        electedMaster = findMaster();
    }
    if (!joinThreadControl.joinThreadActive(joinThread)) {
        logger.trace("thread is no longer in currentJoinThread. Stopping.");
        return;
    }

    if (!clusterService.localNode().equals(electedMaster)) {
        // another node was elected: process any incoming joins (they will fail because we are not the master)
        nodeJoinController.stopElectionContext(electedMaster + " elected");
        // send the join request
        final boolean joined = joinElectedMaster(electedMaster);
        // effectively-final copy for use inside the anonymous task below
        final DiscoveryNode expectedMaster = electedMaster;
        // finalize the join through the cluster state update thread
        clusterService.submitStateUpdateTask("finalize_join (" + electedMaster + ")", new LocalClusterUpdateTask() {

            @Override
            public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
                if (!joined) {
                    // failed to join. Try again...
                    joinThreadControl.markThreadAsDoneAndStartNew(joinThread);
                    return unchanged();
                }
                final DiscoveryNode currentMaster = currentState.getNodes().getMasterNode();
                if (currentMaster == null) {
                    // Post 1.3.0, the master should publish a new cluster state before acking our join request. we now should have
                    // a valid master.
                    logger.debug("no master node is set, despite of join request completing. retrying pings.");
                    joinThreadControl.markThreadAsDoneAndStartNew(joinThread);
                    return unchanged();
                }
                if (!currentMaster.equals(expectedMaster)) {
                    return joinThreadControl.stopRunningThreadAndRejoin(currentState, "master_switched_while_finalizing_join");
                }
                // Note: we do not have to start master fault detection here because it's set at {@link #processNextPendingClusterState }
                // when the first cluster state arrives.
                joinThreadControl.markThreadAsDone(joinThread);
                return unchanged();
            }

            @Override
            public void onFailure(String source, @Nullable Exception e) {
                logger.error("unexpected error while trying to finalize cluster join", e);
                joinThreadControl.markThreadAsDoneAndStartNew(joinThread);
            }
        });
        return;
    }

    // the local node was elected; it counts as one of the required master nodes itself
    final int joinsNeeded = Math.max(0, electMaster.minimumMasterNodes() - 1);
    logger.debug("elected as master, waiting for incoming joins ([{}] needed)", joinsNeeded);
    nodeJoinController.waitToBeElectedAsMaster(joinsNeeded, masterElectionWaitForJoinsTimeout, new NodeJoinController.ElectionCallback() {

        @Override
        public void onElectedAsMaster(ClusterState state) {
            joinThreadControl.markThreadAsDone(joinThread);
            // nodes fault detection is only started when we are master
            // (it may be that we received a cluster state while pinging)
            nodesFD.updateNodesAndPing(state);
        }

        @Override
        public void onFailure(Throwable t) {
            logger.trace("failed while waiting for nodes to join, rejoining", t);
            joinThreadControl.markThreadAsDoneAndStartNew(joinThread);
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) LocalClusterUpdateTask(org.elasticsearch.cluster.LocalClusterUpdateTask) ElasticsearchException(org.elasticsearch.ElasticsearchException) TransportException(org.elasticsearch.transport.TransportException) NotMasterException(org.elasticsearch.cluster.NotMasterException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 4 with LocalClusterUpdateTask

use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.

the class BlockClusterStateProcessing method startDisrupting.

/**
 * Blocks cluster state processing on the disrupted node by submitting an
 * {@code IMMEDIATE}-priority task whose {@code execute} waits on the latch stored in
 * {@code disruptionLatch}.
 *
 * <p>Does nothing when no disrupted node is set or no {@link ClusterService} instance is
 * available for it. Otherwise returns once the blocking task has started executing, has been
 * reported as failed, or the calling thread is interrupted; in the last case the thread's
 * interrupt status is restored before returning.
 */
@Override
public void startDisrupting() {
    // snapshot the field so the null check and later uses see the same value
    final String disruptionNodeCopy = disruptedNode;
    if (disruptionNodeCopy == null) {
        return;
    }
    ClusterService clusterService = cluster.getInstance(ClusterService.class, disruptionNodeCopy);
    if (clusterService == null) {
        return;
    }
    logger.info("delaying cluster state updates on node [{}]", disruptionNodeCopy);
    // install a fresh latch; a non-null previous value means a disruption is still in progress
    boolean success = disruptionLatch.compareAndSet(null, new CountDownLatch(1));
    assert success : "startDisrupting called without waiting on stopDisrupting to complete";
    final CountDownLatch started = new CountDownLatch(1);
    clusterService.submitStateUpdateTask("service_disruption_block", new LocalClusterUpdateTask(Priority.IMMEDIATE) {

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
            // signal the caller that the blocking task is now running
            started.countDown();
            CountDownLatch latch = disruptionLatch.get();
            if (latch != null) {
                // hold this task until the latch is released
                latch.await();
            }
            return unchanged();
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.error("unexpected error during disruption", e);
            // release the caller if the task failed before execute() could signal it;
            // counting down an already-released latch is a no-op
            started.countDown();
        }
    });
    try {
        started.await();
    } catch (InterruptedException e) {
        // stop waiting, but keep the interrupt visible to the caller instead of swallowing it
        Thread.currentThread().interrupt();
    }
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) ClusterService(org.elasticsearch.cluster.service.ClusterService) LocalClusterUpdateTask(org.elasticsearch.cluster.LocalClusterUpdateTask) CountDownLatch(java.util.concurrent.CountDownLatch)

Example 5 with LocalClusterUpdateTask

use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.

the class ZenDiscovery method handleMasterGone.

/**
 * Reacts to the loss of the given master node by submitting an {@code IMMEDIATE}-priority
 * local task that, if that node is still the master in the current cluster state, fails all
 * pending cluster states and triggers a rejoin.
 *
 * @param masterNode the master that was reported gone
 * @param cause      the failure attached to the "master_left" log entry
 * @param reason     human-readable reason, used in log and exception messages
 */
private void handleMasterGone(final DiscoveryNode masterNode, final Throwable cause, final String reason) {
    // ignore the failure when we are not started, or when we are the master ourselves
    // (we might get this on both a master telling us it is shutting down, and then the disconnect failure)
    if (lifecycleState() != Lifecycle.State.STARTED || localNodeMaster()) {
        return;
    }
    logger.info((Supplier<?>) () -> new ParameterizedMessage("master_left [{}], reason [{}]", masterNode, reason), cause);
    final String taskSource = "master_failed (" + masterNode + ")";
    clusterService.submitStateUpdateTask(taskSource, new LocalClusterUpdateTask(Priority.IMMEDIATE) {

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) {
            final boolean stillCurrentMaster = masterNode.equals(currentState.nodes().getMasterNode());
            if (stillCurrentMaster) {
                // flush any pending cluster states from old master, so it will not be set as master again
                publishClusterState.pendingStatesQueue().failAllStatesAndClear(new ElasticsearchException("master left [{}]", reason));
                return rejoin(currentState, "master left (reason = " + reason + ")");
            }
            // master got switched on us, no need to send anything
            return unchanged();
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) LocalClusterUpdateTask(org.elasticsearch.cluster.LocalClusterUpdateTask) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) Supplier(org.apache.logging.log4j.util.Supplier) ElasticsearchException(org.elasticsearch.ElasticsearchException) ElasticsearchException(org.elasticsearch.ElasticsearchException) TransportException(org.elasticsearch.transport.TransportException) NotMasterException(org.elasticsearch.cluster.NotMasterException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

ClusterState (org.elasticsearch.cluster.ClusterState)11 LocalClusterUpdateTask (org.elasticsearch.cluster.LocalClusterUpdateTask)11 IOException (java.io.IOException)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 ElasticsearchException (org.elasticsearch.ElasticsearchException)6 ExecutionException (java.util.concurrent.ExecutionException)5 NotMasterException (org.elasticsearch.cluster.NotMasterException)4 TransportException (org.elasticsearch.transport.TransportException)4 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)3 Supplier (org.apache.logging.log4j.util.Supplier)3 ClusterService (org.elasticsearch.cluster.service.ClusterService)3 BrokenBarrierException (java.util.concurrent.BrokenBarrierException)2 ClusterStateUpdateTask (org.elasticsearch.cluster.ClusterStateUpdateTask)2 Matchers.containsString (org.hamcrest.Matchers.containsString)2 HashSet (java.util.HashSet)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 ClusterHealthRequest (org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest)1 ClusterHealthResponse (org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse)1 TransportClusterHealthAction (org.elasticsearch.action.admin.cluster.health.TransportClusterHealthAction)1 ClusterStateResponse (org.elasticsearch.action.admin.cluster.state.ClusterStateResponse)1