use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.
the class SlowClusterStateProcessing method interruptClusterStateProcessing.
/**
 * Submits an IMMEDIATE-priority local cluster state task to the disrupted node whose
 * {@code execute} sleeps for approximately {@code duration}, and waits for that task to finish.
 *
 * @param duration how long the submitted task should sleep
 * @return {@code false} if there is no disrupted node or no {@link ClusterService} instance could be
 *         obtained for it; {@code true} once the submitted task has completed or failed
 * @throws InterruptedException if the calling thread is interrupted a second time while waiting for
 *         the task to finish after a stop was already signalled
 */
private boolean interruptClusterStateProcessing(final TimeValue duration) throws InterruptedException {
    // take a snapshot of the field so the null check and the later uses see the same value
    final String targetNode = disruptedNode;
    if (targetNode == null) {
        return false;
    }
    logger.info("delaying cluster state updates on node [{}] for [{}]", targetNode, duration);
    final ClusterService targetClusterService = cluster.getInstance(ClusterService.class, targetNode);
    if (targetClusterService == null) {
        return false;
    }
    final CountDownLatch taskFinished = new CountDownLatch(1);
    final AtomicBoolean stopRequested = new AtomicBoolean(false);
    targetClusterService.submitStateUpdateTask("service_disruption_delay", new LocalClusterUpdateTask(Priority.IMMEDIATE) {

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
            // sleep in 200ms slices so a stop request is noticed quickly
            long remainingSlices = duration.millis() / 200;
            while (remainingSlices > 0 && !stopRequested.get()) {
                Thread.sleep(200);
                remainingSlices--;
            }
            // sleep the sub-200ms remainder unless we were asked to stop
            if (!stopRequested.get()) {
                Thread.sleep(duration.millis() % 200);
            }
            taskFinished.countDown();
            return unchanged();
        }

        @Override
        public void onFailure(String source, Exception e) {
            // release the waiting caller even when the task did not complete normally
            taskFinished.countDown();
        }
    });
    try {
        taskFinished.await();
    } catch (InterruptedException e) {
        // ask the sleeping task to stop, then wait once more: the cluster state thread
        // should really be released before we report completion
        stopRequested.set(true);
        taskFinished.await();
    }
    return true;
}
use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.
the class ZenDiscovery method processNextPendingClusterState.
/**
 * Submits an URGENT-priority local task that takes the next pending cluster state from the
 * publish queue and applies it, reusing objects from the current state where versions match.
 *
 * @param reason text embedded in the task source string
 */
void processNextPendingClusterState(String reason) {
    clusterService.submitStateUpdateTask("zen-disco-receive(from master [" + reason + "])", new LocalClusterUpdateTask(Priority.URGENT) {

        // the state picked up by execute(); stays null when the queue had nothing to process
        ClusterState incomingState = null;

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) {
            incomingState = publishClusterState.pendingStatesQueue().getNextClusterStateToProcess();
            if (incomingState == null) {
                // nothing left in the queue
                return unchanged();
            }
            assert incomingState.nodes().getMasterNode() != null : "received a cluster state without a master";
            assert !incomingState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock()) : "received a cluster state with a master block";

            if (currentState.nodes().isLocalNodeElectedMaster()) {
                return handleAnotherMaster(currentState, incomingState.nodes().getMasterNode(), incomingState.version(), "via a new cluster state");
            }
            if (shouldIgnoreOrRejectNewClusterState(logger, currentState, incomingState)) {
                return unchanged();
            }
            if (currentState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock())) {
                // first state from a master after a period without one: take it as is
                logger.debug("got first state from fresh master [{}]", incomingState.nodes().getMasterNodeId());
                return newState(incomingState);
            }

            // keep existing objects wherever the versions say nothing changed
            ClusterState.Builder merged = ClusterState.builder(incomingState);
            if (incomingState.routingTable().version() == currentState.routingTable().version()) {
                // routing table is unchanged, keep the instance we already have
                merged.routingTable(currentState.routingTable());
            }
            if (incomingState.metaData().version() != currentState.metaData().version()) {
                // metadata changed: rebuild the index set, keeping the current instance of every
                // index whose UUID and version are the same in both states
                MetaData.Builder rebuiltMetaData = MetaData.builder(incomingState.metaData()).removeAllIndices();
                for (IndexMetaData incomingIndex : incomingState.metaData()) {
                    IndexMetaData knownIndex = currentState.metaData().index(incomingIndex.getIndex());
                    boolean reusable = knownIndex != null
                        && knownIndex.isSameUUID(incomingIndex.getIndexUUID())
                        && knownIndex.getVersion() == incomingIndex.getVersion();
                    rebuiltMetaData.put(reusable ? knownIndex : incomingIndex, false);
                }
                merged.metaData(rebuiltMetaData);
            } else {
                // metadata is unchanged, keep the instance we already have
                merged.metaData(currentState.metaData());
            }
            return newState(merged.build());
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
            if (incomingState == null) {
                return;
            }
            try {
                publishClusterState.pendingStatesQueue().markAsFailed(incomingState, e);
            } catch (Exception inner) {
                inner.addSuppressed(e);
                logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected exception while failing [{}]", source), inner);
            }
        }

        @Override
        public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
            try {
                if (incomingState == null) {
                    return;
                }
                // make sure master fault detection watches the master named by the new state
                boolean watchingExpectedMaster = masterFD.masterNode() != null
                    && masterFD.masterNode().equals(incomingState.nodes().getMasterNode());
                if (!watchingExpectedMaster) {
                    masterFD.restart(incomingState.nodes().getMasterNode(), "new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]");
                }
                publishClusterState.pendingStatesQueue().markAsProcessed(incomingState);
            } catch (Exception e) {
                onFailure(source, e);
            }
        }
    });
}
use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.
the class ZenDiscovery method innerJoinCluster.
/**
 * Body of a join thread. Looks for a master while this thread is still the active join thread,
 * then either waits to be elected itself or sends a join request to the elected master and
 * finalizes the join on the cluster state update thread. Failure paths hand over to a new join thread.
 */
private void innerJoinCluster() {
    final Thread currentThread = Thread.currentThread();
    DiscoveryNode masterNode = null;
    nodeJoinController.startElectionContext();
    while (masterNode == null && joinThreadControl.joinThreadActive(currentThread)) {
        masterNode = findMaster();
    }

    if (!joinThreadControl.joinThreadActive(currentThread)) {
        logger.trace("thread is no longer in currentJoinThread. Stopping.");
        return;
    }

    if (!clusterService.localNode().equals(masterNode)) {
        // another node was elected: stop the election context so that accumulated joins are
        // processed (they will fail, since we are not the master)
        nodeJoinController.stopElectionContext(masterNode + " elected");

        // ask the elected master to let us in
        final boolean joined = joinElectedMaster(masterNode);

        // the rest of the join is completed on the cluster state update thread
        final DiscoveryNode masterToJoin = masterNode;
        clusterService.submitStateUpdateTask("finalize_join (" + masterNode + ")", new LocalClusterUpdateTask() {

            @Override
            public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
                if (!joined) {
                    // the join request failed, start over with a new join thread
                    joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
                    return unchanged();
                }

                final DiscoveryNode currentMaster = currentState.getNodes().getMasterNode();
                if (currentMaster == null) {
                    // Post 1.3.0 the master publishes a new cluster state before acking the join request,
                    // so a valid master is expected at this point.
                    logger.debug("no master node is set, despite of join request completing. retrying pings.");
                    joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
                    return unchanged();
                }

                if (!currentMaster.equals(masterToJoin)) {
                    return joinThreadControl.stopRunningThreadAndRejoin(currentState, "master_switched_while_finalizing_join");
                }

                // Master fault detection is not started here; it is set up in
                // {@link #processNextPendingClusterState} when the first cluster state arrives.
                joinThreadControl.markThreadAsDone(currentThread);
                return unchanged();
            }

            @Override
            public void onFailure(String source, @Nullable Exception e) {
                logger.error("unexpected error while trying to finalize cluster join", e);
                joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
            }
        });
        return;
    }

    // we were elected; our own vote counts as one of the required joins
    final int requiredJoins = Math.max(0, electMaster.minimumMasterNodes() - 1);
    logger.debug("elected as master, waiting for incoming joins ([{}] needed)", requiredJoins);
    nodeJoinController.waitToBeElectedAsMaster(requiredJoins, masterElectionWaitForJoinsTimeout, new NodeJoinController.ElectionCallback() {

        @Override
        public void onElectedAsMaster(ClusterState state) {
            joinThreadControl.markThreadAsDone(currentThread);
            // nodes fault detection is only started when we are the master
            // (a cluster state may have been received while pinging)
            nodesFD.updateNodesAndPing(state);
        }

        @Override
        public void onFailure(Throwable t) {
            logger.trace("failed while waiting for nodes to join, rejoining", t);
            joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
        }
    });
}
use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.
the class BlockClusterStateProcessing method startDisrupting.
/**
 * Starts the disruption: submits an IMMEDIATE-priority local cluster state task to the disrupted
 * node whose {@code execute} waits on the disruption latch, then waits until that task has started.
 * <p>
 * Does nothing if there is no disrupted node or no {@link ClusterService} instance can be obtained
 * for it. If the calling thread is interrupted while waiting for the task to start, the method
 * returns early with the thread's interrupt status restored. If the task fails, the waiting caller
 * is released as well so it cannot hang on a task that never ran.
 */
@Override
public void startDisrupting() {
    // take a snapshot of the field so the null check and the later uses see the same value
    final String disruptionNodeCopy = disruptedNode;
    if (disruptionNodeCopy == null) {
        return;
    }
    ClusterService clusterService = cluster.getInstance(ClusterService.class, disruptionNodeCopy);
    if (clusterService == null) {
        return;
    }
    logger.info("delaying cluster state updates on node [{}]", disruptionNodeCopy);
    boolean success = disruptionLatch.compareAndSet(null, new CountDownLatch(1));
    assert success : "startDisrupting called without waiting on stopDisrupting to complete";
    final CountDownLatch started = new CountDownLatch(1);
    clusterService.submitStateUpdateTask("service_disruption_block", new LocalClusterUpdateTask(Priority.IMMEDIATE) {

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
            // signal the caller that the task is now running
            started.countDown();
            // the latch reference may already have been cleared; only wait when one is present
            CountDownLatch latch = disruptionLatch.get();
            if (latch != null) {
                latch.await();
            }
            return unchanged();
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.error("unexpected error during disruption", e);
            // release the caller if the task failed before execute() ran; a no-op otherwise
            started.countDown();
        }
    });
    try {
        started.await();
    } catch (InterruptedException e) {
        // keep the interrupt visible to callers instead of silently dropping it
        Thread.currentThread().interrupt();
    }
}
use of org.elasticsearch.cluster.LocalClusterUpdateTask in project elasticsearch by elastic.
the class ZenDiscovery method handleMasterGone.
/**
 * Reacts to the loss of the master: logs the event and submits an IMMEDIATE-priority local task
 * that fails all pending cluster states and rejoins, provided {@code masterNode} is still the
 * master in the current state.
 *
 * @param masterNode the master that was reported gone
 * @param cause      the throwable logged with the event
 * @param reason     text describing why the master is considered gone
 */
private void handleMasterGone(final DiscoveryNode masterNode, final Throwable cause, final String reason) {
    // Ignore the failure when not started. Also ignore it when we are the master ourselves:
    // this can be reported both by a master announcing shutdown and by the later disconnect.
    if (lifecycleState() != Lifecycle.State.STARTED || localNodeMaster()) {
        return;
    }

    logger.info((Supplier<?>) () -> new ParameterizedMessage("master_left [{}], reason [{}]", masterNode, reason), cause);

    clusterService.submitStateUpdateTask("master_failed (" + masterNode + ")", new LocalClusterUpdateTask(Priority.IMMEDIATE) {

        @Override
        public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) {
            if (masterNode.equals(currentState.nodes().getMasterNode())) {
                // drop every pending state from the old master so it is not set as master again
                publishClusterState.pendingStatesQueue().failAllStatesAndClear(new ElasticsearchException("master left [{}]", reason));
                return rejoin(currentState, "master left (reason = " + reason + ")");
            }
            // the master already changed underneath us, nothing to do
            return unchanged();
        }

        @Override
        public void onFailure(String source, Exception e) {
            logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
        }
    });
}
Aggregations