
Example 56 with DiscoveryNodes

use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.

In class Coordinator, method publish:

@Override
public void publish(ClusterChangedEvent clusterChangedEvent, ActionListener<Void> publishListener, AckListener ackListener) {
    try {
        synchronized (mutex) {
            if (mode != Mode.LEADER || getCurrentTerm() != clusterChangedEvent.state().term()) {
                LOGGER.debug(() -> new ParameterizedMessage("[{}] failed publication as node is no longer master for term {}", clusterChangedEvent.source(), clusterChangedEvent.state().term()));
                publishListener.onFailure(new FailedToCommitClusterStateException("node is no longer master for term " + clusterChangedEvent.state().term() + " while handling publication"));
                return;
            }
            if (currentPublication.isPresent()) {
                assert false : "[" + currentPublication.get() + "] in progress, cannot start new publication";
                LOGGER.warn(() -> new ParameterizedMessage("[{}] failed publication as already publication in progress", clusterChangedEvent.source()));
                publishListener.onFailure(new FailedToCommitClusterStateException("publication " + currentPublication.get() + " already in progress"));
                return;
            }
            assert assertPreviousStateConsistency(clusterChangedEvent);
            final ClusterState clusterState = clusterChangedEvent.state();
            assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())) : getLocalNode() + " should be in published " + clusterState;
            final PublicationTransportHandler.PublicationContext publicationContext = publicationHandler.newPublicationContext(clusterChangedEvent);
            final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState);
            final CoordinatorPublication publication = new CoordinatorPublication(publishRequest, publicationContext, new ListenableFuture<>(), ackListener, publishListener);
            currentPublication = Optional.of(publication);
            final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes();
            leaderChecker.setCurrentNodes(publishNodes);
            followersChecker.setCurrentNodes(publishNodes);
            lagDetector.setTrackedNodes(publishNodes);
            publication.start(followersChecker.getFaultyNodes());
        }
    } catch (Exception e) {
        LOGGER.debug(() -> new ParameterizedMessage("[{}] publishing failed", clusterChangedEvent.source()), e);
        publishListener.onFailure(new FailedToCommitClusterStateException("publishing failed", e));
    }
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes) IOException(java.io.IOException)
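
The DiscoveryNodes work in this method is the hand-off at the end: the node set of the accepted state is passed, unchanged, to the three fault trackers before the publication starts. A minimal sketch of that hand-off as an illustrative helper (the helper name is not in the original; all calls appear in the method above):

// Illustrative helper: wire the fault detectors to the node set of the state
// that is about to be published, so leader checks, follower checks and lag
// detection all track the same immutable DiscoveryNodes instance.
private static void trackPublishedNodes(PublishRequest publishRequest,
                                        LeaderChecker leaderChecker,
                                        FollowersChecker followersChecker,
                                        LagDetector lagDetector) {
    final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes();
    leaderChecker.setCurrentNodes(publishNodes);
    followersChecker.setCurrentNodes(publishNodes);
    lagDetector.setTrackedNodes(publishNodes);
}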

Example 57 with DiscoveryNodes

use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.

In class Coordinator, method clusterStateWithNoMasterBlock:

private ClusterState clusterStateWithNoMasterBlock(ClusterState clusterState) {
    if (clusterState.nodes().getMasterNodeId() != null) {
        // remove block if it already exists before adding new one
        assert clusterState.blocks().hasGlobalBlockWithId(NO_MASTER_BLOCK_ID) == false : "NO_MASTER_BLOCK should only be added by Coordinator";
        final ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks()).addGlobalBlock(noMasterBlockService.getNoMasterBlock()).build();
        final DiscoveryNodes discoveryNodes = new DiscoveryNodes.Builder(clusterState.nodes()).masterNodeId(null).build();
        return ClusterState.builder(clusterState).blocks(clusterBlocks).nodes(discoveryNodes).build();
    } else {
        return clusterState;
    }
}
Also used : ClusterBlocks(org.elasticsearch.cluster.block.ClusterBlocks) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)
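
From a caller's point of view, the rebuilt DiscoveryNodes differs only in its master pointer; the node entries themselves are preserved. A hypothetical caller-side check (not from the original source) that states the same invariants:

// Hypothetical usage sketch: the transformed state has no master node id,
// carries the no-master block, and keeps the same set of nodes.
ClusterState blocked = clusterStateWithNoMasterBlock(currentState);
assert blocked.nodes().getMasterNodeId() == null;
assert blocked.blocks().hasGlobalBlockWithId(NO_MASTER_BLOCK_ID);
assert blocked.nodes().getSize() == currentState.nodes().getSize();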

Example 58 with DiscoveryNodes

use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.

In class VerifyNodeRepositoryAction, method verify:

public void verify(String repository, boolean readOnly, String verificationToken, final ActionListener<List<DiscoveryNode>> listener) {
    final DiscoveryNodes discoNodes = clusterService.state().nodes();
    final DiscoveryNode localNode = discoNodes.getLocalNode();
    final ObjectContainer<DiscoveryNode> masterAndDataNodes = discoNodes.getMasterAndDataNodes().values();
    final List<DiscoveryNode> nodes = new ArrayList<>();
    for (ObjectCursor<DiscoveryNode> cursor : masterAndDataNodes) {
        DiscoveryNode node = cursor.value;
        if (readOnly && node.getVersion().before(Version.V_4_2_0)) {
            continue;
        }
        nodes.add(node);
    }
    final CopyOnWriteArrayList<VerificationFailure> errors = new CopyOnWriteArrayList<>();
    final AtomicInteger counter = new AtomicInteger(nodes.size());
    for (final DiscoveryNode node : nodes) {
        if (node.equals(localNode)) {
            try {
                doVerify(repository, verificationToken, localNode);
            } catch (Exception e) {
                LOGGER.warn(() -> new ParameterizedMessage("[{}] failed to verify repository", repository), e);
                errors.add(new VerificationFailure(node.getId(), e));
            }
            if (counter.decrementAndGet() == 0) {
                finishVerification(repository, listener, nodes, errors);
            }
        } else {
            transportService.sendRequest(node, ACTION_NAME, new VerifyNodeRepositoryRequest(repository, verificationToken), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {

                @Override
                public void handleResponse(TransportResponse.Empty response) {
                    if (counter.decrementAndGet() == 0) {
                        finishVerification(repository, listener, nodes, errors);
                    }
                }

                @Override
                public void handleException(TransportException exp) {
                    errors.add(new VerificationFailure(node.getId(), exp));
                    if (counter.decrementAndGet() == 0) {
                        finishVerification(repository, listener, nodes, errors);
                    }
                }
            });
        }
    }
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) TransportResponse(org.elasticsearch.transport.TransportResponse) TransportException(org.elasticsearch.transport.TransportException) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) EmptyTransportResponseHandler(org.elasticsearch.transport.EmptyTransportResponseHandler) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)
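
Only the first few lines touch DiscoveryNodes; the rest of the method is request fan-out and completion counting. The node-selection step, isolated as a sketch with the same hppc-cursor iteration (the helper name is illustrative, not in the original):

// Illustrative helper: pick the nodes that will run repository verification.
private static List<DiscoveryNode> selectVerificationNodes(DiscoveryNodes discoNodes, boolean readOnly) {
    final List<DiscoveryNode> nodes = new ArrayList<>();
    for (ObjectCursor<DiscoveryNode> cursor : discoNodes.getMasterAndDataNodes().values()) {
        final DiscoveryNode node = cursor.value;
        // Mirrors the original: read-only verification skips nodes older than 4.2.0.
        if (readOnly && node.getVersion().before(Version.V_4_2_0)) {
            continue;
        }
        nodes.add(node);
    }
    return nodes;
}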

Example 59 with DiscoveryNodes

use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.

In class SnapshotsService, method processSnapshotsOnRemovedNodes:

/**
 * Cleans up shard snapshots that were running on removed nodes
 */
private void processSnapshotsOnRemovedNodes() {
    clusterService.submitStateUpdateTask("update snapshot state after node removal", new ClusterStateUpdateTask() {

        @Override
        public ClusterState execute(ClusterState currentState) {
            DiscoveryNodes nodes = currentState.nodes();
            SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
            if (snapshots == null) {
                return currentState;
            }
            boolean changed = false;
            ArrayList<SnapshotsInProgress.Entry> entries = new ArrayList<>();
            for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) {
                SnapshotsInProgress.Entry updatedSnapshot = snapshot;
                if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
                    ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
                    boolean snapshotChanged = false;
                    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshot.shards()) {
                        final ShardSnapshotStatus shardStatus = shardEntry.value;
                        final ShardId shardId = shardEntry.key;
                        if (!shardStatus.state().completed() && shardStatus.nodeId() != null) {
                            if (nodes.nodeExists(shardStatus.nodeId())) {
                                shards.put(shardId, shardStatus);
                            } else {
                                // TODO: Restart snapshot on another node?
                                snapshotChanged = true;
                                LOGGER.warn("failing snapshot of shard [{}] on closed node [{}]", shardId, shardStatus.nodeId());
                                shards.put(shardId, new ShardSnapshotStatus(shardStatus.nodeId(), ShardState.FAILED, "node shutdown", shardStatus.generation()));
                            }
                        } else {
                            shards.put(shardId, shardStatus);
                        }
                    }
                    if (snapshotChanged) {
                        changed = true;
                        ImmutableOpenMap<ShardId, ShardSnapshotStatus> shardsMap = shards.build();
                        if (!snapshot.state().completed() && completed(shardsMap.values())) {
                            updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shardsMap);
                        } else {
                            updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, snapshot.state(), shardsMap);
                        }
                    }
                    entries.add(updatedSnapshot);
                } else if (snapshot.state() == State.INIT && initializingSnapshots.contains(snapshot.snapshot()) == false) {
                    changed = true;
                    // Mark the snapshot as aborted as it failed to start from the previous master
                    updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.ABORTED, snapshot.shards());
                    entries.add(updatedSnapshot);
                    // Clean up the snapshot that failed to start from the old master
                    deleteSnapshot(snapshot.snapshot(), new ActionListener<Void>() {

                        @Override
                        public void onResponse(Void aVoid) {
                            LOGGER.debug("cleaned up abandoned snapshot {} in INIT state", snapshot.snapshot());
                        }

                        @Override
                        public void onFailure(Exception e) {
                            LOGGER.warn("failed to clean up abandoned snapshot {} in INIT state", snapshot.snapshot());
                        }
                    }, updatedSnapshot.repositoryStateId(), false);
                }
                assert updatedSnapshot.shards().size() == snapshot.shards().size() : "Shard count changed during snapshot status update from [" + snapshot + "] to [" + updatedSnapshot + "]";
            }
            if (changed) {
                return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build();
            }
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            LOGGER.warn("failed to update snapshot state after node removal");
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) ClusterStateUpdateTask(org.elasticsearch.cluster.ClusterStateUpdateTask) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) FailedToCommitClusterStateException(org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException) RepositoryException(org.elasticsearch.repositories.RepositoryException) RepositoryMissingException(org.elasticsearch.repositories.RepositoryMissingException) NotMasterException(org.elasticsearch.cluster.NotMasterException) ShardId(org.elasticsearch.index.shard.ShardId) SnapshotsInProgress(org.elasticsearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)
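
The single DiscoveryNodes call that drives this whole update is nodes.nodeExists(shardStatus.nodeId()). A sketch of that per-shard decision, pulled out of the loop above (the helper name is illustrative, not in the original):

// Illustrative helper: decide what happens to one running shard snapshot
// after the cluster's node set changed.
private static ShardSnapshotStatus afterNodeRemoval(DiscoveryNodes nodes, ShardSnapshotStatus status) {
    // Completed shards and shards without an assigned node are left alone.
    if (status.state().completed() || status.nodeId() == null) {
        return status;
    }
    // The assigned node is still part of the cluster: keep the shard snapshot running.
    if (nodes.nodeExists(status.nodeId())) {
        return status;
    }
    // The node is gone: mark the shard snapshot as failed, preserving its generation.
    return new ShardSnapshotStatus(status.nodeId(), ShardState.FAILED, "node shutdown", status.generation());
}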

Example 60 with DiscoveryNodes

use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.

In class GatewayService, method clusterChanged:

@Override
public void clusterChanged(final ClusterChangedEvent event) {
    if (lifecycle.stoppedOrClosed()) {
        return;
    }
    final ClusterState state = event.state();
    if (state.nodes().isLocalNodeElectedMaster() == false) {
        // not our job to recover
        return;
    }
    if (state.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) {
        // already recovered
        return;
    }
    final DiscoveryNodes nodes = state.nodes();
    if (state.nodes().getMasterNodeId() == null) {
        LOGGER.debug("not recovering from gateway, no master elected yet");
    } else if (recoverAfterNodes != -1 && (nodes.getMasterAndDataNodes().size()) < recoverAfterNodes) {
        LOGGER.debug("not recovering from gateway, nodes_size (data+master) [{}] < recover_after_nodes [{}]", nodes.getMasterAndDataNodes().size(), recoverAfterNodes);
    } else if (recoverAfterDataNodes != -1 && nodes.getDataNodes().size() < recoverAfterDataNodes) {
        LOGGER.debug("not recovering from gateway, nodes_size (data) [{}] < recover_after_data_nodes [{}]", nodes.getDataNodes().size(), recoverAfterDataNodes);
    } else if (recoverAfterMasterNodes != -1 && nodes.getMasterNodes().size() < recoverAfterMasterNodes) {
        LOGGER.debug("not recovering from gateway, nodes_size (master) [{}] < recover_after_master_nodes [{}]", nodes.getMasterNodes().size(), recoverAfterMasterNodes);
    } else {
        boolean enforceRecoverAfterTime;
        String reason;
        if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
            // no expected is set, honor the setting if they are there
            enforceRecoverAfterTime = true;
            reason = "recover_after_time was set to [" + recoverAfterTime + "]";
        } else {
            // one of the expected is set, see if all of them meet the need, and ignore the timeout in this case
            enforceRecoverAfterTime = false;
            reason = "";
            if (expectedNodes != -1 && (nodes.getMasterAndDataNodes().size() < expectedNodes)) {
                // does not meet the expected...
                enforceRecoverAfterTime = true;
                reason = "expecting [" + expectedNodes + "] nodes, but only have [" + nodes.getMasterAndDataNodes().size() + "]";
            } else if (expectedDataNodes != -1 && (nodes.getDataNodes().size() < expectedDataNodes)) {
                // does not meet the expected...
                enforceRecoverAfterTime = true;
                reason = "expecting [" + expectedDataNodes + "] data nodes, but only have [" + nodes.getDataNodes().size() + "]";
            } else if (expectedMasterNodes != -1 && (nodes.getMasterNodes().size() < expectedMasterNodes)) {
                // does not meet the expected...
                enforceRecoverAfterTime = true;
                reason = "expecting [" + expectedMasterNodes + "] master nodes, but only have [" + nodes.getMasterNodes().size() + "]";
            }
        }
        performStateRecovery(enforceRecoverAfterTime, reason);
    }
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes)
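
Every DiscoveryNodes read in this listener is a size check against one of the recover_after_* settings. The gate condenses to something like the following sketch (an illustrative helper, with the settings passed in rather than read from the service's fields):

// Illustrative helper: true while the cluster is still below any configured
// recover_after_* threshold, so state recovery has to keep waiting.
private static boolean belowRecoveryThresholds(DiscoveryNodes nodes,
                                               int recoverAfterNodes,
                                               int recoverAfterDataNodes,
                                               int recoverAfterMasterNodes) {
    return (recoverAfterNodes != -1 && nodes.getMasterAndDataNodes().size() < recoverAfterNodes)
        || (recoverAfterDataNodes != -1 && nodes.getDataNodes().size() < recoverAfterDataNodes)
        || (recoverAfterMasterNodes != -1 && nodes.getMasterNodes().size() < recoverAfterMasterNodes);
}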

Aggregations

DiscoveryNodes (org.elasticsearch.cluster.node.DiscoveryNodes): 129 usages
DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode): 74 usages
ClusterState (org.elasticsearch.cluster.ClusterState): 45 usages
Settings (org.elasticsearch.common.settings.Settings): 37 usages
ArrayList (java.util.ArrayList): 32 usages
IOException (java.io.IOException): 27 usages
HashSet (java.util.HashSet): 25 usages
List (java.util.List): 24 usages
Map (java.util.Map): 23 usages
TransportService (org.elasticsearch.transport.TransportService): 23 usages
Version (org.elasticsearch.Version): 22 usages
HashMap (java.util.HashMap): 20 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 20 usages
ShardRouting (org.elasticsearch.cluster.routing.ShardRouting): 20 usages
Set (java.util.Set): 19 usages
TransportException (org.elasticsearch.transport.TransportException): 19 usages
Collections (java.util.Collections): 18 usages
ThreadPool (org.elasticsearch.threadpool.ThreadPool): 18 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 16 usages
Collectors (java.util.stream.Collectors): 16 usages