use of org.apache.cassandra.locator.InetAddressAndPort in project cassandra by apache.
the class Gossiper method stop.
/**
 * Stops gossiping from this node, announcing the shutdown to live peers first when appropriate.
 *
 * <p>The announcement happens only when a local endpoint state exists, that state is not a silent
 * shutdown state, and {@code StorageService} reports the node as joined. In that case the local
 * {@code STATUS_WITH_PORT} and {@code STATUS} application states are set to the shutdown value, a
 * {@code GOSSIP_SHUTDOWN} message is sent to every endpoint in {@code liveEndpoints}, and the calling
 * thread sleeps for {@code SHUTDOWN_ANNOUNCE_DELAY_IN_MS} milliseconds. Otherwise a warning is logged.
 *
 * <p>In either case the scheduled gossip task, if one exists, is cancelled with {@code cancel(false)}.
 */
public void stop() {
    final EndpointState localState = endpointStateMap.get(getBroadcastAddressAndPort());
    final boolean shouldAnnounce = localState != null
                                   && !isSilentShutdownState(localState)
                                   && StorageService.instance.isJoined();

    if (!shouldAnnounce) {
        logger.warn("No local state, state is in silent shutdown, or node hasn't joined, not announcing shutdown");
    } else {
        logger.info("Announcing shutdown");
        addLocalApplicationState(ApplicationState.STATUS_WITH_PORT, StorageService.instance.valueFactory.shutdown(true));
        addLocalApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.shutdown(true));

        Message shutdownMessage = Message.out(Verb.GOSSIP_SHUTDOWN, noPayload);
        for (InetAddressAndPort peer : liveEndpoints) {
            MessagingService.instance().send(shutdownMessage, peer);
        }

        // Pause after sending so the announcement has time to go out before the task is cancelled.
        Uninterruptibles.sleepUninterruptibly(SHUTDOWN_ANNOUNCE_DELAY_IN_MS.getInt(), TimeUnit.MILLISECONDS);
    }

    if (scheduledGossipTask != null) {
        scheduledGossipTask.cancel(false);
    }
}
use of org.apache.cassandra.locator.InetAddressAndPort in project cassandra by apache.
the class Gossiper method assassinateEndpoint.
/**
 * Do not call this method unless you know what you are doing.
 * It will try extremely hard to obliterate any endpoint from the ring,
 * even if it does not know about it.
 *
 * <p>The work is submitted through {@code runInGossipStageBlocking} and proceeds as follows:
 * <ul>
 * <li>If no state is known for the endpoint, a new {@link EndpointState} is fabricated whose heartbeat
 * generation is derived from the current time plus 60 seconds and whose version is 9999.</li>
 * <li>Otherwise the current generation and heartbeat version are recorded, the task sleeps for
 * {@code StorageService.RING_DELAY} milliseconds, and the state is read again. A changed generation or
 * heartbeat version aborts the operation with a {@link RuntimeException}; a state that disappeared in
 * the meantime is only logged.</li>
 * <li>The endpoint's tokens are looked up (falling back to a single random token if none are found), its
 * {@code STATUS_WITH_PORT} and {@code STATUS} are set to a LEFT value built from those tokens and one
 * shared expire time, and the result is passed to {@code handleMajorStateChange}.</li>
 * </ul>
 *
 * @param address the endpoint to assassinate, in a form accepted by {@link InetAddressAndPort#getByName(String)}
 * @throws UnknownHostException if {@code address} cannot be resolved to an endpoint
 */
public void assassinateEndpoint(String address) throws UnknownHostException {
    InetAddressAndPort endpoint = InetAddressAndPort.getByName(address);
    runInGossipStageBlocking(() -> {
        EndpointState epState = endpointStateMap.get(endpoint);
        logger.warn("Assassinating {} via gossip", endpoint);
        if (epState == null) {
            // Unknown endpoint: fabricate a state with a generation 60 seconds ahead of the current time.
            epState = new EndpointState(new HeartBeatState((int) ((currentTimeMillis() + 60000) / 1000), 9999));
        } else {
            int generation = epState.getHeartBeatState().getGeneration();
            int heartbeat = epState.getHeartBeatState().getHeartBeatVersion();
            logger.info("Sleeping for {}ms to ensure {} does not change", StorageService.RING_DELAY, endpoint);
            Uninterruptibles.sleepUninterruptibly(StorageService.RING_DELAY, TimeUnit.MILLISECONDS);
            // make sure it did not change
            EndpointState newState = endpointStateMap.get(endpoint);
            if (newState == null) {
                logger.warn("Endpoint {} disappeared while trying to assassinate, continuing anyway", endpoint);
            } else if (newState.getHeartBeatState().getGeneration() != generation) {
                throw new RuntimeException("Endpoint still alive: " + endpoint + " generation changed while trying to assassinate it");
            } else if (newState.getHeartBeatState().getHeartBeatVersion() != heartbeat) {
                throw new RuntimeException("Endpoint still alive: " + endpoint + " heartbeat changed while trying to assassinate it");
            }
            // make sure we don't evict it too soon
            epState.updateTimestamp();
            epState.getHeartBeatState().forceNewerGenerationUnsafe();
        }

        Collection<Token> tokens = null;
        try {
            tokens = StorageService.instance.getTokenMetadata().getTokens(endpoint);
        } catch (Throwable th) {
            // Best effort: hand the failure to the stability inspector and fall back to a random token below.
            JVMStabilityInspector.inspectThrowable(th);
        }
        if (tokens == null || tokens.isEmpty()) {
            logger.warn("Trying to assassinate an endpoint {} that does not have any tokens assigned. This should not have happened, trying to continue with a random token.", address);
            tokens = Collections.singletonList(StorageService.instance.getTokenMetadata().partitioner.getRandomToken());
        }

        // Compute the expire time once so that both status values advertise exactly the same expiry.
        long expireTime = computeExpireTime();
        epState.addApplicationState(ApplicationState.STATUS_WITH_PORT, StorageService.instance.valueFactory.left(tokens, expireTime));
        epState.addApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.left(tokens, expireTime));
        handleMajorStateChange(endpoint, epState);
        // Wait four gossip intervals before reporting completion.
        Uninterruptibles.sleepUninterruptibly(intervalInMillis * 4, TimeUnit.MILLISECONDS);
        logger.warn("Finished assassinating {}", endpoint);
    });
}
use of org.apache.cassandra.locator.InetAddressAndPort in project cassandra by apache.
the class Gossiper method doShadowRound.
/**
 * Performs a single 'shadow' round of gossip: endpoint states are collected into the returned map only,
 * not into {@code endpointStateMap}.
 *
 * Used when preparing to join the ring:
 * <ul>
 * <li>when replacing a node, to get and assume its tokens</li>
 * <li>when joining, to check that the local host id matches any previous id for the endpoint address</li>
 * </ul>
 *
 * The method is synchronized because an in-progress flag signals that the shadow round must be cleared
 * again by calling {@link Gossiper#maybeFinishShadowRound(InetAddressAndPort, boolean, Map)}. That call
 * updates {@link Gossiper#endpointShadowStateMap} with the received values so that an immutable copy can be
 * returned to the caller of {@link Gossiper#doShadowRound()}. Therefore only a single shadow round may
 * execute at any one time.
 *
 * An empty SYN is sent to all seeds immediately and again every 5 seconds; from the second send onwards it
 * is also sent to {@code peers}. The round is polled once per second and gives up after
 * {@code StorageService.RING_DELAY} milliseconds on a seed, or twice that on a non-seed.
 *
 * @param peers Additional peers to try gossiping with.
 * @return an immutable copy of the endpoint states gathered during the shadow round (possibly empty), or
 *         {@code endpointShadowStateMap} itself, untouched, when there are neither seeds nor peers
 * @throws RuntimeException if this node is not a seed and the round timed out without completing
 * @throws UncheckedInterruptedException if the thread is interrupted while waiting
 */
public synchronized Map<InetAddressAndPort, EndpointState> doShadowRound(Set<InetAddressAndPort> peers) {
    buildSeedsList();
    // With no seeds and no additional peers there is nobody to gossip with,
    // in which case attempting a shadow round is pointless.
    if (seeds.isEmpty() && peers.isEmpty()) {
        return endpointShadowStateMap;
    }

    final boolean isSeed = DatabaseDescriptor.getSeeds().contains(getBroadcastAddressAndPort());
    // A non-seed waits twice RING_DELAY to increase the chance of a successful startup during a full
    // cluster bounce, giving the seeds a chance to start up before the shadow round is failed.
    final int shadowRoundDelay;
    if (isSeed) {
        shadowRoundDelay = StorageService.RING_DELAY;
    } else {
        shadowRoundDelay = StorageService.RING_DELAY * 2;
    }

    seedsInShadowRound.clear();
    endpointShadowStateMap.clear();

    // The SYN carries a completely empty digest list.
    final List<GossipDigest> emptyDigests = new ArrayList<>();
    final GossipDigestSyn emptySyn = new GossipDigestSyn(getClusterName(), getPartitionerName(), emptyDigests);
    final Message<GossipDigestSyn> synMessage = Message.out(GOSSIP_DIGEST_SYN, emptySyn);

    inShadowRound = true;
    boolean sendToPeers = false;
    int elapsedMillis = 0;
    try {
        for (;;) {
            // CASSANDRA-8072, retry at the beginning and every 5 seconds
            if (elapsedMillis % 5000 == 0) {
                logger.trace("Sending shadow round GOSSIP DIGEST SYN to seeds {}", seeds);
                for (InetAddressAndPort seed : seeds) {
                    MessagingService.instance().send(synMessage, seed);
                }
                // Known peers are only contacted from the second attempt on, i.e. if a seed didn't respond.
                if (sendToPeers) {
                    logger.trace("Sending shadow round GOSSIP DIGEST SYN to known peers {}", peers);
                    for (InetAddressAndPort peer : peers) {
                        MessagingService.instance().send(synMessage, peer);
                    }
                }
                sendToPeers = true;
            }

            Thread.sleep(1000);
            if (!inShadowRound) {
                // The flag was cleared while we slept: the round has finished.
                break;
            }

            elapsedMillis += 1000;
            if (elapsedMillis <= shadowRoundDelay) {
                continue;
            }

            // Timed out: no peers could be gossiped to. That is acceptable for a seed,
            // but any other node stops here. See CASSANDRA-13851
            if (!isSeed) {
                throw new RuntimeException("Unable to gossip with any peers");
            }
            inShadowRound = false;
            break;
        }
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    }

    return ImmutableMap.copyOf(endpointShadowStateMap);
}
use of org.apache.cassandra.locator.InetAddressAndPort in project cassandra by apache.
the class Gossiper method markAlive.
/**
 * Starts the process of marking an endpoint alive by sending it an {@code ECHO_REQ}.
 *
 * <p>The given state is flagged dead first; {@code realMarkAlive} is only invoked from the callback
 * registered for the echo request. A diagnostics event is published once the request has been handed to
 * the messaging service.
 *
 * @param addr the endpoint to probe
 * @param localState the locally held state for {@code addr}
 */
private void markAlive(final InetAddressAndPort addr, final EndpointState localState) {
    localState.markDead();

    final Message<NoPayload> echoRequest = Message.out(ECHO_REQ, noPayload);
    logger.trace("Sending ECHO_REQ to {}", addr);

    // Force processing of the echo response onto the gossip stage, as it comes in on the REQUEST_RESPONSE stage.
    final RequestCallback onEchoResponse = response -> runInGossipStageBlocking(() -> realMarkAlive(addr, localState));
    MessagingService.instance().sendWithCallback(echoRequest, addr, onEchoResponse);

    GossiperDiagnostics.markedAlive(this, addr, localState);
}
use of org.apache.cassandra.locator.InetAddressAndPort in project cassandra by apache.
the class Gossiper method doStatusCheck.
/**
 * Runs one status check over every endpoint known to gossip except the local one.
 *
 * <p>The check is skipped with a warning when the gossip stage has pending tasks and no message has been
 * processed for more than a second, even after a 100ms grace period. Otherwise, for each endpoint:
 * <ul>
 * <li>the failure detector is asked to interpret it;</li>
 * <li>a gossip-only member (fat client) that is not in quarantine and has been silent for longer than
 * {@code fatClientTimeout} is removed and evicted on the gossip stage;</li>
 * <li>a dead endpoint whose expire time has passed and that is not a token ring member is evicted.</li>
 * </ul>
 * Finally, entries older than {@code QUARANTINE_DELAY} are dropped from {@code justRemovedEndpoints}.
 */
@VisibleForTesting
void doStatusCheck() {
    if (logger.isTraceEnabled()) {
        logger.trace("Performing status check ...");
    }

    final long now = currentTimeMillis();
    final long nowNano = nanoTime();

    final long pendingTasks = Stage.GOSSIP.executor().getPendingTaskCount();
    if (pendingTasks > 0 && lastProcessedMessageAt < now - 1000) {
        // if some new messages just arrived, give the executor some time to work on them
        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        // Still nothing processed within the last second (relative to the start of this check): give up.
        if (lastProcessedMessageAt < now - 1000) {
            logger.warn("Gossip stage has {} pending tasks; skipping status check (no nodes will be marked down)", pendingTasks);
            return;
        }
    }

    for (InetAddressAndPort endpoint : endpointStateMap.keySet()) {
        if (endpoint.equals(getBroadcastAddressAndPort())) {
            continue;
        }

        FailureDetector.instance.interpret(endpoint);
        final EndpointState state = endpointStateMap.get(endpoint);
        if (state == null) {
            continue;
        }

        // Gossip-only members (fat clients) are removed automatically from gossip once they have been
        // silent for longer than fatClientTimeout. Do not remove dead states here.
        if (isGossipOnlyMember(endpoint)
            && !justRemovedEndpoints.containsKey(endpoint)
            && TimeUnit.NANOSECONDS.toMillis(nowNano - state.getUpdateTimestamp()) > fatClientTimeout) {
            logger.info("FatClient {} has been silent for {}ms, removing from gossip", endpoint, fatClientTimeout);
            runInGossipStageBlocking(() -> {
                // Updating gossip and token metadata is not atomic; it relies on the single threaded
                // gossip stage. This check ran outside that stage, so the endpoint's status is
                // confirmed again here before acting on what was read earlier.
                if (!isGossipOnlyMember(endpoint)) {
                    logger.info("Race condition marking {} as a FatClient; ignoring", endpoint);
                    return;
                }
                // will put it in justRemovedEndpoints to respect quarantine delay
                removeEndpoint(endpoint);
                // can get rid of the state immediately
                evictFromMembership(endpoint);
            });
        }

        // check for dead state removal
        final long expireTime = getExpireTimeForEndpoint(endpoint);
        if (!state.isAlive() && now > expireTime && !StorageService.instance.getTokenMetadata().isMember(endpoint)) {
            if (logger.isDebugEnabled()) {
                logger.debug("time is expiring for endpoint : {} ({})", endpoint, expireTime);
            }
            runInGossipStageBlocking(() -> evictFromMembership(endpoint));
        }
    }

    if (!justRemovedEndpoints.isEmpty()) {
        for (Entry<InetAddressAndPort, Long> quarantined : justRemovedEndpoints.entrySet()) {
            if ((now - quarantined.getValue()) <= QUARANTINE_DELAY) {
                // Still inside the quarantine window.
                continue;
            }
            if (logger.isDebugEnabled()) {
                logger.debug("{} elapsed, {} gossip quarantine over", QUARANTINE_DELAY, quarantined.getKey());
            }
            justRemovedEndpoints.remove(quarantined.getKey());
        }
    }
}
Aggregations