use of org.apache.cassandra.net.RequestCallback in project cassandra by apache.
the class Gossiper method markAlive.
private void markAlive(final InetAddressAndPort addr, final EndpointState localState) {
localState.markDead();
Message<NoPayload> echoMessage = Message.out(ECHO_REQ, noPayload);
logger.trace("Sending ECHO_REQ to {}", addr);
RequestCallback echoHandler = msg -> {
// force processing of the echo response onto the gossip stage, as it comes in on the REQUEST_RESPONSE stage
runInGossipStageBlocking(() -> realMarkAlive(addr, localState));
};
MessagingService.instance().sendWithCallback(echoMessage, addr, echoHandler);
GossiperDiagnostics.markedAlive(this, addr, localState);
}
use of org.apache.cassandra.net.RequestCallback in project cassandra by apache.
the class SimulatedAction method applyToMessage.
Action applyToMessage(IInvokableInstance from, IInvokableInstance to, IMessage message) {
Executor executor = to.executorFor(message.verb());
if (executor instanceof ImmediateExecutor)
executor = to.executor();
InterceptedExecution.InterceptedTaskExecution task = new InterceptedRunnableExecution((InterceptingExecutor) executor, () -> to.receiveMessageWithInvokingThread(message));
Verb verb = Verb.fromId(message.verb());
Modifiers self = verbModifiers.getOrDefault(verb, NONE);
int fromNum = from.config().num();
int toNum = to.config().num();
Deliver deliver;
if (is(Modifier.RELIABLE) || self.is(Modifier.RELIABLE))
deliver = DELIVER;
else
deliver = simulated.futureScheduler.shouldDeliver(fromNum, toNum);
Action action;
switch(deliver) {
default:
throw new AssertionError();
case DELIVER:
{
Object description = lazy(() -> String.format("%s(%d) from %s to %s", Verb.fromId(message.verb()), message.id(), message.from(), to.broadcastAddress()));
OrderOn orderOn = task.executor.orderAppliesAfterScheduling();
action = applyTo(description, MESSAGE, orderOn, self, verb, task);
action.setDeadline(simulated.futureScheduler.messageDeadlineNanos(fromNum, toNum));
break;
}
case FAILURE:
case TIMEOUT:
{
task.cancel();
self = DROP.with(self);
InetSocketAddress failedOn;
IInvokableInstance notify;
if (verb.isResponse()) {
failedOn = from.broadcastAddress();
notify = to;
} else {
failedOn = to.broadcastAddress();
notify = from;
}
InterceptedExecution.InterceptedTaskExecution failTask = new InterceptedRunnableExecution((InterceptingExecutor) notify.executorFor(verb.id), () -> notify.unsafeApplyOnThisThread((socketAddress, id, isTimeout) -> {
InetAddressAndPort address = InetAddressAndPort.getByAddress(socketAddress);
RequestCallbacks.CallbackInfo callback = instance().callbacks.remove(id, address);
if (callback != null) {
RequestCallback<?> invokeOn = (RequestCallback<?>) callback.callback;
RequestFailureReason reason = isTimeout ? RequestFailureReason.TIMEOUT : RequestFailureReason.UNKNOWN;
invokeOn.onFailure(address, reason);
}
return null;
}, failedOn, message.id(), deliver == TIMEOUT));
Object description = (lazy(() -> String.format("Report Timeout of %s(%d) from %s to %s", Verb.fromId(message.verb()), message.id(), failedOn, notify.broadcastAddress())));
OrderOn orderOn = failTask.executor.orderAppliesAfterScheduling();
action = applyTo(description, MESSAGE, orderOn, self, failTask);
switch(deliver) {
default:
throw new AssertionError();
case TIMEOUT:
long expiresAfterNanos = from.unsafeApplyOnThisThread(id -> Verb.fromId(id).expiresAfterNanos(), (verb.isResponse() ? forVerb : verb).id);
action.setDeadline(simulated.futureScheduler.messageTimeoutNanos(expiresAfterNanos));
break;
case FAILURE:
action.setDeadline(simulated.futureScheduler.messageFailureNanos(toNum, fromNum));
break;
}
break;
}
}
return action;
}
use of org.apache.cassandra.net.RequestCallback in project cassandra by apache.
the class StorageProxy method describeSchemaVersions.
/**
* initiate a request/response session with each live node to check whether or not everybody is using the same
* migration id. This is useful for determining if a schema change has propagated through the cluster. Disagreement
* is assumed if any node fails to respond.
*/
public static Map<String, List<String>> describeSchemaVersions(boolean withPort) {
final String myVersion = Schema.instance.getVersion().toString();
final Map<InetAddressAndPort, UUID> versions = new ConcurrentHashMap<>();
final Set<InetAddressAndPort> liveHosts = Gossiper.instance.getLiveMembers();
final CountDownLatch latch = newCountDownLatch(liveHosts.size());
RequestCallback<UUID> cb = message -> {
// record the response from the remote node.
versions.put(message.from(), message.payload);
latch.decrement();
};
// an empty message acts as a request to the SchemaVersionVerbHandler.
Message message = out(SCHEMA_VERSION_REQ, noPayload);
for (InetAddressAndPort endpoint : liveHosts) MessagingService.instance().sendWithCallback(message, endpoint, cb);
try {
// wait for as long as possible. timeout-1s if possible.
latch.await(DatabaseDescriptor.getRpcTimeout(NANOSECONDS), NANOSECONDS);
} catch (InterruptedException e) {
throw new UncheckedInterruptedException(e);
}
// maps versions to hosts that are on that version.
Map<String, List<String>> results = new HashMap<String, List<String>>();
Iterable<InetAddressAndPort> allHosts = concat(Gossiper.instance.getLiveMembers(), Gossiper.instance.getUnreachableMembers());
for (InetAddressAndPort host : allHosts) {
UUID version = versions.get(host);
String stringVersion = version == null ? UNREACHABLE : version.toString();
List<String> hosts = results.get(stringVersion);
if (hosts == null) {
hosts = new ArrayList<String>();
results.put(stringVersion, hosts);
}
hosts.add(host.getHostAddress(withPort));
}
// we're done: the results map is ready to return to the client. the rest is just debug logging:
if (results.get(UNREACHABLE) != null)
logger.debug("Hosts not in agreement. Didn't get a response from everybody: {}", join(results.get(UNREACHABLE), ","));
for (Map.Entry<String, List<String>> entry : results.entrySet()) {
// check for version disagreement. log the hosts that don't agree.
if (entry.getKey().equals(UNREACHABLE) || entry.getKey().equals(myVersion))
continue;
for (String host : entry.getValue()) logger.debug("{} disagrees ({})", host, entry.getKey());
}
if (results.size() == 1)
logger.debug("Schemas are in agreement.");
return results;
}
use of org.apache.cassandra.net.RequestCallback in project cassandra by apache.
the class ActiveRepairService method cleanUp.
/**
* Send Verb.CLEANUP_MSG to the given endpoints. This results in removing parent session object from the
* endpoint's cache.
* This method does not throw an exception in case of a messaging failure.
*/
public void cleanUp(UUID parentRepairSession, Set<InetAddressAndPort> endpoints) {
for (InetAddressAndPort endpoint : endpoints) {
try {
if (FailureDetector.instance.isAlive(endpoint)) {
CleanupMessage message = new CleanupMessage(parentRepairSession);
Message<CleanupMessage> msg = Message.out(Verb.CLEANUP_MSG, message);
RequestCallback loggingCallback = new RequestCallback() {
@Override
public void onResponse(Message msg) {
logger.trace("Successfully cleaned up {} parent repair session on {}.", parentRepairSession, endpoint);
}
@Override
public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) {
logger.debug("Failed to clean up parent repair session {} on {}. The uncleaned sessions will " + "be removed on a node restart. This should not be a problem unless you see thousands " + "of messages like this.", parentRepairSession, endpoint);
}
};
MessagingService.instance().sendWithCallback(msg, endpoint, loggingCallback);
}
} catch (Exception exc) {
logger.warn("Failed to send a clean up message to {}", endpoint, exc);
}
}
}
use of org.apache.cassandra.net.RequestCallback in project cassandra by apache.
the class ActiveRepairService method prepareForRepair.
public UUID prepareForRepair(UUID parentRepairSession, InetAddressAndPort coordinator, Set<InetAddressAndPort> endpoints, RepairOption options, boolean isForcedRepair, List<ColumnFamilyStore> columnFamilyStores) {
if (!verifyCompactionsPendingThreshold(parentRepairSession, options.getPreviewKind()))
// failRepair throws exception
failRepair(parentRepairSession, "Rejecting incoming repair, pending compactions above threshold");
long repairedAt = getRepairedAt(options, isForcedRepair);
registerParentRepairSession(parentRepairSession, coordinator, columnFamilyStores, options.getRanges(), options.isIncremental(), repairedAt, options.isGlobal(), options.getPreviewKind());
final CountDownLatch prepareLatch = newCountDownLatch(endpoints.size());
final AtomicBoolean status = new AtomicBoolean(true);
final Set<String> failedNodes = synchronizedSet(new HashSet<String>());
final AtomicInteger timeouts = new AtomicInteger(0);
RequestCallback callback = new RequestCallback() {
@Override
public void onResponse(Message msg) {
prepareLatch.decrement();
}
@Override
public void onFailure(InetAddressAndPort from, RequestFailureReason failureReason) {
status.set(false);
failedNodes.add(from.toString());
if (failureReason == RequestFailureReason.TIMEOUT)
timeouts.incrementAndGet();
prepareLatch.decrement();
}
@Override
public boolean invokeOnFailure() {
return true;
}
};
List<TableId> tableIds = new ArrayList<>(columnFamilyStores.size());
for (ColumnFamilyStore cfs : columnFamilyStores) tableIds.add(cfs.metadata.id);
for (InetAddressAndPort neighbour : endpoints) {
if (FailureDetector.instance.isAlive(neighbour)) {
PrepareMessage message = new PrepareMessage(parentRepairSession, tableIds, options.getRanges(), options.isIncremental(), repairedAt, options.isGlobal(), options.getPreviewKind());
Message<RepairMessage> msg = out(PREPARE_MSG, message);
MessagingService.instance().sendWithCallback(msg, neighbour, callback);
} else {
// remaining ones go down, we still want to fail so we don't create repair sessions that can't complete
if (isForcedRepair && !options.isIncremental()) {
prepareLatch.decrement();
} else {
// bailout early to avoid potentially waiting for a long time.
failRepair(parentRepairSession, "Endpoint not alive: " + neighbour);
}
}
}
try {
if (!prepareLatch.await(getRpcTimeout(MILLISECONDS), MILLISECONDS) || timeouts.get() > 0)
failRepair(parentRepairSession, "Did not get replies from all endpoints.");
} catch (InterruptedException e) {
failRepair(parentRepairSession, "Interrupted while waiting for prepare repair response.");
}
if (!status.get()) {
failRepair(parentRepairSession, "Got negative replies from endpoints " + failedNodes);
}
return parentRepairSession;
}
Aggregations