use of org.apache.cassandra.utils.concurrent.CountDownLatch in project cassandra by apache.
the class StorageProxy method describeSchemaVersions.
/**
 * Initiate a request/response session with each live node to check whether everybody is using the same
 * migration id. This is useful for determining if a schema change has propagated through the cluster.
 * Disagreement is assumed if any node fails to respond.
 */
public static Map<String, List<String>> describeSchemaVersions(boolean withPort) {
    final String myVersion = Schema.instance.getVersion().toString();
    final Map<InetAddressAndPort, UUID> versions = new ConcurrentHashMap<>();
    final Set<InetAddressAndPort> liveHosts = Gossiper.instance.getLiveMembers();
    final CountDownLatch latch = newCountDownLatch(liveHosts.size());
    RequestCallback<UUID> cb = message -> {
        // record the response from the remote node.
        versions.put(message.from(), message.payload);
        latch.decrement();
    };
    // an empty message acts as a request to the SchemaVersionVerbHandler.
    Message message = out(SCHEMA_VERSION_REQ, noPayload);
    for (InetAddressAndPort endpoint : liveHosts)
        MessagingService.instance().sendWithCallback(message, endpoint, cb);
    try {
        // wait for as long as possible (up to the RPC timeout); the latch opens early once every live host has responded.
        latch.await(DatabaseDescriptor.getRpcTimeout(NANOSECONDS), NANOSECONDS);
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    }
    // map each schema version to the hosts that are on that version.
    Map<String, List<String>> results = new HashMap<>();
    Iterable<InetAddressAndPort> allHosts = concat(Gossiper.instance.getLiveMembers(), Gossiper.instance.getUnreachableMembers());
    for (InetAddressAndPort host : allHosts) {
        UUID version = versions.get(host);
        String stringVersion = version == null ? UNREACHABLE : version.toString();
        List<String> hosts = results.get(stringVersion);
        if (hosts == null) {
            hosts = new ArrayList<>();
            results.put(stringVersion, hosts);
        }
        hosts.add(host.getHostAddress(withPort));
    }
    // we're done: the results map is ready to return to the client. the rest is just debug logging:
    if (results.get(UNREACHABLE) != null)
        logger.debug("Hosts not in agreement. Didn't get a response from everybody: {}", join(results.get(UNREACHABLE), ","));
    for (Map.Entry<String, List<String>> entry : results.entrySet()) {
        // check for version disagreement and log the hosts that don't agree.
        if (entry.getKey().equals(UNREACHABLE) || entry.getKey().equals(myVersion))
            continue;
        for (String host : entry.getValue())
            logger.debug("{} disagrees ({})", host, entry.getKey());
    }
    if (results.size() == 1)
        logger.debug("Schemas are in agreement.");
    return results;
}
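The pattern above — size a latch to the number of recipients, have each response callback decrement it, then await with a timeout — can be reproduced with the JDK's java.util.concurrent.CountDownLatch. The sketch below is a hypothetical, self-contained analogue (plain threads stand in for remote nodes; Cassandra's wrapper names decrement() what the JDK calls countDown()):

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class SchemaGatherSketch {
    public static void main(String[] args) throws InterruptedException {
        List<String> peers = List.of("10.0.0.1", "10.0.0.2", "10.0.0.3");
        Map<String, String> versions = new ConcurrentHashMap<>();
        // one count per peer: the latch opens as soon as every peer has answered
        CountDownLatch latch = new CountDownLatch(peers.size());
        for (String peer : peers) {
            new Thread(() -> {
                versions.put(peer, "v1"); // the "callback" records the response...
                latch.countDown();        // ...and releases one count
            }).start();
        }
        // wait up to a timeout; peers that never answer are treated as disagreeing
        boolean allResponded = latch.await(2, TimeUnit.SECONDS);
        System.out.println("all responded: " + allResponded + ", versions: " + versions);
    }
}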
use of org.apache.cassandra.utils.concurrent.CountDownLatch in project cassandra by apache.
the class StartupClusterConnectivityChecker method execute.
/**
 * @param peers The currently known peers in the cluster; the argument is not modified.
 * @param getDatacenterSource A function for mapping peers to their datacenter.
 * @return true if the requested percentage of peers are marked ALIVE in gossip and have their connections opened;
 *         else false.
 */
public boolean execute(Set<InetAddressAndPort> peers, Function<InetAddressAndPort, String> getDatacenterSource) {
    if (peers == null || this.timeoutNanos < 0)
        return true;
    // make a copy of the set, to avoid mutating the input (in case it's a sensitive collection)
    peers = new HashSet<>(peers);
    InetAddressAndPort localAddress = FBUtilities.getBroadcastAddressAndPort();
    String localDc = getDatacenterSource.apply(localAddress);
    peers.remove(localAddress);
    if (peers.isEmpty())
        return true;
    // make a copy of the datacenter mapping (in case gossip updates happen while this method runs)
    Map<InetAddressAndPort, String> peerToDatacenter = new HashMap<>();
    SetMultimap<String, InetAddressAndPort> datacenterToPeers = HashMultimap.create();
    for (InetAddressAndPort peer : peers) {
        String datacenter = getDatacenterSource.apply(peer);
        peerToDatacenter.put(peer, datacenter);
        datacenterToPeers.put(datacenter, peer);
    }
    // if we're not blocking on remote datacenters, drop them and wait only on the remaining local datacenter.
    if (!blockForRemoteDcs) {
        datacenterToPeers.keySet().retainAll(Collections.singleton(localDc));
        logger.info("Blocking coordination until only a single peer is DOWN in the local datacenter, timeout={}s", TimeUnit.NANOSECONDS.toSeconds(timeoutNanos));
    } else {
        logger.info("Blocking coordination until only a single peer is DOWN in each datacenter, timeout={}s", TimeUnit.NANOSECONDS.toSeconds(timeoutNanos));
    }
    // The threshold is 3 because for each peer we want three acks:
    // one for the small message connection, one for the large message connection, and one for the alive event from gossip.
    AckMap acks = new AckMap(3, peers);
    Map<String, CountDownLatch> dcToRemainingPeers = new HashMap<>(datacenterToPeers.size());
    for (String datacenter : datacenterToPeers.keys()) {
        dcToRemainingPeers.put(datacenter, newCountDownLatch(Math.max(datacenterToPeers.get(datacenter).size() - 1, 0)));
    }
    long startNanos = nanoTime();
    // set up a listener to react to new nodes becoming alive (in gossip), and account for all the nodes that are already alive
    Set<InetAddressAndPort> alivePeers = Collections.newSetFromMap(new ConcurrentHashMap<>());
    AliveListener listener = new AliveListener(alivePeers, dcToRemainingPeers, acks, peerToDatacenter::get);
    Gossiper.instance.register(listener);
    // send out a ping message to open up the non-gossip connections to all peers. Note that this sends the
    // ping messages to _all_ peers, not just the ones we block for in dcToRemainingPeers.
    sendPingMessages(peers, dcToRemainingPeers, acks, peerToDatacenter::get);
    for (InetAddressAndPort peer : peers) {
        if (Gossiper.instance.isAlive(peer) && alivePeers.add(peer) && acks.incrementAndCheck(peer)) {
            String datacenter = peerToDatacenter.get(peer);
            // we have to check because we might only have the local DC in the map
            if (dcToRemainingPeers.containsKey(datacenter))
                dcToRemainingPeers.get(datacenter).decrement();
        }
    }
    boolean succeeded = true;
    for (CountDownLatch countDownLatch : dcToRemainingPeers.values()) {
        long remainingNanos = Math.max(1, timeoutNanos - (nanoTime() - startNanos));
        // noinspection UnstableApiUsage
        succeeded &= countDownLatch.awaitUninterruptibly(remainingNanos, TimeUnit.NANOSECONDS);
    }
    Gossiper.instance.unregister(listener);
    if (succeeded) {
        logger.info("Ensured sufficient healthy connections with {} after {} milliseconds", dcToRemainingPeers.keySet(), TimeUnit.NANOSECONDS.toMillis(nanoTime() - startNanos));
    } else {
        // dc -> missing peer host addresses
        Map<String, List<String>> peersDown = acks.getMissingPeers().stream().collect(groupingBy(peer -> {
            String dc = peerToDatacenter.get(peer);
            if (dc != null)
                return dc;
            return StringUtils.defaultString(getDatacenterSource.apply(peer), "unknown");
        }, mapping(InetAddressAndPort::getHostAddressAndPort, toList())));
        logger.warn("Timed out after {} milliseconds, was waiting for remaining peers to connect: {}", TimeUnit.NANOSECONDS.toMillis(nanoTime() - startNanos), peersDown);
    }
    return succeeded;
}
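Two details of execute are easy to miss: each datacenter's latch is sized to its peer count minus one (so a single DOWN peer doesn't block startup), and one overall timeout budget is spread across the sequential awaits by always waiting only for the time that remains. A minimal plain-JDK sketch of both ideas, with hypothetical names:

import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class PerDcLatchSketch {
    static boolean awaitAll(Map<String, CountDownLatch> dcLatches, long timeoutNanos) throws InterruptedException {
        long start = System.nanoTime();
        boolean succeeded = true;
        for (CountDownLatch latch : dcLatches.values()) {
            // never pass a non-positive timeout; 1ns effectively means "check once and give up"
            long remaining = Math.max(1, timeoutNanos - (System.nanoTime() - start));
            succeeded &= latch.await(remaining, TimeUnit.NANOSECONDS);
        }
        return succeeded;
    }

    public static void main(String[] args) throws InterruptedException {
        // dc1 has 3 peers -> latch of 2, so one peer may stay DOWN; each connectivity "ack" decrements it
        Map<String, CountDownLatch> dcLatches = Map.of("dc1", new CountDownLatch(Math.max(3 - 1, 0)));
        dcLatches.get("dc1").countDown();
        dcLatches.get("dc1").countDown();
        System.out.println(awaitAll(dcLatches, TimeUnit.SECONDS.toNanos(1))); // true
    }
}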
use of org.apache.cassandra.utils.concurrent.CountDownLatch in project cassandra by apache.
the class StartupClusterConnectivityChecker method sendPingMessages.
/**
 * Sends a "connection warmup" message to each peer in the collection, on every {@link ConnectionType}
 * used for internode messaging (that is, not gossip).
 */
private void sendPingMessages(Set<InetAddressAndPort> peers, Map<String, CountDownLatch> dcToRemainingPeers, AckMap acks, Function<InetAddressAndPort, String> getDatacenter) {
    RequestCallback responseHandler = msg -> {
        if (acks.incrementAndCheck(msg.from())) {
            String datacenter = getDatacenter.apply(msg.from());
            // we have to check because we might only have the local DC in the map
            if (dcToRemainingPeers.containsKey(datacenter))
                dcToRemainingPeers.get(datacenter).decrement();
        }
    };
    Message<PingRequest> small = Message.out(PING_REQ, PingRequest.forSmall);
    Message<PingRequest> large = Message.out(PING_REQ, PingRequest.forLarge);
    for (InetAddressAndPort peer : peers) {
        MessagingService.instance().sendWithCallback(small, peer, responseHandler, SMALL_MESSAGES);
        MessagingService.instance().sendWithCallback(large, peer, responseHandler, LARGE_MESSAGES);
    }
}
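The AckMap used here is a private helper of StartupClusterConnectivityChecker and is not shown on this page. The sketch below is only a guess at its essential contract: incrementAndCheck(peer) must return true exactly once, on the ack that reaches the threshold, so that each peer decrements its datacenter latch at most one time even though acks arrive from three sources.

import java.net.InetSocketAddress;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

final class AckMapSketch {
    private final int threshold;
    private final Map<InetSocketAddress, AtomicInteger> acks = new ConcurrentHashMap<>();

    AckMapSketch(int threshold, Set<InetSocketAddress> peers) {
        this.threshold = threshold;
        for (InetSocketAddress peer : peers)
            acks.put(peer, new AtomicInteger(0));
    }

    // returns true only for the single ack that makes the count reach the threshold;
    // incrementAndGet() is atomic, so exactly one caller observes the threshold value
    boolean incrementAndCheck(InetSocketAddress peer) {
        return acks.get(peer).incrementAndGet() == threshold;
    }
}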
use of org.apache.cassandra.utils.concurrent.CountDownLatch in project cassandra by apache.
the class PerSSTableIndexWriter method complete.
public void complete() {
    if (isComplete)
        return;
    currentKey = null;
    try {
        CountDownLatch latch = newCountDownLatch(indexes.size());
        for (Index index : indexes.values())
            index.complete(latch);
        latch.awaitUninterruptibly();
    } finally {
        indexes.clear();
        isComplete = true;
    }
}
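Note that awaitUninterruptibly() blocks with no timeout until every index has called decrement(). The JDK CountDownLatch has no such method; a common way to get the same semantics (a sketch, not Cassandra's implementation) is to retry await() and restore the thread's interrupt status afterwards:

import java.util.concurrent.CountDownLatch;

final class Latches {
    static void awaitUninterruptibly(CountDownLatch latch) {
        boolean interrupted = false;
        try {
            while (true) {
                try {
                    latch.await(); // block until the count reaches zero
                    return;
                } catch (InterruptedException e) {
                    interrupted = true; // remember the interrupt and keep waiting
                }
            }
        } finally {
            if (interrupted)
                Thread.currentThread().interrupt(); // restore interrupt status
        }
    }
}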
use of org.apache.cassandra.utils.concurrent.CountDownLatch in project cassandra by apache.
the class TermIterator method build.
@SuppressWarnings("resource")
public static TermIterator build(final Expression e, Set<SSTableIndex> perSSTableIndexes) {
    final List<RangeIterator<Long, Token>> tokens = new CopyOnWriteArrayList<>();
    final AtomicLong tokenCount = new AtomicLong(0);
    RangeIterator<Long, Token> memtableIterator = e.index.searchMemtable(e);
    if (memtableIterator != null) {
        tokens.add(memtableIterator);
        tokenCount.addAndGet(memtableIterator.getCount());
    }
    final Set<SSTableIndex> referencedIndexes = new CopyOnWriteArraySet<>();
    try {
        final CountDownLatch latch = newCountDownLatch(perSSTableIndexes.size());
        final ExecutorService searchExecutor = SEARCH_EXECUTOR.get();
        for (final SSTableIndex index : perSSTableIndexes) {
            if (e.getOp() == PREFIX && index.mode() == CONTAINS && !index.hasMarkedPartials())
                throw new UnsupportedOperationException(format("The index %s has not yet been upgraded to support prefix queries in CONTAINS mode. Wait for compaction or rebuild the index.", index.getPath()));
            if (!index.reference()) {
                latch.decrement();
                continue;
            }
            // add to referencedIndexes right after the reference is acquired,
            // so the index can be released if something goes wrong inside the search
            referencedIndexes.add(index);
            searchExecutor.submit((Runnable) () -> {
                try {
                    e.checkpoint();
                    RangeIterator<Long, Token> keyIterator = index.search(e);
                    if (keyIterator == null) {
                        releaseIndex(referencedIndexes, index);
                        return;
                    }
                    tokens.add(keyIterator);
                    tokenCount.getAndAdd(keyIterator.getCount());
                } catch (Throwable e1) {
                    releaseIndex(referencedIndexes, index);
                    if (logger.isDebugEnabled())
                        logger.debug(format("Failed to search index %s, skipping.", index.getPath()), e1);
                } finally {
                    latch.decrement();
                }
            });
        }
        latch.awaitUninterruptibly();
        // checkpoint right after all indexes complete their searches, because we might have crossed the quota
        e.checkpoint();
        RangeIterator<Long, Token> ranges = RangeUnionIterator.build(tokens);
        return new TermIterator(e, ranges, referencedIndexes);
    } catch (Throwable ex) {
        // if the execution quota was exceeded while opening indexes, or something else went wrong,
        // locally referenced (yet to be tracked) indexes must be released before re-throwing
        referencedIndexes.forEach(TermIterator::releaseQuietly);
        throw ex;
    }
}
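The latch discipline in build is the part worth imitating: a count is released on every path — immediately when an index is skipped, and in a finally block inside each submitted task — so a failed search can never leave awaitUninterruptibly() blocked forever. A self-contained JDK sketch of that discipline (the simulated shards and failure are hypothetical):

import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class LatchDisciplineSketch {
    public static void main(String[] args) throws InterruptedException {
        List<String> shards = List.of("idx-1", "idx-2", "idx-3");
        CountDownLatch latch = new CountDownLatch(shards.size());
        ExecutorService pool = Executors.newFixedThreadPool(2);
        for (String shard : shards) {
            if (shard.equals("idx-2")) {
                latch.countDown(); // skipped shard: release its count right away
                continue;
            }
            pool.submit(() -> {
                try {
                    if (shard.equals("idx-3"))
                        throw new RuntimeException("simulated search failure");
                    System.out.println("searched " + shard);
                } catch (Throwable t) {
                    System.out.println("skipping " + shard + ": " + t.getMessage());
                } finally {
                    latch.countDown(); // always release, success or failure
                }
            });
        }
        latch.await(); // returns because every path decremented
        pool.shutdown();
    }
}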