use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.
the class CassandraService method getRandomGoodHostForPredicate.
/**
 * Selects the pooling container of one host from the current pools that satisfies {@code predicate}.
 *
 * <p>Selection narrows the candidates in stages, falling back to the previous stage whenever a stage
 * would leave nothing to choose from:
 * <ol>
 *   <li>keep only hosts matching {@code predicate};</li>
 *   <li>drop hosts whose datacenter (per {@code hostToDatacenter}) is among the datacenters with the
 *       highest number of entries in {@code triedNodes}; hosts with no known datacenter are kept;</li>
 *   <li>drop hosts the blacklist filters out.</li>
 * </ol>
 * The final pick among the survivors is delegated to {@code getRandomHostByActiveConnections}.
 *
 * @param predicate condition a host must satisfy to be considered
 * @param triedNodes hosts that have already been attempted; only used to count attempts per datacenter
 * @return the container for the chosen host, or empty when no host matches {@code predicate} or the
 *     delegate returns no host
 */
public Optional<CassandraClientPoolingContainer> getRandomGoodHostForPredicate(Predicate<CassandraServer> predicate, Set<CassandraServer> triedNodes) {
    // Read the field once: the candidate set and the final lookup both use this same reference.
    Map<CassandraServer, CassandraClientPoolingContainer> poolSnapshot = currentPools;
    Set<CassandraServer> candidates = poolSnapshot.keySet().stream()
            .filter(predicate)
            .collect(Collectors.toSet());

    // Count, per datacenter, how many of the already-tried nodes live there (unknown datacenters are ignored).
    Map<String, Long> attemptsByDatacenter = triedNodes.stream()
            .map(hostToDatacenter::get)
            .filter(Objects::nonNull)
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    Optional<Long> highestAttemptCount = attemptsByDatacenter.values().stream().max(Long::compareTo);

    // The supplier below can only fire if there is an entry to test, in which case a maximum exists.
    Set<String> mostTriedDatacenters = attemptsByDatacenter.entrySet().stream()
            .filter(entry -> Objects.equals(
                    entry.getValue(),
                    highestAttemptCount.orElseThrow(() -> new SafeIllegalStateException("Unexpectedly could not find the max attempts per datacenter"))))
            .map(Map.Entry::getKey)
            .collect(Collectors.toSet());

    Set<CassandraServer> outsideMostTried = candidates.stream()
            .filter(server -> {
                String datacenter = hostToDatacenter.get(server);
                return !(datacenter != null && mostTriedDatacenters.contains(datacenter));
            })
            .collect(Collectors.toSet());

    // If every candidate sits in a most-tried datacenter, fall back to all candidates.
    Set<CassandraServer> eligible;
    if (outsideMostTried.isEmpty()) {
        eligible = candidates;
    } else {
        eligible = outsideMostTried;
    }

    if (eligible.isEmpty()) {
        log.info("No hosts match the provided predicate.");
        return Optional.empty();
    }

    Set<CassandraServer> selectable = blacklist.filterBlacklistedHostsFrom(eligible);
    if (selectable.isEmpty()) {
        log.info("There are no known live hosts in the connection pool matching the predicate. We're choosing"
                + " one at random in a last-ditch attempt at forward progress.");
        selectable = eligible;
    }

    return getRandomHostByActiveConnections(selectable).map(poolSnapshot::get);
}
use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.
the class CassandraClientPoolImpl method sanityCheckRingConsistency.
// Guards against a particularly nasty bug in which Cassandra does not have a consistent ring
// across all of its nodes: one node thinks it owns more than the others think it does, so the
// others will not send writes to it, yet it still responds to requests as though it owned them.
//
// Every cached server is asked for its ring description and servers are grouped by the description
// they report. More than one distinct description that TokenRangeResolution does not consider
// consistent is logged as an error and handed to CassandraVerifier.logErrorOrThrow.
private void sanityCheckRingConsistency() {
    Multimap<Set<TokenRange>, CassandraServer> serversByRingView = HashMultimap.create();
    for (CassandraServer server : getCachedServers()) {
        try (CassandraClient client = CassandraClientFactory.getClientInternal(server.proxy(), clientConfig)) {
            try {
                client.describe_keyspace(config.getKeyspaceOrThrow());
            } catch (NotFoundException e) {
                // don't care to check for ring consistency when we're not even fully initialized
                return;
            }
            Set<TokenRange> ringView = ImmutableSet.copyOf(client.describe_ring(config.getKeyspaceOrThrow()));
            serversByRingView.put(ringView, server);
        } catch (Exception e) {
            // Best effort: a server that cannot be queried is simply left out of the comparison.
            log.warn(
                    "Failed to get ring info from host: {}",
                    SafeArg.of("host", server.cassandraHostName()),
                    SafeArg.of("proxy", CassandraLogHelper.host(server.proxy())),
                    e);
        }
    }

    if (serversByRingView.isEmpty()) {
        log.warn(
                "Failed to get ring info for entire Cassandra cluster ({});"
                        + " ring could not be checked for consistency.",
                UnsafeArg.of("keyspace", config.getKeyspaceOrThrow()));
        return;
    }

    // One key per distinct ring description reported by the servers that answered.
    Set<Set<TokenRange>> distinctViews = serversByRingView.keySet();
    if (distinctViews.size() == 1) {
        // all nodes agree on a consistent view of the cluster. Good.
        return;
    }

    if (TokenRangeResolution.viewsAreConsistent(distinctViews)) {
        log.info("Although multiple ring descriptions were detected, we believe these to be consistent:"
                + " ranges detected were identical. This may occur when there are legitimate network routing"
                + " changes, for instance.");
        return;
    }

    RuntimeException ex = new SafeIllegalStateException("Hosts have differing ring descriptions. This can lead to inconsistent reads and lost data.");
    Set<TokenRange> allRanges = distinctViews.stream().flatMap(Set::stream).collect(Collectors.toSet());
    log.error(
            "Cassandra does not appear to have a consistent ring across all of its nodes. This could cause us to"
                    + " lose writes. The mapping of token ranges to hosts is:\n{}",
            UnsafeArg.of("tokenRangesToServer", CassandraLogHelper.tokenRangesToServer(serversByRingView)),
            SafeArg.of("tokenRangeHashes", CassandraLogHelper.tokenRangeHashes(allRanges)),
            ex);

    // provide some easier to grok logging for the two most common cases
    // Note: Multimap.size() counts key-value pairs, i.e. the number of servers that reported a view.
    if (serversByRingView.size() > 2) {
        serversByRingView.asMap().entrySet().stream()
                .filter(entry -> entry.getValue().size() == 1)
                // The filter guarantees exactly one element, so getFirst never falls back to null here.
                .forEach(entry -> log.error(
                        "Host: {} disagrees with the other nodes about the ring state.",
                        SafeArg.of("host", Iterables.getFirst(entry.getValue(), null))));
    }

    if (distinctViews.size() == 2) {
        ImmutableList<Set<TokenRange>> views = ImmutableList.copyOf(distinctViews);
        log.error(
                "Hosts are split. group1: {} group2: {}",
                SafeArg.of("hosts1", CassandraLogHelper.collectionOfHosts(serversByRingView.get(views.get(0)))),
                SafeArg.of("hosts2", CassandraLogHelper.collectionOfHosts(serversByRingView.get(views.get(1)))));
    }

    CassandraVerifier.logErrorOrThrow(ex.getMessage(), config.ignoreInconsistentRingChecks());
}
use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.
the class TransactionAborter method executeWithRetry.
/**
 * Aborts the transaction described by {@code entry}, delegating retries to {@code abortRetryer}.
 *
 * <p>Both statements must have a serial consistency level of {@code ConsistencyLevel.SERIAL}; this is
 * checked before anything is executed.
 *
 * @param txnInteraction passed through to {@code tryAbortTransactions}
 * @param abortStatement statement performing the abort
 * @param checkStatement statement passed alongside the abort statement to {@code tryAbortTransactions}
 * @param entry transaction table entry supplying the start and commit timestamps; a missing commit
 *     timestamp makes the no-argument {@code orElseThrow()} throw
 * @throws SafeIllegalStateException if the retryer fails with an {@code ExecutionException} or a
 *     {@code RetryException}; the original exception is kept as the cause
 */
private void executeWithRetry(TransactionsTableInteraction txnInteraction, Statement abortStatement, Statement checkStatement, TransactionTableEntry entry) {
    long startTimestamp = TransactionTableEntries.getStartTimestamp(entry);
    long commitTimestamp = getCommitTimestamp(entry).orElseThrow();

    Preconditions.checkArgument(
            abortStatement.getSerialConsistencyLevel() == ConsistencyLevel.SERIAL,
            "Abort statement was not at expected consistency level",
            SafeArg.of("consistencyLevel", abortStatement.getSerialConsistencyLevel()),
            SafeArg.of("expectedConsistencyLevel", ConsistencyLevel.SERIAL));
    Preconditions.checkArgument(
            checkStatement.getSerialConsistencyLevel() == ConsistencyLevel.SERIAL,
            "Check statement was not at expected consistency level",
            SafeArg.of("consistencyLevel", checkStatement.getSerialConsistencyLevel()),
            SafeArg.of("expectedConsistencyLevel", ConsistencyLevel.SERIAL));

    try {
        abortRetryer.call(() ->
                tryAbortTransactions(txnInteraction, abortStatement, checkStatement, startTimestamp, commitTimestamp));
    } catch (ExecutionException e) {
        // The abort attempt itself threw.
        throw new SafeIllegalStateException(
                "Failed to execute transaction abort",
                e,
                SafeArg.of("startTs", startTimestamp),
                SafeArg.of("commitTs", commitTimestamp),
                SafeArg.of("retryCount", RETRY_COUNT),
                SafeArg.of("keyspace", namespace));
    } catch (RetryException e) {
        // The retryer gave up.
        throw new SafeIllegalStateException(
                "Unable to abort transactions even with retry",
                e,
                SafeArg.of("startTs", startTimestamp),
                SafeArg.of("commitTs", commitTimestamp),
                SafeArg.of("retryCount", RETRY_COUNT),
                SafeArg.of("keyspace", namespace));
    }
}
use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.
the class PaxosConsensusTestUtils method teardown.
/**
 * Shuts down the executor held by {@code state} and deletes the {@code LOG_DIR} directory.
 *
 * <p>The directory is deleted in a {@code finally} block, so it is removed whether or not the
 * executor terminates in time.
 *
 * @param state test state whose executor is shut down
 * @throws SafeIllegalStateException if the executor has not terminated within 10 seconds of
 *     {@code shutdownNow()}
 * @throws Exception anything else raised while waiting for termination or deleting the directory
 */
public static void teardown(PaxosTestState state) throws Exception {
    try {
        ExecutorService pool = state.getExecutor();
        pool.shutdownNow();
        if (!pool.awaitTermination(10, TimeUnit.SECONDS)) {
            throw new SafeIllegalStateException("Some threads are still hanging around!" + " Can't proceed or they might corrupt future tests.");
        }
    } finally {
        FileUtils.deleteDirectory(new File(LOG_DIR));
    }
}
use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.
the class MultiNodePaxosTimeLockServerIntegrationTest method sanityCheckMultiClientStartTransactionsAgainstConjureTimelockService.
/**
 * Starts transactions for two namespaces through the multi-client endpoint, then repeats each
 * namespace's request through its namespaced endpoint and checks that the two responses line up:
 * same lock-watch log id, and a namespaced start timestamp greater than every batched timestamp.
 */
@Test
public void sanityCheckMultiClientStartTransactionsAgainstConjureTimelockService() {
    TestableTimelockServer leader = cluster.currentLeaderFor(client.namespace());
    // Multi client batched TimeLock endpoints do not support multi-leader mode on TimeLock
    Assume.assumeFalse(leader.isMultiLeader());

    MultiClientConjureTimelockService batchedService = leader.multiClientService();
    List<String> expectedNamespaces = ImmutableList.of("alpha", "beta");
    int numTransactions = 7;
    Map<Namespace, ConjureStartTransactionsRequest> requestsByNamespace =
            defaultStartTransactionsRequests(expectedNamespaces, numTransactions);
    Map<Namespace, ConjureStartTransactionsResponse> batchedResponses =
            batchedService.startTransactionsForClients(AUTH_HEADER, requestsByNamespace);

    // Whether we hit the multi client endpoint or conjureTimelockService endpoint, for a namespace, the underlying
    // service to process the request is the same
    for (Map.Entry<Namespace, ConjureStartTransactionsResponse> started : batchedResponses.entrySet()) {
        Namespace namespace = started.getKey();
        ConjureStartTransactionsResponse batchedResponse = started.getValue();

        ConjureStartTransactionsResponse legacyResponse = leader.client(namespace.get())
                .namespacedConjureTimelockService()
                .startTransactions(requestsByNamespace.get(namespace));

        assertThat(legacyResponse.getLockWatchUpdate().logId())
                .isEqualTo(batchedResponse.getLockWatchUpdate().logId());

        // The legacy request was issued afterwards, so its first timestamp must exceed all batched ones.
        PartitionedTimestamps batchedTimestamps = batchedResponse.getTimestamps();
        long highestBatchedTimestamp = batchedTimestamps.stream().max().orElseThrow(SafeIllegalStateException::new);
        assertThat(legacyResponse.getTimestamps().start()).isGreaterThan(highestBatchedTimestamp);
    }
}
Aggregations