Search in sources :

Example 16 with SafeIllegalStateException

use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.

the class CassandraService method getRandomGoodHostForPredicate.

/**
 * Selects a pooled host that satisfies {@code predicate}, preferring hosts outside the
 * datacenters that have been attempted most often so far.
 *
 * <p>Selection narrows in stages: hosts matching the predicate, then (if any remain) those not in
 * a maximally-attempted datacenter, then (if any remain) those left after blacklist filtering.
 * Whenever a narrowing stage would leave nothing, the previous stage's hosts are used instead.
 *
 * @param predicate condition a host must satisfy to be considered
 * @param triedNodes hosts already attempted; used only to count attempts per datacenter
 * @return the pool container for the chosen host, or empty if no host matches the predicate
 */
public Optional<CassandraClientPoolingContainer> getRandomGoodHostForPredicate(
        Predicate<CassandraServer> predicate, Set<CassandraServer> triedNodes) {
    // Read the pools reference once so filtering and the final lookup use the same map.
    Map<CassandraServer, CassandraClientPoolingContainer> poolSnapshot = currentPools;
    Set<CassandraServer> candidates =
            poolSnapshot.keySet().stream().filter(predicate).collect(Collectors.toSet());

    // Count tried nodes per datacenter; nodes without a datacenter mapping are skipped.
    Map<String, Long> attemptsByDatacenter = triedNodes.stream()
            .map(hostToDatacenter::get)
            .filter(Objects::nonNull)
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    Optional<Long> highestAttemptCount =
            attemptsByDatacenter.values().stream().max(Long::compareTo);
    // The orElseThrow is evaluated per entry, so it is never reached when the map is empty.
    Set<String> mostTriedDatacenters = KeyedStream.stream(attemptsByDatacenter)
            .filter(count -> Objects.equals(
                    count,
                    highestAttemptCount.orElseThrow(() -> new SafeIllegalStateException(
                            "Unexpectedly could not find the max attempts per datacenter"))))
            .keys()
            .collect(Collectors.toSet());

    // Hosts with no known datacenter are always permitted.
    Set<CassandraServer> candidatesOutsideMostTried = candidates.stream()
            .filter(server -> {
                String datacenter = hostToDatacenter.get(server);
                if (datacenter == null) {
                    return true;
                }
                return !mostTriedDatacenters.contains(datacenter);
            })
            .collect(Collectors.toSet());

    Set<CassandraServer> eligibleHosts;
    if (candidatesOutsideMostTried.isEmpty()) {
        eligibleHosts = candidates;
    } else {
        eligibleHosts = candidatesOutsideMostTried;
    }
    if (eligibleHosts.isEmpty()) {
        log.info("No hosts match the provided predicate.");
        return Optional.empty();
    }

    Set<CassandraServer> selectionPool = blacklist.filterBlacklistedHostsFrom(eligibleHosts);
    if (selectionPool.isEmpty()) {
        log.info("There are no known live hosts in the connection pool matching the predicate. We're choosing" + " one at random in a last-ditch attempt at forward progress.");
        selectionPool = eligibleHosts;
    }
    return getRandomHostByActiveConnections(selectionPool).map(poolSnapshot::get);
}
Also used : Throwables(com.palantir.common.base.Throwables) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) Random(java.util.Random) Blacklist(com.palantir.atlasdb.keyvalue.cassandra.Blacklist) CassandraUtils(com.palantir.atlasdb.keyvalue.cassandra.CassandraUtils) LightweightOppToken(com.palantir.atlasdb.keyvalue.cassandra.LightweightOppToken) InetAddress(java.net.InetAddress) ImmutableRangeMap(com.google.common.collect.ImmutableRangeMap) MetricsManager(com.palantir.atlasdb.util.MetricsManager) ThriftHostsExtractingVisitor(com.palantir.atlasdb.cassandra.CassandraServersConfigs.ThriftHostsExtractingVisitor) Map(java.util.Map) TokenRange(org.apache.cassandra.thrift.TokenRange) PoolingContainer(com.palantir.common.pooling.PoolingContainer) EndpointDetails(org.apache.cassandra.thrift.EndpointDetails) Refreshable(com.palantir.refreshable.Refreshable) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) KeyedStream(com.palantir.common.streams.KeyedStream) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Range(com.google.common.collect.Range) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) Interner(com.google.common.collect.Interner) InetSocketAddress(java.net.InetSocketAddress) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) UnsafeArg(com.palantir.logsafe.UnsafeArg) CassandraLogHelper(com.palantir.atlasdb.keyvalue.cassandra.CassandraLogHelper) Optional(java.util.Optional) Iterables(com.google.common.collect.Iterables) CassandraKeyValueServiceConfig(com.palantir.atlasdb.cassandra.CassandraKeyValueServiceConfig) CassandraKeyValueServiceRuntimeConfig(com.palantir.atlasdb.cassandra.CassandraKeyValueServiceRuntimeConfig) SafeLoggerFactory(com.palantir.logsafe.logger.SafeLoggerFactory) HashMap(java.util.HashMap) 
Function(java.util.function.Function) Supplier(java.util.function.Supplier) SafeLogger(com.palantir.logsafe.logger.SafeLogger) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SafeArg(com.palantir.logsafe.SafeArg) ImmutableList(com.google.common.collect.ImmutableList) Suppliers(com.google.common.base.Suppliers) BaseEncoding(com.google.common.io.BaseEncoding) FunctionCheckedException(com.palantir.common.base.FunctionCheckedException) CassandraClientPoolingContainer(com.palantir.atlasdb.keyvalue.cassandra.CassandraClientPoolingContainer) Interners(com.google.common.collect.Interners) UnknownHostException(java.net.UnknownHostException) TimeUnit(java.util.concurrent.TimeUnit) RangeMap(com.google.common.collect.RangeMap) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) CassandraClient(com.palantir.atlasdb.keyvalue.cassandra.CassandraClient) CassandraClientPoolingContainer(com.palantir.atlasdb.keyvalue.cassandra.CassandraClientPoolingContainer) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException)

Example 17 with SafeIllegalStateException

use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.

the class CassandraClientPoolImpl method sanityCheckRingConsistency.

// This check exists to catch a particularly nasty bug in which Cassandra does not have a
// consistent ring across all of its nodes. One node believes it owns more than the other nodes
// think it does; they will not send writes to it, yet it responds to requests as though it
// does own that data.
private void sanityCheckRingConsistency() {
    // Group the cached servers by the exact set of token ranges each one reports.
    Multimap<Set<TokenRange>, CassandraServer> serversByRingView = HashMultimap.create();
    for (CassandraServer server : getCachedServers()) {
        try (CassandraClient client =
                CassandraClientFactory.getClientInternal(server.proxy(), clientConfig)) {
            try {
                client.describe_keyspace(config.getKeyspaceOrThrow());
            } catch (NotFoundException e) {
                // Not fully initialized yet, so ring consistency is not worth checking.
                return;
            }
            Set<TokenRange> ringView =
                    ImmutableSet.copyOf(client.describe_ring(config.getKeyspaceOrThrow()));
            serversByRingView.put(ringView, server);
        } catch (Exception e) {
            // A single unreachable host must not abort the check for the remaining hosts.
            log.warn(
                    "Failed to get ring info from host: {}",
                    SafeArg.of("host", server.cassandraHostName()),
                    SafeArg.of("proxy", CassandraLogHelper.host(server.proxy())),
                    e);
        }
    }

    if (serversByRingView.isEmpty()) {
        log.warn(
                "Failed to get ring info for entire Cassandra cluster ({});" + " ring could not be checked for consistency.",
                UnsafeArg.of("keyspace", config.getKeyspaceOrThrow()));
        return;
    }

    Set<Set<TokenRange>> distinctViews = serversByRingView.keySet();
    if (distinctViews.size() == 1) {
        // Every node reported the same view of the cluster. Good.
        return;
    }
    if (TokenRangeResolution.viewsAreConsistent(distinctViews)) {
        log.info("Although multiple ring descriptions were detected, we believe these to be consistent:" + " ranges detected were identical. This may occur when there are legitimate network routing" + " changes, for instance.");
        return;
    }

    RuntimeException inconsistency = new SafeIllegalStateException("Hosts have differing ring descriptions. This can lead to inconsistent reads and lost data.");
    log.error(
            "Cassandra does not appear to have a consistent ring across all of its nodes. This could cause us to" + " lose writes. The mapping of token ranges to hosts is:\n{}",
            UnsafeArg.of("tokenRangesToServer", CassandraLogHelper.tokenRangesToServer(serversByRingView)),
            SafeArg.of(
                    "tokenRangeHashes",
                    CassandraLogHelper.tokenRangeHashes(
                            distinctViews.stream().flatMap(Set::stream).collect(Collectors.toSet()))),
            inconsistency);

    // Extra, easier-to-read logging for the two most common situations.
    if (serversByRingView.size() > 2) {
        for (Set<TokenRange> view : distinctViews) {
            if (serversByRingView.get(view).size() == 1) {
                // Exactly one server holds this view, so getFirst never falls back to null here.
                log.error(
                        "Host: {} disagrees with the other nodes about the ring state.",
                        SafeArg.of("host", Iterables.getFirst(serversByRingView.get(view), null)));
            }
        }
    }
    if (distinctViews.size() == 2) {
        ImmutableList<Set<TokenRange>> bothViews = ImmutableList.copyOf(distinctViews);
        log.error(
                "Hosts are split. group1: {} group2: {}",
                SafeArg.of("hosts1", CassandraLogHelper.collectionOfHosts(serversByRingView.get(bothViews.get(0)))),
                SafeArg.of("hosts2", CassandraLogHelper.collectionOfHosts(serversByRingView.get(bothViews.get(1)))));
    }

    CassandraVerifier.logErrorOrThrow(inconsistency.getMessage(), config.ignoreInconsistentRingChecks());
}
Also used : InitializeableScheduledExecutorServiceSupplier(com.palantir.common.concurrent.InitializeableScheduledExecutorServiceSupplier) Iterables(com.google.common.collect.Iterables) CassandraKeyValueServiceConfig(com.palantir.atlasdb.cassandra.CassandraKeyValueServiceConfig) CassandraKeyValueServiceRuntimeConfig(com.palantir.atlasdb.cassandra.CassandraKeyValueServiceRuntimeConfig) SafeLoggerFactory(com.palantir.logsafe.logger.SafeLoggerFactory) ScheduledFuture(java.util.concurrent.ScheduledFuture) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) HashMap(java.util.HashMap) NotFoundException(org.apache.cassandra.thrift.NotFoundException) Multimap(com.google.common.collect.Multimap) AsyncInitializer(com.palantir.async.initializer.AsyncInitializer) SafeLogger(com.palantir.logsafe.logger.SafeLogger) CassandraClientPoolMetrics(com.palantir.atlasdb.keyvalue.cassandra.pool.CassandraClientPoolMetrics) SafeArg(com.palantir.logsafe.SafeArg) HashMultimap(com.google.common.collect.HashMultimap) MetricsManager(com.palantir.atlasdb.util.MetricsManager) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) TokenRange(org.apache.cassandra.thrift.TokenRange) AtlasDbConstants(com.palantir.atlasdb.AtlasDbConstants) Refreshable(com.palantir.refreshable.Refreshable) ImmutableSet(com.google.common.collect.ImmutableSet) CassandraVerifierConfig(com.palantir.atlasdb.keyvalue.cassandra.CassandraVerifier.CassandraVerifierConfig) CassandraService(com.palantir.atlasdb.keyvalue.cassandra.pool.CassandraService) CassandraServer(com.palantir.atlasdb.keyvalue.cassandra.pool.CassandraServer) NamedThreadFactory(com.palantir.common.concurrent.NamedThreadFactory) FunctionCheckedException(com.palantir.common.base.FunctionCheckedException) KeyedStream(com.palantir.common.streams.KeyedStream) Set(java.util.Set) Collectors(java.util.stream.Collectors) 
Sets(com.google.common.collect.Sets) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) UnsafeArg(com.palantir.logsafe.UnsafeArg) RangeMap(com.google.common.collect.RangeMap) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) CassandraClientConfig(com.palantir.atlasdb.keyvalue.cassandra.CassandraClientFactory.CassandraClientConfig) CassandraServer(com.palantir.atlasdb.keyvalue.cassandra.pool.CassandraServer) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) NotFoundException(org.apache.cassandra.thrift.NotFoundException) TokenRange(org.apache.cassandra.thrift.TokenRange) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) NotFoundException(org.apache.cassandra.thrift.NotFoundException) FunctionCheckedException(com.palantir.common.base.FunctionCheckedException)

Example 18 with SafeIllegalStateException

use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.

the class TransactionAborter method executeWithRetry.

/**
 * Runs {@code tryAbortTransactions} for one transactions-table entry through {@code abortRetryer}.
 *
 * <p>Both statements must carry a serial consistency level of {@code SERIAL}; this is checked
 * before any call is made.
 *
 * @param txnInteraction interaction passed through to {@code tryAbortTransactions}
 * @param abortStatement statement passed through as the abort statement
 * @param checkStatement statement passed through as the check statement
 * @param entry entry supplying the start timestamp and the commit timestamp
 * @throws SafeIllegalStateException if the retryer fails with an {@link ExecutionException} or a
 *     {@code RetryException}; the original exception is kept as the cause
 */
private void executeWithRetry(
        TransactionsTableInteraction txnInteraction,
        Statement abortStatement,
        Statement checkStatement,
        TransactionTableEntry entry) {
    long startTimestamp = TransactionTableEntries.getStartTimestamp(entry);
    long commitTimestamp = getCommitTimestamp(entry).orElseThrow();

    ConsistencyLevel abortSerialLevel = abortStatement.getSerialConsistencyLevel();
    Preconditions.checkArgument(
            abortSerialLevel == ConsistencyLevel.SERIAL,
            "Abort statement was not at expected consistency level",
            SafeArg.of("consistencyLevel", abortSerialLevel),
            SafeArg.of("expectedConsistencyLevel", ConsistencyLevel.SERIAL));

    ConsistencyLevel checkSerialLevel = checkStatement.getSerialConsistencyLevel();
    Preconditions.checkArgument(
            checkSerialLevel == ConsistencyLevel.SERIAL,
            "Check statement was not at expected consistency level",
            SafeArg.of("consistencyLevel", checkSerialLevel),
            SafeArg.of("expectedConsistencyLevel", ConsistencyLevel.SERIAL));

    try {
        abortRetryer.call(() -> tryAbortTransactions(
                txnInteraction, abortStatement, checkStatement, startTimestamp, commitTimestamp));
    } catch (ExecutionException e) {
        throw new SafeIllegalStateException(
                "Failed to execute transaction abort",
                e,
                SafeArg.of("startTs", startTimestamp),
                SafeArg.of("commitTs", commitTimestamp),
                SafeArg.of("retryCount", RETRY_COUNT),
                SafeArg.of("keyspace", namespace));
    } catch (RetryException e) {
        throw new SafeIllegalStateException(
                "Unable to abort transactions even with retry",
                e,
                SafeArg.of("startTs", startTimestamp),
                SafeArg.of("commitTs", commitTimestamp),
                SafeArg.of("retryCount", RETRY_COUNT),
                SafeArg.of("keyspace", namespace));
    }
}
Also used : ExecutionException(java.util.concurrent.ExecutionException) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) RetryException(com.github.rholder.retry.RetryException)

Example 19 with SafeIllegalStateException

use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.

the class PaxosConsensusTestUtils method teardown.

/**
 * Shuts down the executor held by {@code state} and deletes the {@code LOG_DIR} directory.
 *
 * <p>The directory is deleted whether or not the executor terminates in time.
 *
 * @param state test state whose executor is shut down
 * @throws SafeIllegalStateException if the executor has not terminated within 10 seconds
 * @throws Exception if waiting for termination or deleting the directory fails
 */
public static void teardown(PaxosTestState state) throws Exception {
    try {
        ExecutorService testExecutor = state.getExecutor();
        testExecutor.shutdownNow();
        if (testExecutor.awaitTermination(10, TimeUnit.SECONDS)) {
            return;
        }
        throw new SafeIllegalStateException("Some threads are still hanging around!" + " Can't proceed or they might corrupt future tests.");
    } finally {
        FileUtils.deleteDirectory(new File(LOG_DIR));
    }
}
Also used : ExecutorService(java.util.concurrent.ExecutorService) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) File(java.io.File)

Example 20 with SafeIllegalStateException

use of com.palantir.logsafe.exceptions.SafeIllegalStateException in project atlasdb by palantir.

the class MultiNodePaxosTimeLockServerIntegrationTest method sanityCheckMultiClientStartTransactionsAgainstConjureTimelockService.

/**
 * Starts transactions for two namespaces through the multi-client endpoint, then issues the same
 * request per namespace through the namespaced endpoint, and checks that both responses share a
 * lock watch log id and that the later response's timestamps start after the earlier ones.
 */
@Test
public void sanityCheckMultiClientStartTransactionsAgainstConjureTimelockService() {
    TestableTimelockServer leader = cluster.currentLeaderFor(client.namespace());
    // Multi client batched TimeLock endpoints do not support multi-leader mode on TimeLock
    Assume.assumeFalse(leader.isMultiLeader());

    MultiClientConjureTimelockService batchedService = leader.multiClientService();
    List<String> namespaceNames = ImmutableList.of("alpha", "beta");
    int transactionsPerNamespace = 7;
    Map<Namespace, ConjureStartTransactionsRequest> requestsByNamespace =
            defaultStartTransactionsRequests(namespaceNames, transactionsPerNamespace);
    Map<Namespace, ConjureStartTransactionsResponse> batchedResponses =
            batchedService.startTransactionsForClients(AUTH_HEADER, requestsByNamespace);

    // Whichever endpoint is hit for a namespace, the same underlying service processes the
    // request, so the two responses must line up.
    for (Map.Entry<Namespace, ConjureStartTransactionsResponse> started : batchedResponses.entrySet()) {
        Namespace namespace = started.getKey();
        ConjureStartTransactionsResponse batchedResponse = started.getValue();

        ConjureStartTransactionsResponse legacyResponse = leader.client(namespace.get())
                .namespacedConjureTimelockService()
                .startTransactions(requestsByNamespace.get(namespace));

        assertThat(legacyResponse.getLockWatchUpdate().logId())
                .isEqualTo(batchedResponse.getLockWatchUpdate().logId());

        long highestBatchedTimestamp = batchedResponse
                .getTimestamps()
                .stream()
                .max()
                .orElseThrow(SafeIllegalStateException::new);
        assertThat(legacyResponse.getTimestamps().start()).isGreaterThan(highestBatchedTimestamp);
    }
}
Also used : MultiClientConjureTimelockService(com.palantir.atlasdb.timelock.api.MultiClientConjureTimelockService) ConjureStartTransactionsRequest(com.palantir.atlasdb.timelock.api.ConjureStartTransactionsRequest) PartitionedTimestamps(com.palantir.lock.v2.PartitionedTimestamps) Namespace(com.palantir.atlasdb.timelock.api.Namespace) ConjureStartTransactionsResponse(com.palantir.atlasdb.timelock.api.ConjureStartTransactionsResponse) SafeIllegalStateException(com.palantir.logsafe.exceptions.SafeIllegalStateException) Test(org.junit.Test)

Aggregations

SafeIllegalStateException (com.palantir.logsafe.exceptions.SafeIllegalStateException)24 VisibleForTesting (com.google.common.annotations.VisibleForTesting)5 ImmutableSet (com.google.common.collect.ImmutableSet)5 Set (java.util.Set)5 ImmutableList (com.google.common.collect.ImmutableList)4 Iterables (com.google.common.collect.Iterables)4 SafeArg (com.palantir.logsafe.SafeArg)4 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 List (java.util.List)4 Optional (java.util.Optional)4 Collectors (java.util.stream.Collectors)4 Suppliers (com.google.common.base.Suppliers)3 RangeMap (com.google.common.collect.RangeMap)3 Sets (com.google.common.collect.Sets)3 CassandraKeyValueServiceConfig (com.palantir.atlasdb.cassandra.CassandraKeyValueServiceConfig)3 Refreshable (com.palantir.refreshable.Refreshable)3 Map (java.util.Map)3 RetryException (com.github.rholder.retry.RetryException)2 ImmutableMap (com.google.common.collect.ImmutableMap)2