
Example 16 with UncheckedInterruptedException

use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.

the class UDFunction method async.

private <T> T async(ThreadIdAndCpuTime threadIdAndCpuTime, Callable<T> callable) {
    Future<T> future = executor().submit(callable);
    try {
        if (DatabaseDescriptor.getUserDefinedFunctionWarnTimeout() > 0)
            try {
                return future.get(DatabaseDescriptor.getUserDefinedFunctionWarnTimeout(), TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                // log and emit a warning that UDF execution took long
                String warn = String.format("User defined function %s ran longer than %dms", this, DatabaseDescriptor.getUserDefinedFunctionWarnTimeout());
                logger.warn(warn);
                ClientWarn.instance.warn(warn);
            }
        // retry with difference of warn-timeout to fail-timeout
        return future.get(DatabaseDescriptor.getUserDefinedFunctionFailTimeout() - DatabaseDescriptor.getUserDefinedFunctionWarnTimeout(), TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new UncheckedInterruptedException(e);
    } catch (ExecutionException e) {
        Throwable c = e.getCause();
        if (c instanceof RuntimeException)
            throw (RuntimeException) c;
        throw new RuntimeException(c);
    } catch (TimeoutException e) {
        // retry a last time with the difference of UDF-fail-timeout to consumed CPU time (just in case execution hit a badly timed GC)
        try {
            // The threadIdAndCpuTime shouldn't take a long time to be set so this should return immediately
            threadIdAndCpuTime.get(1, TimeUnit.SECONDS);
            // getThreadCpuTime() returns nanoseconds; convert the consumed CPU time to milliseconds
            long cpuTimeMillis = threadMXBean.getThreadCpuTime(threadIdAndCpuTime.threadId) - threadIdAndCpuTime.cpuTime;
            cpuTimeMillis /= 1000000L;
            return future.get(Math.max(DatabaseDescriptor.getUserDefinedFunctionFailTimeout() - cpuTimeMillis, 0L), TimeUnit.MILLISECONDS);
        } catch (InterruptedException e1) {
            Thread.currentThread().interrupt();
            throw new UncheckedInterruptedException(e1);
        } catch (ExecutionException e1) {
            Throwable c = e1.getCause();
            if (c instanceof RuntimeException)
                throw (RuntimeException) c;
            throw new RuntimeException(c);
        } catch (TimeoutException e1) {
            TimeoutException cause = new TimeoutException(String.format("User defined function %s ran longer than %dms%s", this, DatabaseDescriptor.getUserDefinedFunctionFailTimeout(), DatabaseDescriptor.getUserFunctionTimeoutPolicy() == Config.UserFunctionTimeoutPolicy.ignore ? "" : " - will stop Cassandra VM"));
            FunctionExecutionException fe = FunctionExecutionException.create(this, cause);
            JVMStabilityInspector.userFunctionTimeout(cause);
            throw fe;
        }
    }
}
Also used : FunctionExecutionException (org.apache.cassandra.exceptions.FunctionExecutionException), UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException), ExecutionException (java.util.concurrent.ExecutionException), TimeoutException (java.util.concurrent.TimeoutException)
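
The interesting part of this example is the staged wait: Future.get() is first called with the warn timeout (emitting a client warning on expiry), then again with the remaining budget up to the fail timeout, and finally once more after subtracting the CPU time the UDF actually consumed. Below is a minimal, self-contained sketch of the warn-then-fail part of that pattern using only java.util.concurrent; the WARN_TIMEOUT_MS/FAIL_TIMEOUT_MS constants and the UncheckedInterrupted wrapper are illustrative stand-ins for Cassandra's DatabaseDescriptor timeouts and UncheckedInterruptedException, not its actual API.

import java.util.concurrent.*;

/**
 * Minimal sketch of a warn-then-fail timeout around a submitted task.
 * All names and timeout values are illustrative.
 */
public class TwoStageTimeoutDemo {
    static final long WARN_TIMEOUT_MS = 500;
    static final long FAIL_TIMEOUT_MS = 2000;

    /** Unchecked wrapper thrown after the interrupt flag has been restored. */
    static class UncheckedInterrupted extends RuntimeException {
        UncheckedInterrupted(InterruptedException cause) { super(cause); }
    }

    static <T> T runWithWarnAndFailTimeout(ExecutorService executor, Callable<T> task) {
        Future<T> future = executor.submit(task);
        try {
            try {
                // First stage: wait up to the warn timeout.
                return future.get(WARN_TIMEOUT_MS, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                System.err.println("task ran longer than " + WARN_TIMEOUT_MS + "ms (warning only)");
            }
            // Second stage: wait the remaining budget up to the fail timeout.
            return future.get(FAIL_TIMEOUT_MS - WARN_TIMEOUT_MS, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new UncheckedInterrupted(e);
        } catch (ExecutionException e) {
            Throwable c = e.getCause();
            if (c instanceof RuntimeException)
                throw (RuntimeException) c;
            throw new RuntimeException(c);
        } catch (TimeoutException e) {
            future.cancel(true);
            throw new RuntimeException("task exceeded " + FAIL_TIMEOUT_MS + "ms", e);
        }
    }

    public static void main(String[] args) {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            // Sleeps past the warn timeout but finishes before the fail timeout,
            // so a warning is printed and the result is still returned.
            String result = runWithWarnAndFailTimeout(executor, () -> {
                Thread.sleep(800);
                return "done";
            });
            System.out.println(result);
        } finally {
            executor.shutdownNow();
        }
    }
}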

Example 17 with UncheckedInterruptedException

use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.

the class StorageService method decommission.

public void decommission(boolean force) throws InterruptedException {
    TokenMetadata metadata = tokenMetadata.cloneAfterAllLeft();
    if (operationMode != Mode.LEAVING) {
        if (!tokenMetadata.isMember(FBUtilities.getBroadcastAddressAndPort()))
            throw new UnsupportedOperationException("local node is not a member of the token ring yet");
        if (metadata.getAllEndpoints().size() < 2)
            throw new UnsupportedOperationException("no other normal nodes in the ring; decommission would be pointless");
        if (operationMode != Mode.NORMAL)
            throw new UnsupportedOperationException("Node in " + operationMode + " state; wait for status to become normal or restart");
    }
    if (!isDecommissioning.compareAndSet(false, true))
        throw new IllegalStateException("Node is still decommissioning. Check nodetool netstats.");
    if (logger.isDebugEnabled())
        logger.debug("DECOMMISSIONING");
    try {
        PendingRangeCalculatorService.instance.blockUntilFinished();
        String dc = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
        // If we're already decommissioning there is no point checking RF/pending ranges
        if (operationMode != Mode.LEAVING) {
            int rf, numNodes;
            for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces()) {
                if (!force) {
                    Keyspace keyspace = Keyspace.open(keyspaceName);
                    if (keyspace.getReplicationStrategy() instanceof NetworkTopologyStrategy) {
                        NetworkTopologyStrategy strategy = (NetworkTopologyStrategy) keyspace.getReplicationStrategy();
                        rf = strategy.getReplicationFactor(dc).allReplicas;
                        numNodes = metadata.getTopology().getDatacenterEndpoints().get(dc).size();
                    } else {
                        numNodes = metadata.getAllEndpoints().size();
                        rf = keyspace.getReplicationStrategy().getReplicationFactor().allReplicas;
                    }
                    if (numNodes <= rf)
                        throw new UnsupportedOperationException("Not enough live nodes to maintain replication factor in keyspace " + keyspaceName + " (RF = " + rf + ", N = " + numNodes + ")." + " Perform a forceful decommission to ignore.");
                }
                // TODO: do we care about fixing transient/full self-movements here? probably
                if (tokenMetadata.getPendingRanges(keyspaceName, FBUtilities.getBroadcastAddressAndPort()).size() > 0)
                    throw new UnsupportedOperationException("data is currently moving to this node; unable to leave the ring");
            }
        }
        startLeaving();
        long timeout = Math.max(RING_DELAY, BatchlogManager.instance.getBatchlogTimeout());
        setMode(Mode.LEAVING, "sleeping " + timeout + " ms for batch processing and pending range setup", true);
        Thread.sleep(timeout);
        Runnable finishLeaving = new Runnable() {

            public void run() {
                shutdownClientServers();
                Gossiper.instance.stop();
                try {
                    MessagingService.instance().shutdown();
                } catch (IOError ioe) {
                    logger.info("failed to shutdown message service: {}", ioe);
                }
                Stage.shutdownNow();
                SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.DECOMMISSIONED);
                setMode(Mode.DECOMMISSIONED, true);
            // let op be responsible for killing the process
            }
        };
        unbootstrap(finishLeaving);
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    } catch (ExecutionException e) {
        logger.error("Error while decommissioning node ", e.getCause());
        throw new RuntimeException("Error while decommissioning node: " + e.getCause().getMessage());
    } finally {
        isDecommissioning.set(false);
    }
}
Also used : UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException), IOError (java.io.IOError), TraceKeyspace (org.apache.cassandra.tracing.TraceKeyspace), AuthKeyspace (org.apache.cassandra.auth.AuthKeyspace), SystemDistributedKeyspace (org.apache.cassandra.schema.SystemDistributedKeyspace), MigrationManager.evolveSystemKeyspace (org.apache.cassandra.schema.MigrationManager.evolveSystemKeyspace), ExecutionException (java.util.concurrent.ExecutionException)
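
Two details are worth calling out: the isDecommissioning flag (compareAndSet on entry, reset in finally) admits at most one decommission at a time, and the InterruptedException from the batchlog sleep is rethrown as UncheckedInterruptedException. Here is a minimal sketch of the single-admission guard on its own; the class and method names are illustrative, not Cassandra's.

import java.util.concurrent.atomic.AtomicBoolean;

/**
 * Sketch of a single-admission guard: admit at most one concurrent run via
 * compareAndSet, and always release the flag in finally so a retry is possible.
 */
public class SingleAdmissionGuard {
    private final AtomicBoolean running = new AtomicBoolean(false);

    public void runExclusively(Runnable operation) {
        // Only the caller that flips false -> true may proceed.
        if (!running.compareAndSet(false, true))
            throw new IllegalStateException("operation is already in progress");
        try {
            operation.run();
        } finally {
            // Release even if the operation throws.
            running.set(false);
        }
    }
}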

Example 18 with UncheckedInterruptedException

use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.

the class StorageService method move.

/**
 * move the node to new token or find a new token to boot to according to load
 *
 * @param newToken new token to boot to, or if null, find balanced token to boot to
 *
 * @throws IOException on any I/O operation error
 */
private void move(Token newToken) throws IOException {
    if (newToken == null)
        throw new IOException("Can't move to the undefined (null) token.");
    if (tokenMetadata.sortedTokens().contains(newToken))
        throw new IOException("target token " + newToken + " is already owned by another node.");
    // address of the current node
    InetAddressAndPort localAddress = FBUtilities.getBroadcastAddressAndPort();
    // This doesn't make any sense in a vnodes environment.
    if (getTokenMetadata().getTokens(localAddress).size() > 1) {
        logger.error("Invalid request to move(Token); This node has more than one token and cannot be moved thusly.");
        throw new UnsupportedOperationException("This node has more than one token and cannot be moved thusly.");
    }
    List<String> keyspacesToProcess = Schema.instance.getNonLocalStrategyKeyspaces();
    PendingRangeCalculatorService.instance.blockUntilFinished();
    // checking if data is moving to this node
    for (String keyspaceName : keyspacesToProcess) {
        // TODO: do we care about fixing transient/full self-movements here?
        if (tokenMetadata.getPendingRanges(keyspaceName, localAddress).size() > 0)
            throw new UnsupportedOperationException("data is currently moving to this node; unable to leave the ring");
    }
    Gossiper.instance.addLocalApplicationState(ApplicationState.STATUS_WITH_PORT, valueFactory.moving(newToken));
    Gossiper.instance.addLocalApplicationState(ApplicationState.STATUS, valueFactory.moving(newToken));
    setMode(Mode.MOVING, String.format("Moving %s from %s to %s.", localAddress, getLocalTokens().iterator().next(), newToken), true);
    setMode(Mode.MOVING, String.format("Sleeping %s ms before start streaming/fetching ranges", RING_DELAY), true);
    Uninterruptibles.sleepUninterruptibly(RING_DELAY, MILLISECONDS);
    RangeRelocator relocator = new RangeRelocator(Collections.singleton(newToken), keyspacesToProcess, tokenMetadata);
    relocator.calculateToFromStreams();
    if (relocator.streamsNeeded()) {
        setMode(Mode.MOVING, "fetching new ranges and streaming old ranges", true);
        try {
            relocator.stream().get();
        } catch (InterruptedException e) {
            throw new UncheckedInterruptedException(e);
        } catch (ExecutionException e) {
            throw new RuntimeException("Interrupted while waiting for stream/fetch ranges to finish: " + e.getMessage());
        }
    } else {
        setMode(Mode.MOVING, "No ranges to fetch/stream", true);
    }
    // setting new token as we have everything settled
    setTokens(Collections.singleton(newToken));
    if (logger.isDebugEnabled())
        logger.debug("Successfully moved to new token {}", getLocalTokens().iterator().next());
}
Also used : UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException)
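
Two interruption strategies appear side by side in this method: Uninterruptibles.sleepUninterruptibly for the fixed RING_DELAY pause, and wrapping into UncheckedInterruptedException while waiting on the streaming future. For reference, this is a plain-Java sketch of what an uninterruptible sleep does, equivalent in spirit to Guava's helper but not Cassandra code.

import java.util.concurrent.TimeUnit;

/**
 * Sketch of an uninterruptible sleep: keep sleeping for the remaining time
 * across interrupts, then restore the thread's interrupt flag before returning.
 */
public final class SleepUninterruptibly {
    public static void sleepUninterruptibly(long duration, TimeUnit unit) {
        boolean interrupted = false;
        try {
            long remainingNanos = unit.toNanos(duration);
            long end = System.nanoTime() + remainingNanos;
            while (remainingNanos > 0) {
                try {
                    TimeUnit.NANOSECONDS.sleep(remainingNanos);
                    return;
                } catch (InterruptedException e) {
                    interrupted = true;                     // remember the interrupt, keep sleeping
                    remainingNanos = end - System.nanoTime();
                }
            }
        } finally {
            if (interrupted)
                Thread.currentThread().interrupt();         // restore the interrupt flag
        }
    }
}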

Example 19 with UncheckedInterruptedException

use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.

the class StorageService method prepareForBootstrap.

@VisibleForTesting
public Collection<InetAddressAndPort> prepareForBootstrap(long schemaDelay) {
    Set<InetAddressAndPort> collisions = new HashSet<>();
    if (SystemKeyspace.bootstrapInProgress())
        logger.warn("Detected previous bootstrap failure; retrying");
    else
        SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.IN_PROGRESS);
    setMode(Mode.JOINING, "waiting for ring information", true);
    waitForSchema(schemaDelay);
    setMode(Mode.JOINING, "schema complete, ready to bootstrap", true);
    setMode(Mode.JOINING, "waiting for pending range calculation", true);
    PendingRangeCalculatorService.instance.blockUntilFinished();
    setMode(Mode.JOINING, "calculation complete, ready to bootstrap", true);
    logger.debug("... got ring + schema info");
    if (useStrictConsistency && !allowSimultaneousMoves() && (tokenMetadata.getBootstrapTokens().valueSet().size() > 0 || tokenMetadata.getSizeOfLeavingEndpoints() > 0 || tokenMetadata.getSizeOfMovingEndpoints() > 0)) {
        String bootstrapTokens = StringUtils.join(tokenMetadata.getBootstrapTokens().valueSet(), ',');
        String leavingTokens = StringUtils.join(tokenMetadata.getLeavingEndpoints(), ',');
        String movingTokens = StringUtils.join(tokenMetadata.getMovingEndpoints().stream().map(e -> e.right).toArray(), ',');
        throw new UnsupportedOperationException(String.format("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true. Nodes detected, bootstrapping: %s; leaving: %s; moving: %s;", bootstrapTokens, leavingTokens, movingTokens));
    }
    // get bootstrap tokens
    if (!replacing) {
        if (tokenMetadata.isMember(FBUtilities.getBroadcastAddressAndPort())) {
            String s = "This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)";
            throw new UnsupportedOperationException(s);
        }
        setMode(Mode.JOINING, "getting bootstrap token", true);
        bootstrapTokens = BootStrapper.getBootstrapTokens(tokenMetadata, FBUtilities.getBroadcastAddressAndPort(), schemaDelay);
    } else {
        if (!isReplacingSameAddress()) {
            try {
                // Sleep additionally to make sure that the server actually is not alive
                // and giving it more time to gossip if alive.
                Thread.sleep(LoadBroadcaster.BROADCAST_INTERVAL);
            } catch (InterruptedException e) {
                throw new UncheckedInterruptedException(e);
            }
            // check for operator errors...
            for (Token token : bootstrapTokens) {
                InetAddressAndPort existing = tokenMetadata.getEndpoint(token);
                if (existing != null) {
                    // schemaDelay is in milliseconds; convert to nanoseconds to compare with the gossip update timestamp
                    long nanoDelay = schemaDelay * 1000000L;
                    if (Gossiper.instance.getEndpointStateForEndpoint(existing).getUpdateTimestamp() > (nanoTime() - nanoDelay))
                        throw new UnsupportedOperationException("Cannot replace a live node... ");
                    collisions.add(existing);
                } else {
                    throw new UnsupportedOperationException("Cannot replace token " + token + " which does not exist!");
                }
            }
        } else {
            try {
                Thread.sleep(RING_DELAY);
            } catch (InterruptedException e) {
                throw new UncheckedInterruptedException(e);
            }
        }
        setMode(Mode.JOINING, "Replacing a node with token(s): " + bootstrapTokens, true);
    }
    return collisions;
}
Also used : UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException), VisibleForTesting (com.google.common.annotations.VisibleForTesting)
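
When replacing a node at a different address, the code sleeps one broadcast interval and then refuses to proceed if the endpoint being replaced still looks alive, i.e. its gossip state was updated within the last schemaDelay milliseconds. A tiny sketch of that freshness check follows; the names are illustrative, not Cassandra's.

import java.util.concurrent.TimeUnit;

/**
 * Sketch of a "was this endpoint seen recently?" check: compare a monotonic
 * update timestamp (nanoseconds) against a millisecond-sized freshness window.
 */
public class LivenessWindowCheck {

    /** @return true if updateTimestampNanos falls within the last windowMillis. */
    static boolean seenRecently(long updateTimestampNanos, long windowMillis) {
        long windowNanos = TimeUnit.MILLISECONDS.toNanos(windowMillis);
        // System.nanoTime() is monotonic, so this comparison is safe.
        return updateTimestampNanos > System.nanoTime() - windowNanos;
    }

    public static void main(String[] args) throws InterruptedException {
        long lastUpdate = System.nanoTime();
        Thread.sleep(50);
        System.out.println(seenRecently(lastUpdate, 30_000)); // true: updated 50ms ago, window is 30s
        System.out.println(seenRecently(lastUpdate, 10));     // false: 50ms ago is outside a 10ms window
    }
}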

Example 20 with UncheckedInterruptedException

use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.

the class AbstractWriteResponseHandler method maybeTryAdditionalReplicas.

/**
 * Cheap Quorum backup.  If we failed to reach quorum with our initial (full) nodes, reach out to other nodes.
 */
public void maybeTryAdditionalReplicas(IMutation mutation, WritePerformer writePerformer, String localDC) {
    EndpointsForToken uncontacted = replicaPlan.liveUncontacted();
    if (uncontacted.isEmpty())
        return;
    long timeout = MAX_VALUE;
    List<ColumnFamilyStore> cfs = mutation.getTableIds().stream().map(instance::getColumnFamilyStoreInstance).collect(toList());
    for (ColumnFamilyStore cf : cfs) timeout = min(timeout, cf.additionalWriteLatencyNanos);
    // no latency information, or we're overloaded
    if (timeout > mutation.getTimeout(NANOSECONDS))
        return;
    try {
        if (!condition.await(timeout, NANOSECONDS)) {
            for (ColumnFamilyStore cf : cfs) cf.metric.additionalWrites.inc();
            writePerformer.apply(mutation, replicaPlan.withContact(uncontacted), (AbstractWriteResponseHandler<IMutation>) this, localDC);
        }
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    }
}
Also used : EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken), IMutation (org.apache.cassandra.db.IMutation), ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore), UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException)
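
The handler waits on its completion condition for a latency budget learned per table (the minimum of additionalWriteLatencyNanos across the mutated tables) and, if the expected acks have not arrived in time, sends the write to the still-uncontacted replicas; an interrupt is surfaced as UncheckedInterruptedException. Below is a minimal sketch of that trigger, with a CountDownLatch and a Runnable standing in for the handler's condition and Cassandra's WritePerformer.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

/**
 * Sketch of a speculative "additional replica" trigger: wait for the expected
 * acks up to a latency budget, then fall back to contacting more replicas.
 * The latch, budget and callback are illustrative stand-ins.
 */
public class SpeculativeWriteSketch {

    static void maybeTryAdditionalReplicas(CountDownLatch acksReceived,
                                           long latencyBudgetNanos,
                                           Runnable sendToMoreReplicas) {
        try {
            // Wait only as long as writes to the primary replicas usually take.
            if (!acksReceived.await(latencyBudgetNanos, TimeUnit.NANOSECONDS))
                sendToMoreReplicas.run();          // still not acknowledged: widen the contact set
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();    // restore the flag ...
            throw new RuntimeException(e);         // ... and surface it as an unchecked exception
        }
    }

    public static void main(String[] args) {
        CountDownLatch acks = new CountDownLatch(1);   // never counted down in this demo
        maybeTryAdditionalReplicas(acks,
                                   TimeUnit.MILLISECONDS.toNanos(5),
                                   () -> System.out.println("timed out; writing to additional replicas"));
    }
}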

Aggregations

UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException): 28
ExecutionException (java.util.concurrent.ExecutionException): 9
TimeoutException (java.util.concurrent.TimeoutException): 4
WriteTimeoutException (org.apache.cassandra.exceptions.WriteTimeoutException): 3
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2
IOException (java.io.IOException): 2
CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList): 2
ScheduledFuture (java.util.concurrent.ScheduledFuture): 2
WriteFailureException (org.apache.cassandra.exceptions.WriteFailureException): 2
InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort): 2
Preconditions (com.google.common.base.Preconditions): 1
CacheLoader (com.google.common.cache.CacheLoader): 1
Iterables (com.google.common.collect.Iterables): 1
Iterables.concat (com.google.common.collect.Iterables.concat): 1
Ints (com.google.common.primitives.Ints): 1
Uninterruptibles (com.google.common.util.concurrent.Uninterruptibles): 1
BufferedReader (java.io.BufferedReader): 1
FileNotFoundException (java.io.FileNotFoundException): 1
IOError (java.io.IOError): 1
InputStreamReader (java.io.InputStreamReader): 1