use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.
the class UDFunction method async.
/**
 * Submits {@code callable} to {@link #executor()} and waits for its result, enforcing the
 * configured user-defined-function warn and fail timeouts.
 *
 * <p>The wait happens in up to three stages:
 * <ol>
 *   <li>if a warn timeout is configured (&gt; 0), wait that long; on expiry log and emit a client warning;</li>
 *   <li>wait for the difference between the fail timeout and the warn timeout;</li>
 *   <li>if that also expires, wait one last time for the fail timeout minus the CPU time the
 *       executing thread has actually consumed (in case the wall-clock wait was eaten by a badly timed GC).</li>
 * </ol>
 *
 * @param threadIdAndCpuTime holder for the executing thread's id and its starting CPU time
 * @param callable           the work to execute
 * @return the value produced by {@code callable}
 * @throws UncheckedInterruptedException if the waiting thread is interrupted (the interrupt flag is restored first)
 * @throws RuntimeException              the failure raised by {@code callable}; rethrown as-is if it already is a
 *                                       {@code RuntimeException}, wrapped otherwise
 * @throws FunctionExecutionException    if the function exceeds the fail timeout in all stages
 */
private <T> T async(ThreadIdAndCpuTime threadIdAndCpuTime, Callable<T> callable) {
    Future<T> future = executor().submit(callable);
    try {
        if (DatabaseDescriptor.getUserDefinedFunctionWarnTimeout() > 0) {
            try {
                return future.get(DatabaseDescriptor.getUserDefinedFunctionWarnTimeout(), TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                // log and emit a warning that UDF execution took long
                String warn = String.format("User defined function %s ran longer than %dms", this, DatabaseDescriptor.getUserDefinedFunctionWarnTimeout());
                logger.warn(warn);
                ClientWarn.instance.warn(warn);
            }
        }
        // retry with difference of warn-timeout to fail-timeout
        return future.get(DatabaseDescriptor.getUserDefinedFunctionFailTimeout() - DatabaseDescriptor.getUserDefinedFunctionWarnTimeout(), TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new UncheckedInterruptedException(e);
    } catch (ExecutionException e) {
        Throwable c = e.getCause();
        if (c instanceof RuntimeException)
            throw (RuntimeException) c;
        throw new RuntimeException(c);
    } catch (TimeoutException e) {
        // retry a last time with the difference of UDF-fail-timeout to consumed CPU time (just in case execution hit a badly timed GC)
        try {
            // The threadIdAndCpuTime shouldn't take a long time to be set so this should return immediately
            threadIdAndCpuTime.get(1, TimeUnit.SECONDS);
            long cpuTimeNanos = threadMXBean.getThreadCpuTime(threadIdAndCpuTime.threadId) - threadIdAndCpuTime.cpuTime;
            long cpuTimeMillis = cpuTimeNanos / 1000000L;
            return future.get(Math.max(DatabaseDescriptor.getUserDefinedFunctionFailTimeout() - cpuTimeMillis, 0L), TimeUnit.MILLISECONDS);
        } catch (InterruptedException e1) {
            Thread.currentThread().interrupt();
            throw new UncheckedInterruptedException(e1);
        } catch (ExecutionException e1) {
            // Unwrap the failure of THIS attempt (e1); the outer TimeoutException (e) carries no cause,
            // so using it here would discard the function's real error.
            Throwable c = e1.getCause();
            if (c instanceof RuntimeException)
                throw (RuntimeException) c;
            throw new RuntimeException(c);
        } catch (TimeoutException e1) {
            TimeoutException cause = new TimeoutException(String.format("User defined function %s ran longer than %dms%s", this, DatabaseDescriptor.getUserDefinedFunctionFailTimeout(), DatabaseDescriptor.getUserFunctionTimeoutPolicy() == Config.UserFunctionTimeoutPolicy.ignore ? "" : " - will stop Cassandra VM"));
            FunctionExecutionException fe = FunctionExecutionException.create(this, cause);
            // Report the timeout before propagating the failure to the caller.
            JVMStabilityInspector.userFunctionTimeout(cause);
            throw fe;
        }
    }
}
use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.
the class StorageService method decommission.
/**
 * Removes the local node from the ring: validates that leaving is possible, announces the
 * leave, waits for batch processing / pending range setup, then unbootstraps and shuts down
 * the client servers, gossip, messaging and stages.
 *
 * @param force when {@code true}, skips the per-keyspace check that enough nodes remain to
 *              satisfy the replication factor
 * @throws UnsupportedOperationException if the node is not a ring member, is the only node, is not in
 *                                       NORMAL/LEAVING mode, too few nodes would remain (and not forced),
 *                                       or data is currently moving to this node
 * @throws IllegalStateException         if another decommission is already in progress
 * @throws UncheckedInterruptedException if interrupted inside the guarded section below
 * @throws RuntimeException              if unbootstrapping fails; the underlying failure is attached as the cause
 * @throws InterruptedException          declared by the signature; interruptions raised inside the guarded
 *                                       section surface as {@code UncheckedInterruptedException} instead
 */
public void decommission(boolean force) throws InterruptedException {
    TokenMetadata metadata = tokenMetadata.cloneAfterAllLeft();
    if (operationMode != Mode.LEAVING) {
        if (!tokenMetadata.isMember(FBUtilities.getBroadcastAddressAndPort()))
            throw new UnsupportedOperationException("local node is not a member of the token ring yet");
        if (metadata.getAllEndpoints().size() < 2)
            throw new UnsupportedOperationException("no other normal nodes in the ring; decommission would be pointless");
        if (operationMode != Mode.NORMAL)
            throw new UnsupportedOperationException("Node in " + operationMode + " state; wait for status to become normal or restart");
    }
    // Only one decommission may run at a time; the flag is cleared in the finally block below.
    if (!isDecommissioning.compareAndSet(false, true))
        throw new IllegalStateException("Node is still decommissioning. Check nodetool netstats.");
    if (logger.isDebugEnabled())
        logger.debug("DECOMMISSIONING");
    try {
        PendingRangeCalculatorService.instance.blockUntilFinished();
        String dc = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
        // If we're already decommissioning there is no point checking RF/pending ranges
        if (operationMode != Mode.LEAVING) {
            int rf, numNodes;
            for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces()) {
                if (!force) {
                    Keyspace keyspace = Keyspace.open(keyspaceName);
                    if (keyspace.getReplicationStrategy() instanceof NetworkTopologyStrategy) {
                        // For NTS only the local datacenter's RF and node count matter.
                        NetworkTopologyStrategy strategy = (NetworkTopologyStrategy) keyspace.getReplicationStrategy();
                        rf = strategy.getReplicationFactor(dc).allReplicas;
                        numNodes = metadata.getTopology().getDatacenterEndpoints().get(dc).size();
                    } else {
                        numNodes = metadata.getAllEndpoints().size();
                        rf = keyspace.getReplicationStrategy().getReplicationFactor().allReplicas;
                    }
                    if (numNodes <= rf)
                        throw new UnsupportedOperationException("Not enough live nodes to maintain replication factor in keyspace " + keyspaceName + " (RF = " + rf + ", N = " + numNodes + ")." + " Perform a forceful decommission to ignore.");
                }
                // TODO: do we care about fixing transient/full self-movements here? probably
                if (tokenMetadata.getPendingRanges(keyspaceName, FBUtilities.getBroadcastAddressAndPort()).size() > 0)
                    throw new UnsupportedOperationException("data is currently moving to this node; unable to leave the ring");
            }
        }
        startLeaving();
        long timeout = Math.max(RING_DELAY, BatchlogManager.instance.getBatchlogTimeout());
        setMode(Mode.LEAVING, "sleeping " + timeout + " ms for batch processing and pending range setup", true);
        Thread.sleep(timeout);
        // Callback handed to unbootstrap(); it performs the final shutdown steps.
        Runnable finishLeaving = new Runnable() {
            @Override
            public void run() {
                shutdownClientServers();
                Gossiper.instance.stop();
                try {
                    MessagingService.instance().shutdown();
                } catch (IOError ioe) {
                    logger.info("failed to shutdown message service: {}", ioe);
                }
                Stage.shutdownNow();
                SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.DECOMMISSIONED);
                setMode(Mode.DECOMMISSIONED, true);
                // let op be responsible for killing the process
            }
        };
        unbootstrap(finishLeaving);
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    } catch (ExecutionException e) {
        // Fall back to the ExecutionException itself if it carries no cause, and keep the
        // cause chain on the rethrown exception so callers can see the original stack trace.
        Throwable cause = e.getCause() != null ? e.getCause() : e;
        logger.error("Error while decommissioning node ", cause);
        throw new RuntimeException("Error while decommissioning node: " + cause.getMessage(), cause);
    } finally {
        isDecommissioning.set(false);
    }
}
use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.
the class StorageService method move.
/**
 * Moves this (single-token) node to {@code newToken}: announces the move via gossip, waits
 * {@code RING_DELAY}, streams/fetches the affected ranges if any are needed, and finally
 * installs the new token locally.
 *
 * @param newToken the token to move to; must not be {@code null} and must not already be owned
 *
 * @throws IOException if {@code newToken} is {@code null} or is already present in the ring
 * @throws UnsupportedOperationException if this node owns more than one token, or data is
 *                                       currently moving to this node
 * @throws UncheckedInterruptedException if interrupted while waiting for streaming to finish
 * @throws RuntimeException if streaming fails; the underlying {@code ExecutionException} is attached as the cause
 */
private void move(Token newToken) throws IOException {
    if (newToken == null)
        throw new IOException("Can't move to the undefined (null) token.");
    if (tokenMetadata.sortedTokens().contains(newToken))
        throw new IOException("target token " + newToken + " is already owned by another node.");
    // address of the current node
    InetAddressAndPort localAddress = FBUtilities.getBroadcastAddressAndPort();
    // This doesn't make any sense in a vnodes environment.
    if (getTokenMetadata().getTokens(localAddress).size() > 1) {
        logger.error("Invalid request to move(Token); This node has more than one token and cannot be moved thusly.");
        throw new UnsupportedOperationException("This node has more than one token and cannot be moved thusly.");
    }
    List<String> keyspacesToProcess = Schema.instance.getNonLocalStrategyKeyspaces();
    PendingRangeCalculatorService.instance.blockUntilFinished();
    // checking if data is moving to this node
    for (String keyspaceName : keyspacesToProcess) {
        // TODO: do we care about fixing transient/full self-movements here?
        if (tokenMetadata.getPendingRanges(keyspaceName, localAddress).size() > 0)
            throw new UnsupportedOperationException("data is currently moving to this node; unable to leave the ring");
    }
    // Publish the MOVING state under both application-state keys.
    Gossiper.instance.addLocalApplicationState(ApplicationState.STATUS_WITH_PORT, valueFactory.moving(newToken));
    Gossiper.instance.addLocalApplicationState(ApplicationState.STATUS, valueFactory.moving(newToken));
    setMode(Mode.MOVING, String.format("Moving %s from %s to %s.", localAddress, getLocalTokens().iterator().next(), newToken), true);
    setMode(Mode.MOVING, String.format("Sleeping %s ms before start streaming/fetching ranges", RING_DELAY), true);
    Uninterruptibles.sleepUninterruptibly(RING_DELAY, MILLISECONDS);
    RangeRelocator relocator = new RangeRelocator(Collections.singleton(newToken), keyspacesToProcess, tokenMetadata);
    relocator.calculateToFromStreams();
    if (relocator.streamsNeeded()) {
        setMode(Mode.MOVING, "fetching new ranges and streaming old ranges", true);
        try {
            relocator.stream().get();
        } catch (InterruptedException e) {
            throw new UncheckedInterruptedException(e);
        } catch (ExecutionException e) {
            // Keep the original failure attached so the stack trace of the streaming error is not lost.
            throw new RuntimeException("Interrupted while waiting for stream/fetch ranges to finish: " + e.getMessage(), e);
        }
    } else {
        setMode(Mode.MOVING, "No ranges to fetch/stream", true);
    }
    // setting new token as we have everything settled
    setTokens(Collections.singleton(newToken));
    if (logger.isDebugEnabled())
        logger.debug("Successfully moved to new token {}", getLocalTokens().iterator().next());
}
use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.
the class StorageService method prepareForBootstrap.
/**
 * Performs the pre-bootstrap steps: records the bootstrap as in progress, waits for schema and
 * pending-range calculation, rejects the bootstrap when conflicting range movements are detected
 * under strict consistency, and then either obtains bootstrap tokens (normal join) or validates
 * the tokens of the node being replaced.
 *
 * @param schemaDelay delay passed to {@code waitForSchema} and to the bootstrap-token lookup; it is also
 *                    multiplied by 1,000,000 and compared against {@code nanoTime()} for the liveness check
 * @return the endpoints that currently own the tokens being replaced; empty unless replacing a
 *         node at a different address
 * @throws UnsupportedOperationException if other range movements are in flight under strict consistency,
 *                                       the node is already a ring member, a replaced token is unknown,
 *                                       or the replaced node was updated in gossip within the delay window
 * @throws UncheckedInterruptedException if interrupted while sleeping
 */
@VisibleForTesting
public Collection<InetAddressAndPort> prepareForBootstrap(long schemaDelay) {
    Set<InetAddressAndPort> replacedEndpoints = new HashSet<>();

    if (!SystemKeyspace.bootstrapInProgress()) {
        SystemKeyspace.setBootstrapState(SystemKeyspace.BootstrapState.IN_PROGRESS);
    } else {
        logger.warn("Detected previous bootstrap failure; retrying");
    }

    setMode(Mode.JOINING, "waiting for ring information", true);
    waitForSchema(schemaDelay);
    setMode(Mode.JOINING, "schema complete, ready to bootstrap", true);
    setMode(Mode.JOINING, "waiting for pending range calculation", true);
    PendingRangeCalculatorService.instance.blockUntilFinished();
    setMode(Mode.JOINING, "calculation complete, ready to bootstrap", true);
    logger.debug("... got ring + schema info");

    if (useStrictConsistency && !allowSimultaneousMoves()) {
        boolean rangeMovementInFlight = tokenMetadata.getBootstrapTokens().valueSet().size() > 0
                                        || tokenMetadata.getSizeOfLeavingEndpoints() > 0
                                        || tokenMetadata.getSizeOfMovingEndpoints() > 0;
        if (rangeMovementInFlight) {
            String bootstrapping = StringUtils.join(tokenMetadata.getBootstrapTokens().valueSet(), ',');
            String leaving = StringUtils.join(tokenMetadata.getLeavingEndpoints(), ',');
            String moving = StringUtils.join(tokenMetadata.getMovingEndpoints().stream().map(pair -> pair.right).toArray(), ',');
            throw new UnsupportedOperationException(String.format("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true. Nodes detected, bootstrapping: %s; leaving: %s; moving: %s;", bootstrapping, leaving, moving));
        }
    }

    if (replacing) {
        boolean sameAddress = isReplacingSameAddress();
        try {
            // When replacing a different address, sleep additionally to make sure that the server
            // actually is not alive, giving it more time to gossip if alive.
            Thread.sleep(sameAddress ? RING_DELAY : LoadBroadcaster.BROADCAST_INTERVAL);
        } catch (InterruptedException interrupted) {
            throw new UncheckedInterruptedException(interrupted);
        }

        if (!sameAddress) {
            // check for operator errors...
            long nanoDelay = schemaDelay * 1000000L;
            for (Token token : bootstrapTokens) {
                InetAddressAndPort owner = tokenMetadata.getEndpoint(token);
                if (owner == null) {
                    throw new UnsupportedOperationException("Cannot replace token " + token + " which does not exist!");
                }
                long lastUpdate = Gossiper.instance.getEndpointStateForEndpoint(owner).getUpdateTimestamp();
                if (lastUpdate > (nanoTime() - nanoDelay)) {
                    throw new UnsupportedOperationException("Cannot replace a live node... ");
                }
                replacedEndpoints.add(owner);
            }
        }
        setMode(Mode.JOINING, "Replacing a node with token(s): " + bootstrapTokens, true);
    } else {
        // get bootstrap tokens
        if (tokenMetadata.isMember(FBUtilities.getBroadcastAddressAndPort())) {
            throw new UnsupportedOperationException("This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)");
        }
        setMode(Mode.JOINING, "getting bootstrap token", true);
        bootstrapTokens = BootStrapper.getBootstrapTokens(tokenMetadata, FBUtilities.getBroadcastAddressAndPort(), schemaDelay);
    }

    return replacedEndpoints;
}
use of org.apache.cassandra.utils.concurrent.UncheckedInterruptedException in project cassandra by apache.
the class AbstractWriteResponseHandler method maybeTryAdditionalReplicas.
/**
 * Cheap Quorum backup. If the initial (full) replicas have not satisfied the write within the
 * tables' additional-write latency threshold, sends the mutation to the live replicas that were
 * not contacted yet.
 *
 * @param mutation       the write being performed
 * @param writePerformer used to send the mutation to the additional replicas
 * @param localDC        passed through to {@code writePerformer}
 * @throws UncheckedInterruptedException if interrupted while waiting on the response condition
 */
public void maybeTryAdditionalReplicas(IMutation mutation, WritePerformer writePerformer, String localDC) {
    EndpointsForToken spareReplicas = replicaPlan.liveUncontacted();
    if (spareReplicas.isEmpty()) {
        return;
    }

    List<ColumnFamilyStore> stores = mutation.getTableIds()
                                             .stream()
                                             .map(instance::getColumnFamilyStoreInstance)
                                             .collect(toList());

    // Use the smallest threshold across all tables touched by the mutation.
    long waitNanos = MAX_VALUE;
    for (ColumnFamilyStore store : stores) {
        waitNanos = min(waitNanos, store.additionalWriteLatencyNanos);
    }

    // no latency information, or we're overloaded
    if (waitNanos > mutation.getTimeout(NANOSECONDS)) {
        return;
    }

    try {
        if (condition.await(waitNanos, NANOSECONDS)) {
            // Signalled in time; no extra replicas needed.
            return;
        }
        for (ColumnFamilyStore store : stores) {
            store.metric.additionalWrites.inc();
        }
        writePerformer.apply(mutation, replicaPlan.withContact(spareReplicas), (AbstractWriteResponseHandler<IMutation>) this, localDC);
    } catch (InterruptedException interrupted) {
        throw new UncheckedInterruptedException(interrupted);
    }
}
Aggregations