use of voldemort.store.UnreachableStoreException in project voldemort by voldemort.
the class ServerTestUtils method waitForServerStart.
/**
 * Waits until a socket connection to the given node is usable.
 * <p>
 * Makes up to 10 attempts, one second apart. Each attempt opens a socket store
 * for the metadata store on the node's socket port and fetches the cluster key;
 * the first attempt that does not raise {@link UnreachableStoreException} ends
 * the wait. The socket store is closed after every attempt.
 *
 * @param socketStoreFactory factory used to obtain the socket store
 * @param node node whose socket port is probed
 * @throws UnreachableStoreException the most recent failure, if the node is
 *         still unreachable after all attempts or the waiting thread is
 *         interrupted while sleeping between attempts
 */
public static void waitForServerStart(SocketStoreFactory socketStoreFactory, Node node) {
    UnreachableStoreException lastFailure = null;
    int retries = 10;
    while (retries-- > 0) {
        Store<ByteArray, ?, ?> store = ServerTestUtils.getSocketStore(socketStoreFactory, MetadataStore.METADATA_STORE_NAME, node.getSocketPort());
        try {
            store.get(new ByteArray(MetadataStore.CLUSTER_KEY.getBytes()), null);
            return;
        } catch (UnreachableStoreException e) {
            System.out.println("UnreachableSocketStore sleeping will try again " + retries + " times.");
            lastFailure = e;
        } finally {
            store.close();
        }
        if (retries == 0) {
            // No attempt follows, so sleeping would only delay the failure.
            break;
        }
        try {
            Thread.sleep(1000);
        } catch (InterruptedException interrupted) {
            // Preserve the interrupt for the caller and stop waiting instead
            // of silently swallowing it.
            Thread.currentThread().interrupt();
            break;
        }
    }
    // Reached only after at least one UnreachableStoreException was caught.
    throw lastFailure;
}
use of voldemort.store.UnreachableStoreException in project voldemort by voldemort.
the class AdminToolUtils method assertServerState.
/**
 * Verifies that every listed node either is, or is not, in the given
 * {@link VoldemortState}, depending on {@code serverMustBeInThisState}.
 * <p>
 * Nodes that raise {@link UnreachableStoreException} while their state is
 * fetched are reported on standard error and skipped.
 *
 * @param adminClient AdminClient used to look up node names and remote
 *        metadata
 * @param nodeIds ids of the nodes to check
 * @param stateToCheck state to compare each node's state against
 * @param serverMustBeInThisState if true, a node NOT in {@code stateToCheck}
 *        causes a failure; if false, a node that IS in {@code stateToCheck}
 *        causes a failure
 * @throws VoldemortException if any reachable node does not conform to the
 *         required state
 */
private static void assertServerState(AdminClient adminClient, Collection<Integer> nodeIds, VoldemortState stateToCheck, boolean serverMustBeInThisState) {
    for (Integer nodeId : nodeIds) {
        String nodeName = adminClient.getAdminClientCluster().getNodeById(nodeId).briefToString();
        try {
            String remoteState = adminClient.metadataMgmtOps.getRemoteMetadata(nodeId, MetadataStore.SERVER_STATE_KEY).getValue();
            boolean inCheckedState = VoldemortState.valueOf(remoteState).equals(stateToCheck);
            if (inCheckedState == serverMustBeInThisState) {
                // Node conforms to the expectation; move on to the next one.
                continue;
            }
            String qualifier = serverMustBeInThisState ? "not in " : "in ";
            throw new VoldemortException("Cannot execute admin operation: " + nodeName + " is " + qualifier + stateToCheck.name() + " state.");
        } catch (UnreachableStoreException unreachable) {
            System.err.println("Cannot verify the server state of " + nodeName + " because it is unreachable. Skipping.");
        }
    }
}
use of voldemort.store.UnreachableStoreException in project voldemort by voldemort.
the class AsyncRecoveryFailureDetector method run.
/**
 * Periodically re-checks nodes that are currently considered unavailable.
 * <p>
 * Each cycle sleeps for the configured async recovery interval, then runs the
 * configured connection verifier against every cluster node for which
 * {@code isAvailable} returns false. A node whose verification succeeds is
 * passed to {@code nodeRecovered}. The loop ends when the current thread is
 * interrupted, {@code isRunning} becomes false, or the sleep is interrupted.
 */
public void run() {
    long asyncRecoveryInterval = getConfig().getAsyncRecoveryInterval();
    while (!Thread.currentThread().isInterrupted() && isRunning) {
        try {
            if (logger.isDebugEnabled()) {
                logger.debug("Sleeping for " + asyncRecoveryInterval + " ms before checking node availability");
            }
            getConfig().getTime().sleep(asyncRecoveryInterval);
        } catch (InterruptedException e) {
            if (logger.isDebugEnabled()) {
                logger.debug("InterruptedException while sleeping " + asyncRecoveryInterval + " ms before checking node availability", e);
            }
            break;
        }
        for (Node node : getConfig().getCluster().getNodes()) {
            if (isAvailable(node)) {
                continue;
            }
            if (logger.isDebugEnabled()) {
                logger.debug("Checking previously unavailable node " + node.getId());
            }
            ConnectionVerifier verifier = getConfig().getConnectionVerifier();
            try {
                // This is our test.
                if (logger.isDebugEnabled()) {
                    logger.debug("Verifying previously unavailable node " + node.getId());
                }
                verifier.verifyConnection(node);
                if (logger.isDebugEnabled()) {
                    // A space now separates the node id from the rest of the message.
                    logger.debug("Verified previously unavailable node " + node.getId() + " is now available.");
                }
                nodeRecovered(node);
            } catch (UnreachableStoreException e) {
                // Node is still unreachable; it is checked again next cycle.
                if (logger.isDebugEnabled()) {
                    logger.debug("Node " + node.getId() + " still unavailable due to UnreachableStoreException", e);
                }
            } catch (Exception e) {
                if (logger.isEnabledFor(Level.ERROR)) {
                    logger.error("Node " + node.getId() + " unavailable due to error", e);
                }
            }
        }
    }
}
use of voldemort.store.UnreachableStoreException in project voldemort by voldemort.
the class PerformParallelDeleteRequests method handleException.
/**
 * Processes a response whose value may be an exception.
 * <p>
 * When hinted handoff is enabled and the exception is an
 * {@link UnreachableStoreException} or {@link QuotaExceededException}, a
 * DELETE slop is created for the node: it is remembered for later if the
 * operation has not completed yet, or sent immediately if the operation has
 * completed and the delete was successful.
 *
 * @param response response to inspect
 * @param pipeline pipeline the response belongs to
 * @return false if the response carries no exception, or the exception is an
 *         {@link ObsoleteVersionException}, a {@link QuotaExceededException},
 *         or an {@link InvalidMetadataException} received after the operation
 *         completed; otherwise the result of {@code handleResponseError}
 *         (true for a terminal error)
 */
private boolean handleException(Response<ByteArray, Object> response, Pipeline pipeline) {
    Node node = response.getNode();
    Object value = response.getValue();
    if (!(value instanceof Exception)) {
        return false;
    }
    Exception ex = (Exception) value;
    boolean quotaExceeded = ex instanceof QuotaExceededException;

    if (enableHintedHandoff && (ex instanceof UnreachableStoreException || quotaExceeded)) {
        Slop slop = new Slop(pipelineData.getStoreName(), Slop.Operation.DELETE, key, null, null, node.getId(), new Date());
        if (!isOperationCompleted.get()) {
            hintedHandoffAction.rememberSlopForLaterEvent(node, slop);
        } else if (isDeleteSuccessful.get()) {
            hintedHandoff.sendHintParallel(node, version, slop);
        }
    }

    // Obsolete-version and quota failures are never reported as errors here.
    if (ex instanceof ObsoleteVersionException || quotaExceeded) {
        return false;
    }

    // Anything other than invalid metadata seen after completion goes to the
    // generic response-error handling.
    if (!(ex instanceof InvalidMetadataException && isOperationCompleted.get())) {
        return handleResponseError(response, pipeline, failureDetector);
    }

    pipelineData.reportException(ex);
    if (logger.isInfoEnabled()) {
        logger.info("Received invalid metadata problem after a successful " + pipeline.getOperation().getSimpleName() + " call on node " + node.getId() + ", store '" + pipelineData.getStoreName() + "'");
    }
    return false;
}
use of voldemort.store.UnreachableStoreException in project voldemort by voldemort.
the class ClientRequestExecutorFactory method createAsync.
/**
 * Starts asynchronous creation of a ClientRequestExecutor for the given
 * {@link SocketDestination}.
 * <p>
 * Opens and configures a non-blocking socket channel, initiates the connect,
 * and registers a new executor, carrying a protocol negotiation request, with
 * one of the selector managers. Failures reported later through the callback
 * are forwarded to {@code pool}.
 *
 * @param dest {@link SocketDestination} to connect to
 * @param pool pool that is notified of failures reported for {@code dest}
 * @throws Exception wrapping any failure raised while setting up the
 *         connection (via {@code UnreachableStoreException.wrap}); the socket
 *         channel is closed first
 */
@Override
public void createAsync(final SocketDestination dest, final KeyedResourcePool<SocketDestination, ClientRequestExecutor> pool) throws Exception {
    final int numCreated = created.incrementAndGet();
    if (logger.isDebugEnabled()) {
        logger.debug("Creating socket " + numCreated + " for " + dest.getHost() + ":" + dest.getPort() + " using protocol " + dest.getRequestFormatType().getCode());
    }

    // NOTE(review): this value is never updated in this method, so both the
    // debug message and the recorded establishment time below use 0 — confirm
    // whether that is intended for the asynchronous path.
    final long durationMs = 0;
    SocketChannel socketChannel = null;
    try {
        socketChannel = SocketChannel.open();
        socketChannel.socket().setReceiveBufferSize(this.socketBufferSize);
        socketChannel.socket().setSendBufferSize(this.socketBufferSize);
        socketChannel.socket().setTcpNoDelay(true);
        socketChannel.socket().setSoTimeout(soTimeoutMs);
        socketChannel.socket().setKeepAlive(this.socketKeepAlive);
        socketChannel.configureBlocking(false);
        socketChannel.connect(new InetSocketAddress(dest.getHost(), dest.getPort()));

        if (logger.isDebugEnabled()) {
            logger.debug("Created socket " + numCreated + " for " + dest.getHost() + ":" + dest.getPort() + " using protocol " + dest.getRequestFormatType().getCode() + " after " + durationMs + " ms.");
        }

        // Pick the next selector manager by cycling through the array.
        ClientRequestSelectorManager selectorManager = selectorManagers[counter.getAndIncrement() % selectorManagers.length];
        Selector selector = selectorManager.getSelector();
        ClientRequestExecutor clientRequestExecutor = new ClientRequestExecutor(selector, socketChannel, socketBufferSize, idleConnectionTimeoutNs, dest);
        int timeoutMs = this.getTimeout();
        ProtocolNegotiatorClientRequest protocolRequest = new ProtocolNegotiatorClientRequest(dest.getRequestFormatType());

        NonblockingStoreCallback callback = new NonblockingStoreCallback() {

            @Override
            public void requestComplete(Object result, long requestTime) {
                if (!(result instanceof Exception)) {
                    return;
                }
                Exception failure = (Exception) result;
                /*
                 * A store timeout can surface in two situations:
                 *
                 * 1) During connect: the machine was up once but no longer
                 * is, so the client's TCP SYN is never acknowledged.
                 *
                 * 2) During protocol negotiation after connect: the server
                 * machine and kernel are most likely up, but the process is
                 * unresponsive (e.g. disk failure, stuck in shutdown, or a
                 * long GC). This can be intermittent or permanent.
                 *
                 * Both are converted into connection failures (catastrophic
                 * errors) rather than normal errors so that clients recover
                 * faster. The second case may raise false positives, but a
                 * server that consistently times out is better treated as
                 * dead.
                 */
                if (failure instanceof StoreTimeoutException) {
                    failure = new UnreachableStoreException("Error establishing connection for destination " + dest, new ConnectException(failure.getMessage()));
                }
                if (logger.isDebugEnabled()) {
                    logger.debug("Reporting exception to pool " + failure.getClass() + " for destination " + dest);
                }
                pool.reportException(dest, failure);
            }
        };

        NonblockingStoreCallbackClientRequest<String> clientRequest = new NonblockingStoreCallbackClientRequest<String>(pool, dest, protocolRequest, clientRequestExecutor, callback, stats);
        clientRequestExecutor.setConnectRequest(clientRequest, timeoutMs);
        selectorManager.add(clientRequestExecutor);
        selector.wakeup();
    } catch (Exception setupFailure) {
        // Make sure not to leak socketChannels
        if (socketChannel != null) {
            try {
                socketChannel.close();
            } catch (Exception closeFailure) {
                if (logger.isEnabledFor(Level.WARN)) {
                    logger.warn(closeFailure, closeFailure);
                }
            }
        }
        throw UnreachableStoreException.wrap("Error establishing connection for destination " + dest, setupFailure);
    }

    if (stats != null) {
        stats.incrementCount(dest, ClientSocketStats.Tracked.CONNECTION_CREATED_EVENT);
        stats.recordConnectionEstablishmentTimeUs(dest, durationMs * Time.US_PER_MS);
    }
}
Aggregations