use of voldemort.store.readonly.swapper.FailedFetchLock in project voldemort by voldemort.
the class AdminServiceRequestHandler method handleFetchFailure.
/**
 * Handles a request reporting that one or more nodes failed to fetch a given
 * push version of a store, and decides whether the swap can still proceed.
 *
 * <p>The number of tolerated failures is the smaller of the configured
 * maximum and (replication factor - 1). If the nodes reported in this request,
 * combined with the nodes already recorded as disabled in the shared
 * {@link FailedFetchLock}, stay within that limit, each failed node is recorded
 * in the lock and the store version is disabled on it through the admin client.
 * Otherwise nothing is disabled and the response explains why.
 *
 * @param handleFetchFailure request carrying the store name, push version,
 *        free-form info (parsed as {@link Properties}) and failed node ids
 * @return the built response; {@code swapIsPossible} is true only if every
 *         failed node was processed without an exception, and {@code info}
 *         carries a human-readable explanation (including a stack trace when
 *         an exception was caught)
 * @throws StoreNotFoundException if no definition exists for the store name
 */
private Message handleFetchFailure(VAdminProto.HandleFetchFailureRequest handleFetchFailure) {
    String storeName = handleFetchFailure.getStoreName();
    long pushVersion = handleFetchFailure.getPushVersion();
    String extraInfo = handleFetchFailure.getInfo();
    Properties extraInfoProperties = new Properties();
    try {
        extraInfoProperties.load(new StringReader(extraInfo));
    } catch (IOException e) {
        // Best effort: carry on with whatever properties were parsed before the failure.
        logger.error("Got IOException while trying to decipher a HandleFetchFailureRequest's info.", e);
    }
    logger.info("Received HandleFetchFailureRequest:\n" + "\tstore_name: " + storeName + "\n" + "\tpush_version: " + pushVersion + "\n" + "\tinfo: " + extraInfoProperties.toString());
    VAdminProto.HandleFetchFailureResponse.Builder response = VAdminProto.HandleFetchFailureResponse.newBuilder();
    AdminClient adminClient = AdminClient.createTempAdminClient(voldemortConfig, metadataStore.getCluster(), 1);
    try {
        // Get replica.factor for current store
        StoreDefinition storeDef = adminClient.metadataMgmtOps.getStoreDefinition(storeName);
        if (null == storeDef) {
            throw new StoreNotFoundException(storeName);
        }
        int replicaFactor = storeDef.getReplicationFactor();
        // Considering replicaFactor could be smaller than maxNodeFailure configured in cluster level,
        // we need to compare the node failure number with the smaller number of (RF - 1, maxNodeFailure)
        // to make sure there is at least one replica running.
        int maxNodeFailure = Math.min(voldemortConfig.getHighAvailabilityPushMaxNodeFailures(), replicaFactor - 1);
        Set<Integer> nodesFailedInThisFetch = Sets.newHashSet(handleFetchFailure.getFailedNodesList());
        int failureCount = nodesFailedInThisFetch.size();
        boolean swapIsPossible = false;
        String responseMessage = "";
        if (failureCount > maxNodeFailure) {
            // Too many nodes failed to tolerate this strategy... let's bail out.
            responseMessage = "We cannot use pushHighAvailability because there is more than " + maxNodeFailure + " nodes that failed their fetches and build.replica.factor is " + replicaFactor + "...";
            logger.error(responseMessage);
        } else {
            FailedFetchLock distributedLock = null;
            try {
                distributedLock = FailedFetchLock.getLock(voldemortConfig, new Props(extraInfoProperties));
                distributedLock.acquireLock();
                Set<Integer> alreadyDisabledNodes = distributedLock.getDisabledNodes();
                Set<Integer> allNodesToBeDisabled = Sets.newHashSet();
                allNodesToBeDisabled.addAll(alreadyDisabledNodes);
                allNodesToBeDisabled.addAll(nodesFailedInThisFetch);
                int disabledNodeSize = allNodesToBeDisabled.size();
                if (disabledNodeSize > maxNodeFailure) {
                    // Too many exceptions to tolerate this strategy... let's bail out.
                    StringBuilder stringBuilder = new StringBuilder();
                    stringBuilder.append("We cannot use pushHighAvailability because it would bring the total ");
                    stringBuilder.append("number of nodes with disabled stores to more than ");
                    stringBuilder.append(maxNodeFailure);
                    stringBuilder.append("... alreadyDisabledNodes: [");
                    stringBuilder.append(joinNodeIds(alreadyDisabledNodes));
                    stringBuilder.append("], nodesFailedInThisFetch: [");
                    stringBuilder.append(joinNodeIds(nodesFailedInThisFetch));
                    stringBuilder.append("]");
                    stringBuilder.append(", and build.replica.factor is ").append(replicaFactor);
                    responseMessage = stringBuilder.toString();
                    logger.error(responseMessage);
                } else {
                    for (Integer nodeId : nodesFailedInThisFetch) {
                        logger.warn("Will disable store '" + storeName + "' on node " + nodeId);
                        distributedLock.addDisabledNode(nodeId, storeName, pushVersion);
                        logger.warn("Store '" + storeName + "' is disabled on node " + nodeId);
                        response.addDisableStoreResponses(adminClient.readonlyOps.disableStoreVersion(nodeId, storeName, pushVersion, extraInfo));
                    }
                    // Good grammar is important son
                    String nodesString = (nodesFailedInThisFetch.size() > 1 ? "nodes" : "node")
                                         + " [" + joinNodeIds(nodesFailedInThisFetch) + "]";
                    swapIsPossible = true;
                    responseMessage = "Swap will be possible even though " + nodesString + " failed to fetch.";
                    logger.info(responseMessage);
                }
            } catch (ClassNotFoundException e) {
                String logMessage = "Failed to find requested FailedFetchLock implementation while setting up pushHighAvailability. ";
                // Log the explanatory message; responseMessage is still empty on this path.
                logger.error(logMessage, e);
                responseMessage = logMessage + "\n" + ExceptionUtils.stackTraceToString(e);
            } catch (Exception e) {
                String logMessage = "Got exception while trying to execute pushHighAvailability. ";
                // Log the explanatory message; responseMessage is still empty on this path.
                logger.error(logMessage, e);
                responseMessage = logMessage + "\n" + ExceptionUtils.stackTraceToString(e);
            } finally {
                if (distributedLock != null) {
                    try {
                        distributedLock.releaseLock();
                    } catch (Exception e) {
                        logger.error("Error while trying to release the shared lock used for pushHighAvailability!", e);
                    } finally {
                        // Always attempt to close, even if releasing the lock failed.
                        try {
                            distributedLock.close();
                        } catch (Exception inception) {
                            logger.error("Error while trying to close the shared lock used for pushHighAvailability!", inception);
                        }
                    }
                }
            }
        }
        response.setSwapIsPossible(swapIsPossible);
        response.setInfo(responseMessage);
    } finally {
        adminClient.close();
    }
    return response.build();
}

/** Renders the given node ids as a ", "-separated list, in iteration order. */
private static String joinNodeIds(Iterable<Integer> nodeIds) {
    StringBuilder joined = new StringBuilder();
    String separator = "";
    for (Integer nodeId : nodeIds) {
        joined.append(separator).append(nodeId);
        separator = ", ";
    }
    return joined.toString();
}
use of voldemort.store.readonly.swapper.FailedFetchLock in project voldemort by voldemort.
the class VoldemortServer method validateReadOnlyStoreStatusBeforeGoingOnline.
/**
 * Checks whether this node's Read-Only stores allow it to go online.
 *
 * <p>The validation fails (first argument {@code false}) only when at least one
 * Read-Only store reports a disabled version. When no store is disabled and
 * automatic HA state clean-up is enabled in the config, the obsolete shared
 * state for this node is removed first; a failure during that clean-up is
 * returned as an exception attached to an otherwise successful validation.
 *
 * @return the validation result, with an exception attached when there are
 *         disabled stores or when the HA state clean-up failed
 */
private ReadOnlyStoreStatusValidation validateReadOnlyStoreStatusBeforeGoingOnline() {
    List<StorageEngine<ByteArray, byte[], byte[]>> readOnlyEngines = storageService.getStoreRepository().getStorageEnginesByClass(ReadOnlyStorageEngine.class);
    if (readOnlyEngines.isEmpty()) {
        logger.debug("There are no Read-Only stores on this node.");
        return new ReadOnlyStoreStatusValidation(true, null);
    }

    // Collect the names of all stores that have at least one disabled version.
    List<String> disabledStoreNames = Lists.newArrayList();
    for (StorageEngine<ByteArray, byte[], byte[]> engine : readOnlyEngines) {
        StoreVersionManager versionManager = (StoreVersionManager) engine.getCapability(StoreCapabilityType.DISABLE_STORE_VERSION);
        if (versionManager.hasAnyDisabledVersion()) {
            disabledStoreNames.add(engine.getName());
        }
    }

    if (!disabledStoreNames.isEmpty()) {
        // OMG, there are disabled stores!
        StringBuilder message = new StringBuilder("Cannot go online, because the following Read-Only stores have some disabled version(s): ");
        String separator = "";
        for (String disabledStoreName : disabledStoreNames) {
            message.append(separator).append(disabledStoreName);
            separator = ", ";
        }
        return new ReadOnlyStoreStatusValidation(false, new VoldemortException(message.toString()));
    }

    if (!voldemortConfig.getHighAvailabilityStateAutoCleanUp()) {
        logger.info(VoldemortConfig.PUSH_HA_STATE_AUTO_CLEANUP + "=false, so the server will NOT attempt to delete the HA state for this node, if any.");
    } else {
        logger.info(VoldemortConfig.PUSH_HA_STATE_AUTO_CLEANUP + "=true, so the server will attempt to delete the HA state for this node, if any.");
        FailedFetchLock sharedStateLock = null;
        try {
            sharedStateLock = FailedFetchLock.getLock(getVoldemortConfig(), new Props());
            sharedStateLock.removeObsoleteStateForNode(getVoldemortConfig().getNodeId());
            logger.info("Successfully ensured that the BnP HA shared state is cleared for this node.");
        } catch (ClassNotFoundException e) {
            // Clean-up problems are reported but still yield a 'true' validation.
            return new ReadOnlyStoreStatusValidation(true, new VoldemortException("Failed to find FailedFetchLock class!", e));
        } catch (Exception e) {
            return new ReadOnlyStoreStatusValidation(true, new VoldemortException("Exception while trying to remove obsolete HA state!", e));
        } finally {
            IOUtils.closeQuietly(sharedStateLock);
        }
    }

    logger.info("No Read-Only stores are disabled. Going online as planned.");
    return new ReadOnlyStoreStatusValidation(true, null);
}
use of voldemort.store.readonly.swapper.FailedFetchLock in project voldemort by voldemort.
the class StoreVersionManager method removeVersion.
/**
 * Forgets the given version locally and, optionally, asks the shared
 * {@link FailedFetchLock} to drop the matching remote state.
 *
 * <p>If the removed version is the current one, the current version is reset
 * to -1. The remote clean-up only runs when requested by the caller, a config
 * is available, and automatic HA state clean-up is enabled; any exception
 * raised by it is logged and not propagated.
 *
 * @param version the version to remove
 * @param alsoSyncRemoteState whether to also attempt the remote state clean-up
 */
private void removeVersion(long version, boolean alsoSyncRemoteState) {
    if (version == currentVersion) {
        // Should we throw instead?
        currentVersion = -1;
    }
    versionToEnabledMap.remove(version);

    // Nothing more to do unless remote clean-up is both requested and enabled.
    if (!alsoSyncRemoteState || config == null || !config.getHighAvailabilityStateAutoCleanUp()) {
        return;
    }

    FailedFetchLock remoteStateLock = null;
    try {
        remoteStateLock = FailedFetchLock.getLock(config, new Props());
        removeRemoteObsoleteState(remoteStateLock);
    } catch (Exception e) {
        logger.error("Failed to execute failedFetchLock.removeObsoleteStateForStore() for store " + storeName, e);
    } finally {
        IOUtils.closeQuietly(remoteStateLock);
    }
}
Aggregations