Search in sources :

Example 1 with FailedFetchLock

use of voldemort.store.readonly.swapper.FailedFetchLock in project voldemort by voldemort.

the class AdminServiceRequestHandler method handleFetchFailure.

/**
 * Handles a HandleFetchFailureRequest: decides whether a swap is still possible although
 * some nodes failed their fetch and, if so, disables the pushed store version on each of
 * the failed nodes.
 *
 * The number of tolerated failures is the smaller of the configured maximum and
 * (replication factor - 1). The check is done twice: first against the nodes reported in
 * this request alone, then, while holding the FailedFetchLock, against the union of those
 * nodes and the nodes the lock already reports as disabled.
 *
 * Failures inside the locked section are not rethrown; they are logged and reported to the
 * caller through the response's info field, with swapIsPossible left false.
 *
 * @param handleFetchFailure the request carrying store name, push version, failed node ids
 *                           and an extra info string that is parsed as Properties
 * @return the built HandleFetchFailureResponse
 * @throws StoreNotFoundException if no store definition is found for the requested store
 */
private Message handleFetchFailure(VAdminProto.HandleFetchFailureRequest handleFetchFailure) {
    String storeName = handleFetchFailure.getStoreName();
    long pushVersion = handleFetchFailure.getPushVersion();
    String extraInfo = handleFetchFailure.getInfo();
    Properties extraInfoProperties = new Properties();
    try {
        extraInfoProperties.load(new StringReader(extraInfo));
    } catch (IOException e) {
        // Best effort: carry on with whatever properties were parsed before the failure.
        logger.error("Got IOException while trying to decipher a HandleFetchFailureRequest's info.", e);
    }
    logger.info("Received HandleFetchFailureRequest:\n" + "\tstore_name: " + storeName + "\n" + "\tpush_version: " + pushVersion + "\n" + "\tinfo: " + extraInfoProperties.toString());
    VAdminProto.HandleFetchFailureResponse.Builder response = VAdminProto.HandleFetchFailureResponse.newBuilder();
    AdminClient adminClient = AdminClient.createTempAdminClient(voldemortConfig, metadataStore.getCluster(), 1);
    try {
        // Get replica.factor for current store
        StoreDefinition storeDef = adminClient.metadataMgmtOps.getStoreDefinition(storeName);
        if (null == storeDef) {
            throw new StoreNotFoundException(storeName);
        }
        int replicaFactor = storeDef.getReplicationFactor();
        int maxNodeFailure = voldemortConfig.getHighAvailabilityPushMaxNodeFailures();
        // Considering replicaFactor could be smaller than maxNodeFailure configured in cluster level,
        // we need to compare the node failure number with the smaller number of (RF - 1, maxNodeFailure)
        // to make sure there is at least one replica running.
        maxNodeFailure = Math.min(maxNodeFailure, replicaFactor - 1);
        Set<Integer> nodesFailedInThisFetch = Sets.newHashSet(handleFetchFailure.getFailedNodesList());
        int failureCount = nodesFailedInThisFetch.size();
        boolean swapIsPossible = false;
        String responseMessage = "";
        if (failureCount > maxNodeFailure) {
            // Too many nodes failed to tolerate this strategy... let's bail out.
            responseMessage = "We cannot use pushHighAvailability because there is more than " + maxNodeFailure + " nodes that failed their fetches and build.replica.factor is " + replicaFactor + "...";
            logger.error(responseMessage);
        } else {
            FailedFetchLock distributedLock = null;
            try {
                distributedLock = FailedFetchLock.getLock(voldemortConfig, new Props(extraInfoProperties));
                distributedLock.acquireLock();
                // Re-check the limit against the union of previously disabled nodes and the
                // nodes that failed in this fetch.
                Set<Integer> alreadyDisabledNodes = distributedLock.getDisabledNodes();
                Set<Integer> allNodesToBeDisabled = Sets.newHashSet();
                allNodesToBeDisabled.addAll(alreadyDisabledNodes);
                allNodesToBeDisabled.addAll(nodesFailedInThisFetch);
                int disabledNodeSize = allNodesToBeDisabled.size();
                if (disabledNodeSize > maxNodeFailure) {
                    // Too many exceptions to tolerate this strategy... let's bail out.
                    StringBuilder stringBuilder = new StringBuilder();
                    stringBuilder.append("We cannot use pushHighAvailability because it would bring the total ");
                    stringBuilder.append("number of nodes with disabled stores to more than ");
                    stringBuilder.append(maxNodeFailure);
                    stringBuilder.append("... alreadyDisabledNodes: [");
                    boolean firstItem = true;
                    for (Integer nodeId : alreadyDisabledNodes) {
                        if (firstItem) {
                            firstItem = false;
                        } else {
                            stringBuilder.append(", ");
                        }
                        stringBuilder.append(nodeId);
                    }
                    stringBuilder.append("], nodesFailedInThisFetch: [");
                    firstItem = true;
                    for (Integer nodeId : nodesFailedInThisFetch) {
                        if (firstItem) {
                            firstItem = false;
                        } else {
                            stringBuilder.append(", ");
                        }
                        stringBuilder.append(nodeId);
                    }
                    stringBuilder.append("]");
                    stringBuilder.append(", and build.replica.factor is ").append(replicaFactor);
                    responseMessage = stringBuilder.toString();
                    logger.error(responseMessage);
                } else {
                    StringBuilder nodesString = new StringBuilder("node");
                    if (nodesFailedInThisFetch.size() > 1) {
                        // Good grammar is important son
                        nodesString.append("s");
                    }
                    nodesString.append(" [");
                    boolean firstNode = true;
                    for (Integer nodeId : nodesFailedInThisFetch) {
                        logger.warn("Will disable store '" + storeName + "' on node " + nodeId);
                        // The node is registered with the lock first; the store version itself
                        // is disabled through the admin client at the end of this iteration.
                        distributedLock.addDisabledNode(nodeId, storeName, pushVersion);
                        logger.warn("Store '" + storeName + "' is disabled on node " + nodeId);
                        if (firstNode) {
                            firstNode = false;
                        } else {
                            nodesString.append(", ");
                        }
                        nodesString.append(nodeId);
                        response.addDisableStoreResponses(adminClient.readonlyOps.disableStoreVersion(nodeId, storeName, pushVersion, extraInfo));
                    }
                    nodesString.append("]");
                    swapIsPossible = true;
                    responseMessage = "Swap will be possible even though " + nodesString + " failed to fetch.";
                    logger.info(responseMessage);
                }
            } catch (ClassNotFoundException e) {
                String logMessage = "Failed to find requested FailedFetchLock implementation while setting up pushHighAvailability. ";
                // Log the descriptive message; responseMessage is not populated yet at this point.
                logger.error(logMessage, e);
                responseMessage = logMessage + "\n" + ExceptionUtils.stackTraceToString(e);
            } catch (Exception e) {
                String logMessage = "Got exception while trying to execute pushHighAvailability. ";
                // Log the descriptive message; responseMessage may be empty or stale here.
                logger.error(logMessage, e);
                responseMessage = logMessage + "\n" + ExceptionUtils.stackTraceToString(e);
            } finally {
                if (distributedLock != null) {
                    // Release and close are attempted independently so that a failure to
                    // release does not prevent the close.
                    try {
                        distributedLock.releaseLock();
                    } catch (Exception e) {
                        logger.error("Error while trying to release the shared lock used for pushHighAvailability!", e);
                    } finally {
                        try {
                            distributedLock.close();
                        } catch (Exception inception) {
                            logger.error("Error while trying to close the shared lock used for pushHighAvailability!", inception);
                        }
                    }
                }
            }
        }
        response.setSwapIsPossible(swapIsPossible);
        response.setInfo(responseMessage);
    } finally {
        adminClient.close();
    }
    return response.build();
}
Also used : IOException(java.io.IOException) Properties(java.util.Properties) Props(voldemort.utils.Props) NoSuchCapabilityException(voldemort.store.NoSuchCapabilityException) ConfigurationException(voldemort.utils.ConfigurationException) ObsoleteVersionException(voldemort.versioning.ObsoleteVersionException) StoreOperationFailureException(voldemort.store.StoreOperationFailureException) VoldemortException(voldemort.VoldemortException) IOException(java.io.IOException) PersistenceFailureException(voldemort.store.PersistenceFailureException) StoreNotFoundException(voldemort.store.StoreNotFoundException) StoreNotFoundException(voldemort.store.StoreNotFoundException) StoreDefinition(voldemort.store.StoreDefinition) StringReader(java.io.StringReader) FailedFetchLock(voldemort.store.readonly.swapper.FailedFetchLock) AdminClient(voldemort.client.protocol.admin.AdminClient)

Example 2 with FailedFetchLock

use of voldemort.store.readonly.swapper.FailedFetchLock in project voldemort by voldemort.

the class VoldemortServer method validateReadOnlyStoreStatusBeforeGoingOnline.

/**
 * Checks the Read-Only storage engines of this node for disabled store versions before the
 * server goes online.
 *
 * Outcomes, as constructed below:
 * - no Read-Only engines, or none with a disabled version: validation built with (true, null);
 * - HA state auto clean-up is enabled and the clean-up fails: validation built with true and
 *   a VoldemortException wrapping the failure;
 * - at least one store has a disabled version: validation built with false and a
 *   VoldemortException listing the affected store names.
 *
 * @return the validation result describing whether disabled versions were found
 */
private ReadOnlyStoreStatusValidation validateReadOnlyStoreStatusBeforeGoingOnline() {
    List<StorageEngine<ByteArray, byte[], byte[]>> readOnlyEngines = storageService.getStoreRepository().getStorageEnginesByClass(ReadOnlyStorageEngine.class);
    if (readOnlyEngines.isEmpty()) {
        logger.debug("There are no Read-Only stores on this node.");
        return new ReadOnlyStoreStatusValidation(true, null);
    }

    // Collect the names of all stores that report at least one disabled version.
    List<String> disabledStoreNames = Lists.newArrayList();
    for (StorageEngine<ByteArray, byte[], byte[]> engine : readOnlyEngines) {
        StoreVersionManager versionManager = (StoreVersionManager) engine.getCapability(StoreCapabilityType.DISABLE_STORE_VERSION);
        if (versionManager.hasAnyDisabledVersion()) {
            disabledStoreNames.add(engine.getName());
        }
    }

    if (!disabledStoreNames.isEmpty()) {
        // OMG, there are disabled stores!
        StringBuilder message = new StringBuilder();
        message.append("Cannot go online, because the following Read-Only stores have some disabled version(s): ");
        String separator = "";
        for (String disabledStoreName : disabledStoreNames) {
            message.append(separator).append(disabledStoreName);
            separator = ", ";
        }
        return new ReadOnlyStoreStatusValidation(false, new VoldemortException(message.toString()));
    }

    if (voldemortConfig.getHighAvailabilityStateAutoCleanUp()) {
        logger.info(VoldemortConfig.PUSH_HA_STATE_AUTO_CLEANUP + "=true, so the server will attempt to delete the HA state for this node, if any.");
        FailedFetchLock haStateLock = null;
        try {
            haStateLock = FailedFetchLock.getLock(getVoldemortConfig(), new Props());
            haStateLock.removeObsoleteStateForNode(getVoldemortConfig().getNodeId());
            logger.info("Successfully ensured that the BnP HA shared state is cleared for this node.");
        } catch (ClassNotFoundException e) {
            return new ReadOnlyStoreStatusValidation(true, new VoldemortException("Failed to find FailedFetchLock class!", e));
        } catch (Exception e) {
            return new ReadOnlyStoreStatusValidation(true, new VoldemortException("Exception while trying to remove obsolete HA state!", e));
        } finally {
            IOUtils.closeQuietly(haStateLock);
        }
    } else {
        logger.info(VoldemortConfig.PUSH_HA_STATE_AUTO_CLEANUP + "=false, so the server will NOT attempt to delete the HA state for this node, if any.");
    }

    logger.info("No Read-Only stores are disabled. Going online as planned.");
    return new ReadOnlyStoreStatusValidation(true, null);
}
Also used : StoreVersionManager(voldemort.store.readonly.StoreVersionManager) StorageEngine(voldemort.store.StorageEngine) ConfigurationStorageEngine(voldemort.store.configuration.ConfigurationStorageEngine) ReadOnlyStorageEngine(voldemort.store.readonly.ReadOnlyStorageEngine) Props(voldemort.utils.Props) VoldemortException(voldemort.VoldemortException) DisabledStoreException(voldemort.store.DisabledStoreException) VoldemortException(voldemort.VoldemortException) VoldemortApplicationException(voldemort.VoldemortApplicationException) UnknownHostException(java.net.UnknownHostException) FailedFetchLock(voldemort.store.readonly.swapper.FailedFetchLock)

Example 3 with FailedFetchLock

use of voldemort.store.readonly.swapper.FailedFetchLock in project voldemort by voldemort.

the class StoreVersionManager method removeVersion.

/**
 * Removes a version from the local bookkeeping and, when requested and enabled by
 * configuration, also triggers removal of the corresponding remote obsolete state.
 *
 * If the removed version is the current one, currentVersion is reset to -1. A failure of
 * the remote clean-up is logged and not propagated.
 *
 * @param version             the version to remove
 * @param alsoSyncRemoteState whether the remote state should be cleaned up as well; only
 *                            honoured when a config is present and HA state auto clean-up
 *                            is enabled in it
 */
private void removeVersion(long version, boolean alsoSyncRemoteState) {
    if (version == currentVersion) {
        // Should we throw instead?
        currentVersion = -1;
    }
    versionToEnabledMap.remove(version);

    boolean shouldSyncRemoteState = alsoSyncRemoteState
                                    && config != null
                                    && config.getHighAvailabilityStateAutoCleanUp();
    if (!shouldSyncRemoteState) {
        return;
    }

    FailedFetchLock remoteStateLock = null;
    try {
        remoteStateLock = FailedFetchLock.getLock(config, new Props());
        removeRemoteObsoleteState(remoteStateLock);
    } catch (Exception e) {
        logger.error("Failed to execute failedFetchLock.removeObsoleteStateForStore() for store " + storeName, e);
    } finally {
        IOUtils.closeQuietly(remoteStateLock);
    }
}
Also used : FailedFetchLock(voldemort.store.readonly.swapper.FailedFetchLock) Props(voldemort.utils.Props) IOException(java.io.IOException) PersistenceFailureException(voldemort.store.PersistenceFailureException)

Aggregations

FailedFetchLock (voldemort.store.readonly.swapper.FailedFetchLock)3 Props (voldemort.utils.Props)3 IOException (java.io.IOException)2 VoldemortException (voldemort.VoldemortException)2 PersistenceFailureException (voldemort.store.PersistenceFailureException)2 StringReader (java.io.StringReader)1 UnknownHostException (java.net.UnknownHostException)1 Properties (java.util.Properties)1 VoldemortApplicationException (voldemort.VoldemortApplicationException)1 AdminClient (voldemort.client.protocol.admin.AdminClient)1 DisabledStoreException (voldemort.store.DisabledStoreException)1 NoSuchCapabilityException (voldemort.store.NoSuchCapabilityException)1 StorageEngine (voldemort.store.StorageEngine)1 StoreDefinition (voldemort.store.StoreDefinition)1 StoreNotFoundException (voldemort.store.StoreNotFoundException)1 StoreOperationFailureException (voldemort.store.StoreOperationFailureException)1 ConfigurationStorageEngine (voldemort.store.configuration.ConfigurationStorageEngine)1 ReadOnlyStorageEngine (voldemort.store.readonly.ReadOnlyStorageEngine)1 StoreVersionManager (voldemort.store.readonly.StoreVersionManager)1 ConfigurationException (voldemort.utils.ConfigurationException)1