Use of org.apache.asterix.common.api.IClusterManagementWork.ClusterState in the Apache AsterixDB project.
Class GlobalRecoveryManager, method startGlobalRecovery.
/**
 * Triggers global recovery when the state reported by {@code ClusterStateManager}
 * differs from the state recorded by this manager and that new state is
 * {@code ACTIVE}.
 * <p>
 * The new state is recorded via {@code setState} first; the recovery work itself
 * is then submitted as a task to the executor of the controller service.
 * <p>
 * NOTE(review): the task marks global recovery as completed and logs completion
 * even when recovery failed, as long as aborting the metadata transaction did
 * not throw as well.
 *
 * @param appCtx the CC application context handed to {@code recoverDataset}
 */
@Override
public void startGlobalRecovery(ICcApplicationContext appCtx) {
    final ClusterState latestState = ClusterStateManager.INSTANCE.getState();
    final boolean stateChanged = !latestState.equals(state);
    // nothing to do unless the cluster has just moved into ACTIVE
    if (!stateChanged || latestState != ClusterState.ACTIVE) {
        return;
    }
    setState(latestState);
    ccServiceCtx.getControllerService().getExecutor().submit(() -> {
        LOGGER.info("Starting Global Recovery");
        MetadataTransactionContext txnCtx = null;
        try {
            MetadataManager.INSTANCE.init();
            txnCtx = MetadataManager.INSTANCE.beginTransaction();
            // visit every dataverse; recoverDataset returns the transaction
            // context to carry on with
            for (Dataverse dv : MetadataManager.INSTANCE.getDataverses(txnCtx)) {
                txnCtx = recoverDataset(appCtx, txnCtx, dv);
            }
            MetadataManager.INSTANCE.commitTransaction(txnCtx);
        } catch (Exception recoveryFailure) {
            /*
             * TODO: a failed recovery should shut the system down.
             * Beware: the IllegalStateException thrown below ends this task, and
             * feed listeners will then not be notified.
             */
            LOGGER.log(Level.SEVERE, "Global recovery was not completed successfully: ", recoveryFailure);
            if (txnCtx != null) {
                try {
                    MetadataManager.INSTANCE.abortTransaction(txnCtx);
                } catch (Exception abortFailure) {
                    LOGGER.log(Level.SEVERE, "Exception in aborting", abortFailure);
                    // keep the original failure attached to the abort failure
                    abortFailure.addSuppressed(recoveryFailure);
                    throw new IllegalStateException(abortFailure);
                }
            }
        }
        ClusterStateManager.INSTANCE.setGlobalRecoveryCompleted(true);
        LOGGER.info("Global Recovery Completed");
    });
}
Use of org.apache.asterix.common.api.IClusterManagementWork.ClusterState in the Apache AsterixDB project.
Class StartConfig, method execCommand.
/**
 * Starts the Asterix instance named in the {@code StartConfig}.
 * <p>
 * Steps, in order: look the instance up (passing {@code State.INACTIVE} to the
 * validation helper), build the Asterix zip, submit the binary-transfer
 * pattern, begin watching the cluster state, write the cluster properties,
 * submit the start pattern and wait for the cluster to start. Once the
 * cluster is ACTIVE the instance's working directory under the Managix home is
 * deleted and the instance is refreshed with its runtime description and
 * stored through the lookup service.
 *
 * @throws Exception if the watched cluster state is not ACTIVE after waiting,
 *                   or if any of the invoked helpers fails
 */
@Override
protected void execCommand() throws Exception {
    InstallerDriver.initConfig(true);
    final String instanceName = ((StartConfig) config).name;
    final AsterixInstance asterixInstance =
            AsterixEventServiceUtil.validateAsterixInstanceExists(instanceName, State.INACTIVE);
    AsterixEventServiceUtil.createAsterixZip(asterixInstance);

    // ship the binaries
    final AsterixEventServiceClient eventClient =
            AsterixEventService.getAsterixEventServiceClient(asterixInstance.getCluster());
    final Patterns transferPattern = PatternCreator.INSTANCE
            .getAsterixBinaryTransferPattern(instanceName, asterixInstance.getCluster());
    eventClient.submit(transferPattern);

    // the watcher must be in place before the start pattern is submitted
    final ClusterStateWatcher watcher =
            ServiceProvider.INSTANCE.getLookupService().startWatchingClusterState(instanceName);
    AsterixEventServiceUtil.createClusterProperties(asterixInstance.getCluster(),
            asterixInstance.getAsterixConfiguration());
    final Patterns startPattern =
            PatternCreator.INSTANCE.getStartAsterixPattern(instanceName, asterixInstance.getCluster(), false);
    eventClient.submit(startPattern);

    // block until the watcher reports the outcome of the start
    final ClusterState observedState = watcher.waitForClusterStart();
    final boolean started = observedState == ClusterState.ACTIVE;
    if (!started) {
        throw new Exception("CC failed to start");
    }

    final String instanceDir = InstallerDriver.getManagixHome() + File.separator + InstallerDriver.ASTERIX_DIR
            + File.separator + instanceName;
    AsterixEventServiceUtil.deleteDirectory(instanceDir);

    final AsterixRuntimeState runtime = VerificationUtil.getAsterixRuntimeState(asterixInstance);
    VerificationUtil.updateInstanceWithRuntimeDescription(asterixInstance, runtime, true);
    LOGGER.info(asterixInstance.getDescription(false));
    ServiceProvider.INSTANCE.getLookupService().updateAsterixInstance(asterixInstance);
}
Use of org.apache.asterix.common.api.IClusterManagementWork.ClusterState in the Apache AsterixDB project.
Class CreateConfig, method execCommand.
/**
 * Creates and starts a new Asterix instance described by the
 * {@code CreateConfig}.
 * <p>
 * The cluster description is validated first. Then the instance name, cluster
 * and Asterix configuration are loaded into {@code asterixInstanceName},
 * {@code cluster} and {@code asterixConfiguration} (assigned here without a
 * local declaration, so presumably members of the enclosing class). The new
 * instance is written through the lookup service, a cluster-state watcher is
 * started, and the binary-transfer and start patterns are submitted. Once the
 * cluster is ACTIVE the instance is refreshed with its runtime description,
 * stored again, and its working directory under the Managix home is deleted.
 *
 * @throws Exception if cluster validation fails, if the watched cluster state
 *                   is not ACTIVE after waiting, or if any invoked helper fails
 */
@Override
protected void execCommand() throws Exception {
    InstallerDriver.initConfig(true);
    final ValidateCommand validator = new ValidateCommand();
    final CreateConfig createConfig = (CreateConfig) config;
    if (!validator.validateCluster(createConfig.clusterPath)) {
        throw new Exception("Cannot create an Asterix instance.");
    }

    asterixInstanceName = createConfig.name;
    AsterixEventServiceUtil.validateAsterixInstanceNotExists(asterixInstanceName);
    cluster = EventUtil.getCluster(createConfig.clusterPath);
    cluster.setInstanceName(asterixInstanceName);
    asterixConfiguration = InstallerUtil.getAsterixConfiguration(createConfig.asterixConfPath);

    final AsterixInstance newInstance =
            AsterixEventServiceUtil.createAsterixInstance(asterixInstanceName, cluster, asterixConfiguration);
    AsterixEventServiceUtil.evaluateConflictWithOtherInstances(newInstance);
    AsterixEventServiceUtil.createAsterixZip(newInstance);
    AsterixEventServiceUtil.createClusterProperties(cluster, asterixConfiguration);
    final AsterixEventServiceClient eventClient =
            AsterixEventService.getAsterixEventServiceClient(cluster, true, false);

    // persist the instance in the lookup service before watching its state
    ServiceProvider.INSTANCE.getLookupService().writeAsterixInstance(newInstance);
    final ClusterStateWatcher watcher =
            ServiceProvider.INSTANCE.getLookupService().startWatchingClusterState(asterixInstanceName);

    final Patterns transferPattern =
            PatternCreator.INSTANCE.getAsterixBinaryTransferPattern(asterixInstanceName, cluster);
    eventClient.submit(transferPattern);
    final Patterns startPattern =
            PatternCreator.INSTANCE.getStartAsterixPattern(asterixInstanceName, cluster, true);
    eventClient.submit(startPattern);

    // block until the watcher reports the outcome of the start
    final ClusterState observedState = watcher.waitForClusterStart();
    final boolean started = observedState == ClusterState.ACTIVE;
    if (!started) {
        throw new Exception("CC failed to start");
    }

    final AsterixRuntimeState runtime = VerificationUtil.getAsterixRuntimeState(newInstance);
    VerificationUtil.updateInstanceWithRuntimeDescription(newInstance, runtime, true);
    ServiceProvider.INSTANCE.getLookupService().updateAsterixInstance(newInstance);

    final String instanceDir = InstallerDriver.getManagixHome() + File.separator + InstallerDriver.ASTERIX_DIR
            + File.separator + asterixInstanceName;
    AsterixEventServiceUtil.deleteDirectory(instanceDir);
    LOGGER.info(newInstance.getDescription(false));
}
Use of org.apache.asterix.common.api.IClusterManagementWork.ClusterState in the Apache AsterixDB project.
Class AutoFaultToleranceStrategy, method validateClusterState.
/**
 * Refreshes the cluster state held by the cluster manager and reacts to the
 * result: a PENDING state requests a metadata node takeover, an ACTIVE state
 * processes any pending failback plans. Any other state is ignored.
 *
 * @throws HyracksDataException declared by this method; presumably raised by
 *                              the refresh or the takeover request
 */
private void validateClusterState() throws HyracksDataException {
    clusterManager.refreshState();
    final ClusterState refreshedState = clusterManager.getState();
    // PENDING means every partition is active but the metadata node is not
    if (refreshedState == ClusterState.PENDING) {
        requestMetadataNodeTakeover();
        return;
    }
    if (refreshedState == ClusterState.ACTIVE) {
        processPendingFailbackPlans();
    }
}
Use of org.apache.asterix.common.api.IClusterManagementWork.ClusterState in the Apache AsterixDB project.
Class AutoFaultToleranceStrategy, method processPendingFailbackPlans.
/**
 * Works through the queue of pending failback plans while the cluster is
 * ACTIVE.
 * <p>
 * Plans are popped one at a time. A plan in PENDING_ROLLBACK is dropped from
 * the plan map and the next plan is examined. The first plan found in PREPARING
 * is started: its partitions are marked inactive for the failing-back node, the
 * metadata node is released if that node is the original metadata node, the
 * cluster state is set to REBALANCING and the failback requests are handled.
 * Processing then stops, leaving any remaining plans queued.
 */
private synchronized void processPendingFailbackPlans() {
    // failbacks must not be processed unless the cluster is ACTIVE, because
    // otherwise some partitions are not active
    if (clusterManager.getState() != ClusterState.ACTIVE) {
        return;
    }
    while (!pendingProcessingFailbackPlans.isEmpty()) {
        final NodeFailbackPlan nextPlan = pendingProcessingFailbackPlans.pop();
        /*
         * A popped plan is expected to be in one of two states:
         * PREPARING        - participants chosen, no request sent yet.
         * PENDING_ROLLBACK - a participant failed before any request was sent.
         */
        if (nextPlan.getState() != FailbackPlanState.PREPARING) {
            if (nextPlan.getState() == FailbackPlanState.PENDING_ROLLBACK) {
                // no request ever went out, so there is nothing to roll back
                planId2FailbackPlanMap.remove(nextPlan.getPlanId());
            }
            continue;
        }

        // mark the partitions that will return to the failing-back node as inactive
        final String returningNode = nextPlan.getNodeId();
        for (Integer partition : nextPlan.getPartitionsToFailback()) {
            clusterManager.updateClusterPartition(partition, returningNode, false);
        }

        // when the returning node is the original metadata node, the metadata
        // node will change once the failback completes
        final String originalMetadataNode = ((ICcApplicationContext) serviceCtx.getApplicationContext())
                .getMetadataProperties().getMetadataNodeName();
        if (originalMetadataNode.equals(returningNode)) {
            nextPlan.setNodeToReleaseMetadataManager(currentMetadataNode);
            currentMetadataNode = "";
            metadataNodeActive = false;
            clusterManager.updateMetadataNode(currentMetadataNode, metadataNodeActive);
        }

        // make new jobs wait while the failback is in progress
        clusterManager.setState(ClusterState.REBALANCING);
        handleFailbackRequests(nextPlan, messageBroker);
        /*
         * Stop here: the next plan (if any) is handled once this one completes
         * or is reverted and the cluster state is ACTIVE again.
         */
        return;
    }
}
Aggregations