use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
the class VdcOpHandler method populateStandbySiteErrorIfNecessary.
/**
 * Records a failure against the given site and moves the site into
 * {@code STANDBY_ERROR}.
 *
 * The state the site was in when the failure happened is used as the operation
 * name of the published {@link SiteError} and is kept as the site's last state.
 *
 * @param site the site to mark as failed
 * @param e the exception describing the failure
 */
protected void populateStandbySiteErrorIfNecessary(Site site, InternalServerErrorException e) {
    // Capture the state once; it names the failed operation and becomes lastState.
    final SiteState stateBeforeFailure = site.getState();
    final SiteError failure = new SiteError(e, stateBeforeFailure.name());

    log.info("set site {} state to STANDBY_ERROR, set lastState to {}", site.getName(), stateBeforeFailure);

    // Publish the error under the site's uuid before touching the site record itself.
    coordinator.getCoordinatorClient().setTargetInfo(site.getUuid(), failure);

    site.setLastState(stateBeforeFailure);
    site.setState(SiteState.STANDBY_ERROR);
    coordinator.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration());
}
use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
the class UpgradeManager method innerRun.
/**
 * Main loop of the upgrade manager. Runs until {@code doRun} becomes false.
 *
 * Each iteration walks through the following steps; any step that fails sleeps
 * (where noted) and restarts the iteration from Step1:
 * <ol>
 * <li>Step1: if this node still holds the reboot (upgrade) lock, release it and
 * wake up the other nodes.</li>
 * <li>Step2: initialize the local and target info.</li>
 * <li>Step3: if local and target repositories differ, find a control node that is
 * in sync and synchronize images from it.</li>
 * <li>Step4: if the current version differs from the target version, acquire the
 * upgrade lock, verify quorum and switch to the target version.</li>
 * <li>Step6: long sleep before the next check. (There is no Step5 in this loop.)</li>
 * </ol>
 */
@Override
protected void innerRun() {
// need to distinguish persistent locks acquired from UpgradeManager/VdcManager/PropertyManager
// otherwise they might release locks acquired by others when they start
final String svcId = String.format("%s,upgrade", coordinator.getMySvcId());
isValidRepo = localRepository.isValidRepository();
addRepositoryInfoListener();
while (doRun) {
log.debug("Main loop: Start");
// reset at the start of every iteration; NOTE(review): presumably consulted by the sleep helpers - confirm
shortSleep = false;
// Step1: check if we have the reboot lock
boolean hasLock;
try {
hasLock = hasUpgradeLock(svcId);
} catch (Exception e) {
log.info("Step1: Failed to verify if the current node has the reboot lock ", e);
retrySleep();
continue;
}
if (hasLock) {
// A lock still held at the top of the loop is released here and the other nodes are woken up.
try {
releaseUpgradeLock(svcId);
log.info("Step1: Released reboot lock for node: {}", svcId);
wakeupOtherNodes();
} catch (Exception e) {
log.info("Step1: Failed to release the reboot lock and will retry: {}", e.getMessage());
retrySleep();
continue;
}
}
// Step2: publish current state, and set target if empty
try {
initializeLocalAndTargetInfo(svcId);
} catch (Exception e) {
// note: the message below is labelled "Step2b" although this is the only Step2 block in the loop
log.info("Step2b failed and will be retried: {}", e.getMessage());
retrySleep();
continue;
}
// Step3: syncing repository
final SyncInfo syncinfo = getSyncInfoCommon(localInfo, targetInfo);
// A non-empty SyncInfo means there is something to synchronize between local and target.
if (!syncinfo.isEmpty()) {
// Step3: nodeInSync discovery
String controlNodeInSync = null;
try {
controlNodeInSync = getAControlNodeInSync(targetInfo);
log.info("Step3: Control node in syc: {}", controlNodeInSync);
} catch (Exception e) {
log.info("Step3 failed and will be retried: {}", e.getMessage());
retrySleep();
continue;
}
// check and update images
boolean waitSyncingFinish = syncNodes(syncinfo, controlNodeInSync, svcId);
if (waitSyncingFinish) {
// syncing is still expected to make progress: retry from Step1 after a short wait
retrySleep();
continue;
} else {
// For a restored cluster or a redeployed node, the image files don't exist, so the upgrade image
// must be downloaded from the remote repository. If the node can't connect to the repository,
// or the image doesn't exist in it, syssvc would keep throwing exceptions and restart.
// So stop syncing here and retry in the next check loop (loopInterval=10mins).
log.info("Step3: Give up syncing upgrade image, and will retry in next check loop");
}
}
// Step4: if target version is changed, update
log.info("Step4: If target version is changed, update");
final SoftwareVersion currentVersion = localInfo.getCurrentVersion();
final SoftwareVersion targetVersion = targetInfo.getCurrentVersion();
// Only switch when both versions are known and they differ.
if (currentVersion != null && targetVersion != null && !currentVersion.equals(targetVersion)) {
log.info("Step4: Current version: {} != target version: {}. Switch version.", currentVersion, targetVersion);
// for standby site, check if the active site is stable and the local site is STANDBY_SYNCED
// (STANDBY_INCR_SYNCING is accepted as well, see the state check below)
if (drUtil.isStandby()) {
if (!coordinator.isActiveSiteHealthy()) {
// NOTE(review): the message says 1m; the actual wait is STANDBY_UPGRADE_RETRY_INTERVAL - confirm they match
log.info("current site is standby and active site is not stable, sleep 1m and try again");
sleep(STANDBY_UPGRADE_RETRY_INTERVAL);
continue;
}
SiteState localSiteState = drUtil.getLocalSite().getState();
if (!localSiteState.equals(SiteState.STANDBY_SYNCED) && !localSiteState.equals(SiteState.STANDBY_INCR_SYNCING)) {
log.info("current site is standby and is in state {}, sleep 1m and try again", localSiteState);
sleep(STANDBY_UPGRADE_RETRY_INTERVAL);
continue;
}
}
try {
// Could not obtain the upgrade lock: wait and start over.
if (!getUpgradeLock(svcId)) {
retrySleep();
continue;
}
// Lock is held but quorum is not maintained: give the lock back explicitly before retrying.
if (!isQuorumMaintained()) {
releaseUpgradeLock(svcId);
retrySleep();
continue;
}
updateCurrentVersion(targetVersion);
} catch (Exception e) {
log.info("Step4: Upgrade failed and will be retried: {}", e.getMessage());
// Restart the loop immediately so that we release the reboot lock.
// (No sleep here: Step1 of the next iteration releases the lock if this node still holds it.)
continue;
}
}
// Step6: sleep
log.info("Step6: sleep");
longSleep();
}
}
use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
the class DrSiteNetworkMonitor method shouldStartOnCurrentSite.
/**
 * Decides whether the network monitor should run on this site.
 *
 * The monitor runs when the current site is the active site, or when the local
 * site's state is STANDBY_PAUSED or ACTIVE_DEGRADED.
 *
 * @return true if the network monitor should be started on this site
 */
private boolean shouldStartOnCurrentSite() {
    boolean monitorNeeded = drUtil.isActiveSite();
    if (!monitorNeeded) {
        // Not the active site: the decision depends on the local site's state.
        final SiteState localState = drUtil.getLocalSite().getState();
        monitorNeeded = SiteState.STANDBY_PAUSED == localState || SiteState.ACTIVE_DEGRADED == localState;
        if (!monitorNeeded) {
            _log.debug("This site is not active site or standby paused, no need to do network monitor");
        }
    }
    return monitorNeeded;
}
use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
the class DrZkHealthMonitor method checkAndUpdateLocalSiteState.
/**
 * Adjusts the local standby site's state after the active site became unreachable.
 * <ul>
 * <li>STANDBY_SYNCED or STANDBY_INCR_SYNCING: the site becomes STANDBY_PAUSED and the
 * DR site network monitor is rescheduled.</li>
 * <li>STANDBY_SYNCING, STANDBY_RESUMING or STANDBY_ADDING: the site becomes STANDBY_ERROR,
 * remembering the interrupted state as its last state, since these operations will
 * never finish without the active site.</li>
 * <li>Any other state is left untouched.</li>
 * </ul>
 */
private void checkAndUpdateLocalSiteState() {
    final Site site = drUtil.getLocalSite();
    final SiteState current = site.getState();

    final boolean wasInSync = current == SiteState.STANDBY_SYNCED
            || current == SiteState.STANDBY_INCR_SYNCING;
    if (wasInSync) {
        log.info("Updating local site from {} to STANDBY_PAUSED since active is unreachable", current);
        site.setState(SiteState.STANDBY_PAUSED);
        coordinatorExt.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration());
        coordinatorExt.rescheduleDrSiteNetworkMonitor();
        return;
    }

    final boolean wasMidOperation = current == SiteState.STANDBY_SYNCING
            || current == SiteState.STANDBY_RESUMING
            || current == SiteState.STANDBY_ADDING;
    if (!wasMidOperation) {
        // Nothing to do for any other state.
        return;
    }

    log.info("Updating local site from {} to STANDBY_ERROR since active is unreachable", current);
    site.setLastState(current);
    site.setState(SiteState.STANDBY_ERROR);
    coordinatorExt.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration());
}
use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
the class StorageDriverService method precheckForEnv.
/**
 * Verifies that the environment allows a driver operation, throwing a
 * driverOperationEnvPrecheckFailed error on the first violated condition.
 *
 * Checks, in order: the current site is the active site; every site is in ACTIVE
 * or STANDBY_SYNCED state and its control nodes are STABLE; there are no ongoing
 * or queued orders.
 */
protected void precheckForEnv() {
    final DrUtil drHelper = new DrUtil(coordinator);
    if (!drHelper.isActiveSite()) {
        throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed("This operation is not allowed on standby site");
    }

    for (Site candidate : drHelper.listSites()) {
        // The site state is validated before the cluster state is queried.
        final SiteState current = candidate.getState();
        final boolean activeOrSynced = current.equals(SiteState.ACTIVE) || current.equals(SiteState.STANDBY_SYNCED);
        if (!activeOrSynced) {
            final String reason = String.format("Site %s is in %s state,not active or synced", candidate.getName(), current);
            throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed(reason);
        }

        final ClusterInfo.ClusterState controlNodes = coordinator.getControlNodesState(candidate.getUuid());
        if (controlNodes != ClusterInfo.ClusterState.STABLE) {
            final String reason = String.format("Currently site %s is not stable", candidate.getName());
            throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed(reason);
        }
    }

    // Driver operations and order executions must not overlap, to avoid impact on each other.
    if (hasOngoingQueuedOrders()) {
        throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed("There are ongoing or queued orders now, please wait until these orders complete");
    }
}
Aggregations