Search in sources :

Example 6 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class VdcOpHandler method populateStandbySiteErrorIfNecessary.

/**
 * Records the given error for the site and moves the site into {@code STANDBY_ERROR}.
 *
 * The state the site is in on entry is used as the operation name of the
 * {@link SiteError} and is stored as the site's last state before the state is overwritten.
 *
 * @param site the site to mark as failed
 * @param e the exception describing the failure
 */
protected void populateStandbySiteErrorIfNecessary(Site site, InternalServerErrorException e) {
    // Capture the state once: it is needed for the error record, the log line and lastState.
    final SiteState failedState = site.getState();
    final SiteError siteError = new SiteError(e, failedState.name());

    log.info("set site {} state to STANDBY_ERROR, set lastState to {}", site.getName(), failedState);

    // The error is written as target info before the site configuration is changed.
    coordinator.getCoordinatorClient().setTargetInfo(site.getUuid(), siteError);

    site.setLastState(failedState);
    site.setState(SiteState.STANDBY_ERROR);
    coordinator.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration());
}
Also used : SiteState(com.emc.storageos.coordinator.client.model.SiteState) SiteError(com.emc.storageos.coordinator.client.model.SiteError)

Example 7 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class UpgradeManager method innerRun.

/**
 * Main loop of the upgrade manager; iterates for as long as {@code doRun} is true.
 *
 * Each iteration performs, in order:
 * <ol>
 * <li>Step1: if this node holds the upgrade (reboot) lock, release it and wake up the other nodes.</li>
 * <li>Step2: call {@code initializeLocalAndTargetInfo} to publish the local state and set the target if empty.</li>
 * <li>Step3: if the sync info computed from local and target info is not empty, look up a control node
 * that is in sync and call {@code syncNodes}.</li>
 * <li>Step4: if the current and target versions are both non-null and differ, take the upgrade lock,
 * verify quorum and switch to the target version. On a standby site this is only attempted when the
 * active site is healthy and the local site is STANDBY_SYNCED or STANDBY_INCR_SYNCING.</li>
 * <li>Step6: long sleep before the next iteration.</li>
 * </ol>
 * Any failed step sleeps (except the Step4 exception path) and restarts the loop from Step1 via {@code continue}.
 */
@Override
protected void innerRun() {
    // need to distinguish persistent locks acquired from UpgradeManager/VdcManager/PropertyManager
    // otherwise they might release locks acquired by others when they start
    final String svcId = String.format("%s,upgrade", coordinator.getMySvcId());
    isValidRepo = localRepository.isValidRepository();
    addRepositoryInfoListener();
    while (doRun) {
        log.debug("Main loop: Start");
        // NOTE(review): shortSleep is reset on every iteration; presumably the sleep helpers read it - confirm
        shortSleep = false;
        // Step1: check if we have the reboot lock
        boolean hasLock;
        try {
            hasLock = hasUpgradeLock(svcId);
        } catch (Exception e) {
            log.info("Step1: Failed to verify if the current node has the reboot lock ", e);
            retrySleep();
            continue;
        }
        if (hasLock) {
            // A lock still held at the top of the loop is released before doing anything else.
            try {
                releaseUpgradeLock(svcId);
                log.info("Step1: Released reboot lock for node: {}", svcId);
                wakeupOtherNodes();
            } catch (Exception e) {
                log.info("Step1: Failed to release the reboot lock and will retry: {}", e.getMessage());
                retrySleep();
                continue;
            }
        }
        // Step2: publish current state, and set target if empty
        try {
            initializeLocalAndTargetInfo(svcId);
        } catch (Exception e) {
            log.info("Step2b failed and will be retried: {}", e.getMessage());
            retrySleep();
            continue;
        }
        // Step3: syncing repository
        final SyncInfo syncinfo = getSyncInfoCommon(localInfo, targetInfo);
        if (!syncinfo.isEmpty()) {
            // Step3: nodeInSync discovery
            String controlNodeInSync = null;
            try {
                controlNodeInSync = getAControlNodeInSync(targetInfo);
                log.info("Step3: Control node in syc: {}", controlNodeInSync);
            } catch (Exception e) {
                log.info("Step3 failed and will be retried: {}", e.getMessage());
                retrySleep();
                continue;
            }
            // check and update images
            boolean waitSyncingFinish = syncNodes(syncinfo, controlNodeInSync, svcId);
            if (waitSyncingFinish) {
                // Syncing is still in progress: sleep and re-evaluate from Step1.
                retrySleep();
                continue;
            } else {
                // For restored cluster or redeployed node, the image files don't exist. It will need to download
                // the upgrade image from the remote repository. If the node can't connect with the repository,
                // or the image doesn't exist in it, syssvc would keep throwing exceptions and restart.
                // So here break the syncing and it will retry in next check loop(loopInterval=10mins).
                log.info("Step3: Give up syncing upgrade image, and will retry in next check loop");
            }
        }
        // Step4: if target version is changed, update
        log.info("Step4: If target version is changed, update");
        final SoftwareVersion currentVersion = localInfo.getCurrentVersion();
        final SoftwareVersion targetVersion = targetInfo.getCurrentVersion();
        if (currentVersion != null && targetVersion != null && !currentVersion.equals(targetVersion)) {
            log.info("Step4: Current version: {} != target version: {}. Switch version.", currentVersion, targetVersion);
            // for standby site, check if the active site is stable and the local site is STANDBY_SYNCED
            // (STANDBY_INCR_SYNCING is accepted as well, see the state check below)
            if (drUtil.isStandby()) {
                if (!coordinator.isActiveSiteHealthy()) {
                    log.info("current site is standby and active site is not stable, sleep 1m and try again");
                    sleep(STANDBY_UPGRADE_RETRY_INTERVAL);
                    continue;
                }
                SiteState localSiteState = drUtil.getLocalSite().getState();
                if (!localSiteState.equals(SiteState.STANDBY_SYNCED) && !localSiteState.equals(SiteState.STANDBY_INCR_SYNCING)) {
                    log.info("current site is standby and is in state {}, sleep 1m and try again", localSiteState);
                    sleep(STANDBY_UPGRADE_RETRY_INTERVAL);
                    continue;
                }
            }
            try {
                // Could not obtain the upgrade lock: sleep and retry from the top.
                if (!getUpgradeLock(svcId)) {
                    retrySleep();
                    continue;
                }
                // Lock is held but quorum is not maintained: give the lock back before retrying.
                if (!isQuorumMaintained()) {
                    releaseUpgradeLock(svcId);
                    retrySleep();
                    continue;
                }
                updateCurrentVersion(targetVersion);
            } catch (Exception e) {
                log.info("Step4: Upgrade failed and will be retried: {}", e.getMessage());
                // Restart the loop immediately so that we release the reboot lock.
                // (Step1 of the next iteration releases the lock if this node still holds it.)
                continue;
            }
        }
        // Step6: sleep
        // NOTE(review): there is no Step5 in this method; the numbering jumps from Step4 to Step6.
        log.info("Step6: sleep");
        longSleep();
    }
}
Also used : SoftwareVersion(com.emc.storageos.coordinator.client.model.SoftwareVersion) SiteState(com.emc.storageos.coordinator.client.model.SiteState) URISyntaxException(java.net.URISyntaxException) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException)

Example 8 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class DrSiteNetworkMonitor method shouldStartOnCurrentSite.

/**
 * Decides whether the network monitor should run on this site.
 *
 * The monitor is needed when the local site is the active site, or when its state is
 * {@code STANDBY_PAUSED} or {@code ACTIVE_DEGRADED}.
 *
 * @return true if the network monitor should be started on the current site
 */
private boolean shouldStartOnCurrentSite() {
    boolean monitorNeeded = drUtil.isActiveSite();
    if (!monitorNeeded) {
        // Not the active site: fall back to inspecting the local site's state.
        SiteState localState = drUtil.getLocalSite().getState();
        monitorNeeded = localState == SiteState.STANDBY_PAUSED || localState == SiteState.ACTIVE_DEGRADED;
        if (!monitorNeeded) {
            _log.debug("This site is not active site or standby paused, no need to do network monitor");
        }
    }
    return monitorNeeded;
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteState(com.emc.storageos.coordinator.client.model.SiteState)

Example 9 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class DrZkHealthMonitor method checkAndUpdateLocalSiteState.

/**
 * Adjusts the local standby site state after the active site has become unreachable.
 * <ul>
 * <li>STANDBY_SYNCED or STANDBY_INCR_SYNCING: the site becomes STANDBY_PAUSED and the
 * DR site network monitor is rescheduled.</li>
 * <li>STANDBY_SYNCING, STANDBY_RESUMING or STANDBY_ADDING: the site becomes STANDBY_ERROR
 * (with the previous state kept as last state), since it will never finish without the active site.</li>
 * <li>Any other state: nothing is changed.</li>
 * </ul>
 */
private void checkAndUpdateLocalSiteState() {
    Site site = drUtil.getLocalSite();
    SiteState current = site.getState();

    boolean wasInSync = SiteState.STANDBY_SYNCED.equals(current)
            || SiteState.STANDBY_INCR_SYNCING.equals(current);
    if (wasInSync) {
        log.info("Updating local site from {} to STANDBY_PAUSED since active is unreachable", current);
        site.setState(SiteState.STANDBY_PAUSED);
        coordinatorExt.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration());
        coordinatorExt.rescheduleDrSiteNetworkMonitor();
        return;
    }

    boolean wasInTransition = SiteState.STANDBY_SYNCING.equals(current)
            || SiteState.STANDBY_RESUMING.equals(current)
            || SiteState.STANDBY_ADDING.equals(current);
    if (!wasInTransition) {
        // Every other state is left untouched.
        return;
    }

    log.info("Updating local site from {} to STANDBY_ERROR since active is unreachable", site.getState());
    site.setLastState(current);
    site.setState(SiteState.STANDBY_ERROR);
    coordinatorExt.getCoordinatorClient().persistServiceConfiguration(site.toConfiguration());
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteState(com.emc.storageos.coordinator.client.model.SiteState)

Example 10 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class StorageDriverService method precheckForEnv.

/**
 * Verifies that the environment allows a driver operation to start.
 *
 * The check fails with a {@code driverOperationEnvPrecheckFailed} internal server error when:
 * <ul>
 * <li>the local site is not the active site;</li>
 * <li>any listed site is neither ACTIVE nor STANDBY_SYNCED;</li>
 * <li>the control nodes of any listed site are not in the STABLE cluster state;</li>
 * <li>there are ongoing or queued orders.</li>
 * </ul>
 */
protected void precheckForEnv() {
    DrUtil drHelper = new DrUtil(coordinator);
    if (!drHelper.isActiveSite()) {
        throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed("This operation is not allowed on standby site");
    }

    for (Site each : drHelper.listSites()) {
        SiteState current = each.getState();
        boolean activeOrSynced = current.equals(SiteState.ACTIVE) || current.equals(SiteState.STANDBY_SYNCED);
        if (!activeOrSynced) {
            String reason = String.format("Site %s is in %s state,not active or synced", each.getName(), current);
            throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed(reason);
        }

        ClusterInfo.ClusterState clusterState = coordinator.getControlNodesState(each.getUuid());
        if (clusterState != ClusterInfo.ClusterState.STABLE) {
            String reason = String.format("Currently site %s is not stable", each.getName());
            throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed(reason);
        }
    }

    // Driver operations and order executions must not run at the same time,
    // to avoid impact on each other.
    if (hasOngoingQueuedOrders()) {
        throw APIException.internalServerErrors.driverOperationEnvPrecheckFailed("There are ongoing or queued orders now, please wait until these orders complete");
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) ClusterInfo(com.emc.vipr.model.sys.ClusterInfo) SiteState(com.emc.storageos.coordinator.client.model.SiteState) DrUtil(com.emc.storageos.coordinator.client.service.DrUtil)

Aggregations

SiteState (com.emc.storageos.coordinator.client.model.SiteState)13 Site (com.emc.storageos.coordinator.client.model.Site)9 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)3 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)3 Path (javax.ws.rs.Path)3 Produces (javax.ws.rs.Produces)3 InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock)3 SiteNetworkState (com.emc.storageos.coordinator.client.model.SiteNetworkState)2 NetworkHealth (com.emc.storageos.coordinator.client.model.SiteNetworkState.NetworkHealth)2 DrUtil (com.emc.storageos.coordinator.client.service.DrUtil)2 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)2 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)2 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)2 InternalServerErrorException (com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException)2 UnknownHostException (java.net.UnknownHostException)2 Consumes (javax.ws.rs.Consumes)2 POST (javax.ws.rs.POST)2 InternalSiteServiceClient (com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient)1 MigrationStatus (com.emc.storageos.coordinator.client.model.MigrationStatus)1 SiteError (com.emc.storageos.coordinator.client.model.SiteError)1