Search in sources :

Example 11 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method resumeStandby.

/**
 * Resume data replication for a paused standby site
 *
 * <p>The request is rejected unless the site is currently in state STANDBY_PAUSED or
 * ACTIVE_DEGRADED and its network health is not BROKEN. After a precheck against the
 * standby, the method (inside a single coordinator transaction) re-initializes the
 * target standby, marks it STANDBY_RESUMING, and bumps the vdc target version on every
 * standby site and finally on the local site.
 *
 * @param uuid site UUID
 * @brief Resume data replication for a paused standby site
 * @return updated standby site representation
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN }, blockProxies = true)
@Path("/{uuid}/resume")
public SiteRestRep resumeStandby(@PathParam("uuid") String uuid) {
    log.info("Begin to resume data sync to standby site identified by uuid: {}", uuid);
    Site standby = validateSiteConfig(uuid);
    SiteState state = standby.getState();
    if (!state.equals(SiteState.STANDBY_PAUSED) && !state.equals(SiteState.ACTIVE_DEGRADED)) {
        log.error("site {} is in state {}, should be STANDBY_PAUSED or ACTIVE_DEGRADED", uuid, state);
        throw APIException.badRequests.operationOnlyAllowedOnPausedSite(standby.getName(), state.toString());
    }
    SiteNetworkState networkState = drUtil.getSiteNetworkState(uuid);
    if (networkState.getNetworkHealth() == NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.siteConnectionBroken(standby.getName(), "Network health state is broken.");
    }
    try (InternalSiteServiceClient client = createInternalSiteServiceClient(standby)) {
        commonPrecheck();
        client.setCoordinatorClient(coordinator);
        client.setKeyGenerator(apiSignatureGenerator);
        client.resumePrecheck();
    } catch (APIException e) {
        // already a well-formed API error; pass it through untouched
        throw e;
    } catch (Exception e) {
        // The API error below only carries e.getMessage(); log the full cause so it is not lost.
        log.error("Precheck failed when resuming standby site {}", uuid, e);
        throw APIException.internalServerErrors.resumeStandbyPrecheckFailed(standby.getName(), e.getMessage());
    }
    // Do this before tx get started which might write key to zk.
    SecretKey secretKey = apiSignatureGenerator.getSignatureKey(SignatureKeyType.INTERVDC_API);
    // NOTE(review): the lock is presumably returned already acquired - confirm in DrUtil;
    // it is released in the finally block below.
    InterProcessLock lock = drUtil.getDROperationLock();
    long vdcTargetVersion = DrUtil.newVdcConfigVersion();
    try {
        coordinator.startTransaction();
        for (Site site : drUtil.listStandbySites()) {
            if (site.getUuid().equals(uuid)) {
                log.info("Re-init the target standby {}", uuid);
                // init the to-be resumed standby site
                long dataRevision = vdcTargetVersion;
                List<Site> standbySites = drUtil.listStandbySites();
                SiteConfigParam configParam = prepareSiteConfigParam(standbySites, ipsecConfig.getPreSharedKey(), uuid, dataRevision, vdcTargetVersion, secretKey);
                try (InternalSiteServiceClient internalSiteServiceClient = new InternalSiteServiceClient()) {
                    internalSiteServiceClient.setCoordinatorClient(coordinator);
                    internalSiteServiceClient.setServer(site.getVipEndPoint());
                    internalSiteServiceClient.initStandby(configParam);
                }
                site.setState(SiteState.STANDBY_RESUMING);
                coordinator.persistServiceConfiguration(site.toConfiguration());
                drUtil.recordDrOperationStatus(site.getUuid(), InterState.RESUMING_STANDBY);
                drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_CHANGE_DATA_REVISION, vdcTargetVersion, dataRevision);
            } else {
                drUtil.updateVdcTargetVersion(site.getUuid(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
            }
        }
        // update the local(active) site last
        drUtil.updateVdcTargetVersion(coordinator.getSiteId(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, standby.toBriefString());
        return siteMapper.map(standby);
    } catch (Exception e) {
        log.error("Error resuming site {}", uuid, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, standby.toBriefString());
        throw APIException.internalServerErrors.resumeStandbyFailed(standby.getName(), e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            // Best effort: a failed release must not mask the outcome of the operation,
            // but keep the cause in the log for diagnosis.
            log.error("Lock release failed when resuming standby site: {}", uuid, e);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SecretKey(javax.crypto.SecretKey) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) SiteState(com.emc.storageos.coordinator.client.model.SiteState) InternalSiteServiceClient(com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) SiteConfigParam(com.emc.storageos.model.dr.SiteConfigParam) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 12 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class DrPostFailoverHandler method run.

/**
 * Run the handler. The handler runs on only a single node at a time, serialized by
 * POST_FAILOVER_HANDLER_LOCK. If it fails, the current service should quit and another node
 * takes over to retry; any exception is therefore rethrown wrapped in IllegalStateException.
 */
public void run() {
    try {
        // Cheap check first: nothing to do unless this site is in the middle of a failover.
        SiteState stateBeforeLock = drUtil.getLocalSite().getState();
        if (!stateBeforeLock.equals(SiteState.STANDBY_FAILING_OVER)) {
            log.info("Ignore DR post failover handler for site state {}", stateBeforeLock);
            return;
        }

        log.info("Acquiring lock {}", POST_FAILOVER_HANDLER_LOCK);
        InterProcessLock handlerLock = coordinator.getLock(POST_FAILOVER_HANDLER_LOCK);
        handlerLock.acquire();
        log.info("Acquired lock {}", POST_FAILOVER_HANDLER_LOCK);
        try {
            // The state may have changed while waiting for the lock, so read it again.
            Site localSite = drUtil.getLocalSite();
            SiteState stateUnderLock = localSite.getState();
            if (!stateUnderLock.equals(SiteState.STANDBY_FAILING_OVER)) {
                log.info("Ignore DR post failover handler for site state {}", stateUnderLock);
                return;
            }

            if (isCompleted()) {
                log.info("Handler {} was completed on other node", name);
            } else {
                log.info("Start post failover processing {}", name);
                updateStatus(Status.EXECUTING);
                execute();
                updateStatus(Status.COMPLETED);
            }

            // Once every handler has finished, the site can be promoted to ACTIVE.
            if (isAllHandlersCompleted()) {
                log.info("All handlers successfully completed. Change site state to ACTIVE");
                localSite.setState(SiteState.ACTIVE);
                coordinator.persistServiceConfiguration(localSite.toConfiguration());
            }
        } finally {
            handlerLock.release();
            log.info("Released lock {}", POST_FAILOVER_HANDLER_LOCK);
        }
    } catch (Exception failure) {
        log.error("Failed to execute DR failover handler", failure);
        throw new IllegalStateException(failure);
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteState(com.emc.storageos.coordinator.client.model.SiteState) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock)

Example 13 with SiteState

use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.

the class CoordinatorClientImpl method getControlNodesState.

/**
 * Derive the overall cluster state of the control nodes of a site by comparing what each
 * node reports against the given targets.
 *
 * @param targetGiven
 *            target repository
 * @param infos
 *            control nodes' repository
 * @param targetPropertiesGiven
 *            target property
 * @param configVersions
 *            control nodes' configVersions
 * @param vdcConfigVersions
 *            control nodes' vdc config versions
 * @param targetPowerOffState
 *            target poweroff state
 * @param targetDrivers
 *            target driver list
 * @param drivers
 *            control nodes' driver lists
 * @param siteId
 *            id of the site whose state and node count are looked up
 * @return Control nodes' state
 */
private ClusterInfo.ClusterState getControlNodesState(final RepositoryInfo targetGiven, final Map<Service, RepositoryInfo> infos, final PropertyInfoRestRep targetPropertiesGiven, final Map<Service, ConfigVersion> configVersions, final Map<Service, VdcConfigVersion> vdcConfigVersions, final PowerOffState targetPowerOffState, final StorageDriversInfo targetDrivers, final Map<Service, StorageDriversInfo> drivers, String siteId) {
    // only for first time target initializing
    if (targetGiven == null || targetPropertiesGiven == null || targetPowerOffState == null) {
        return ClusterInfo.ClusterState.INITIALIZING;
    }

    Site localVdcSite = new DrUtil(this).getSiteFromLocalVdc(siteId);
    SiteState drState = localVdcSite.getState();
    int expectedNodes = localVdcSite.getNodeCount();

    // Every node of the site must have reported both its repository info and config version.
    if (infos == null
            || infos.size() != expectedNodes
            || configVersions == null
            || configVersions.size() != expectedNodes) {
        return ClusterInfo.ClusterState.DEGRADED;
    }
    if (drState == SiteState.STANDBY_ERROR) {
        log.info("Control nodes' state DEGRADED since DR site state is STANDBY_ERROR");
        return ClusterInfo.ClusterState.DEGRADED;
    }

    // 1st. Find nodes which currents and versions are different from target's
    List<String> nodesWithOtherCurrent = getDifferentCurrentsCommon(targetGiven, infos);
    List<String> nodesWithOtherVersions = getDifferentVersionsCommon(targetGiven, infos);
    // 2nd. Find nodes which configVersions are different from target's
    // Note : we use config version to judge if properties on a node are sync-ed with target's.
    List<String> nodesWithOtherConfig = getDifferentConfigVersionCommon(targetPropertiesGiven, configVersions);
    List<String> nodesWithOtherVdcConfig = getDifferentVdcConfigVersionCommon(vdcConfigVersions);

    // The checks below are ordered by precedence; the first one that matches wins.
    if (targetPowerOffState.getPowerOffState() != PowerOffState.State.NONE) {
        log.info("Control nodes' state POWERINGOFF");
        return ClusterInfo.ClusterState.POWERINGOFF;
    }
    if (!nodesWithOtherConfig.isEmpty()) {
        log.info("Control nodes' state UPDATING: {}", Strings.repr(targetPropertiesGiven));
        return ClusterInfo.ClusterState.UPDATING;
    }
    if (!nodesWithOtherVdcConfig.isEmpty()) {
        log.info("Control nodes' state UPDATING vdc config version: {}", Strings.repr(nodesWithOtherVdcConfig));
        return ClusterInfo.ClusterState.UPDATING;
    }
    if (drState.isDROperationOngoing()) {
        log.info("Control nodes' state UPDATING since DR operation ongoing: {}", drState);
        return ClusterInfo.ClusterState.UPDATING;
    }
    if (!isControlNodesDriversSynced(targetDrivers, drivers)) {
        log.info("Control nodes' state UPDATING since not all nodes' drivers are synced with target");
        return ClusterInfo.ClusterState.UPDATING;
    }

    boolean currentsMatch = nodesWithOtherCurrent.isEmpty();
    boolean versionsMatch = nodesWithOtherVersions.isEmpty();

    if (currentsMatch && versionsMatch) {
        // check for the extra upgrading states
        if (isDbSchemaVersionChanged()) {
            MigrationStatus migration = getMigrationStatus();
            if (migration == null) {
                log.info("Control nodes state is UPGRADING_PREP_DB ");
                return ClusterInfo.ClusterState.UPGRADING_PREP_DB;
            }
            log.info("Control nodes state is {}", migration);
            if (migration == MigrationStatus.RUNNING) {
                return ClusterInfo.ClusterState.UPGRADING_CONVERT_DB;
            }
            if (migration == MigrationStatus.FAILED) {
                return ClusterInfo.ClusterState.UPGRADING_FAILED;
            }
            if (migration != MigrationStatus.DONE) {
                // unexpected status: report it, then fall through to STABLE as for DONE
                log.error("The current db schema version doesn't match the target db schema version, " + "but the current migration status is {} ", migration);
            }
        }
        log.info("Control nodes' state STABLE");
        return ClusterInfo.ClusterState.STABLE;
    }
    if (currentsMatch) {
        log.info("Control nodes' state SYNCING: {}", Strings.repr(nodesWithOtherVersions));
        return ClusterInfo.ClusterState.SYNCING;
    }
    if (versionsMatch) {
        log.info("Control nodes' state UPGRADING: {}", Strings.repr(nodesWithOtherCurrent));
        return ClusterInfo.ClusterState.UPGRADING;
    }

    log.error("Control nodes' in an UNKNOWN state. Target given: {} {}", targetGiven, Strings.repr(infos));
    return ClusterInfo.ClusterState.UNKNOWN;
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteState(com.emc.storageos.coordinator.client.model.SiteState) DrUtil(com.emc.storageos.coordinator.client.service.DrUtil) PropertyInfoMapper.decodeFromString(com.emc.storageos.coordinator.mapper.PropertyInfoMapper.decodeFromString) MigrationStatus(com.emc.storageos.coordinator.client.model.MigrationStatus)

Aggregations

SiteState (com.emc.storageos.coordinator.client.model.SiteState)13 Site (com.emc.storageos.coordinator.client.model.Site)9 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)3 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)3 Path (javax.ws.rs.Path)3 Produces (javax.ws.rs.Produces)3 InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock)3 SiteNetworkState (com.emc.storageos.coordinator.client.model.SiteNetworkState)2 NetworkHealth (com.emc.storageos.coordinator.client.model.SiteNetworkState.NetworkHealth)2 DrUtil (com.emc.storageos.coordinator.client.service.DrUtil)2 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)2 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)2 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)2 InternalServerErrorException (com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException)2 UnknownHostException (java.net.UnknownHostException)2 Consumes (javax.ws.rs.Consumes)2 POST (javax.ws.rs.POST)2 InternalSiteServiceClient (com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient)1 MigrationStatus (com.emc.storageos.coordinator.client.model.MigrationStatus)1 SiteError (com.emc.storageos.coordinator.client.model.SiteError)1