Search in sources :

Example 61 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method isLocalSiteRemoved.

/**
 * Reports whether the local site has been removed from the DR configuration.
 *
 * <p>A local site in ACTIVE state is never reported as removed. Otherwise each
 * other site returned by {@code drUtil.listSites()} is asked for its site list.
 * The first site that {@code isActiveSite} accepts for its own list decides the
 * answer: the local site is removed when {@code isSiteContainedBy} does not find
 * it in that list. Sites whose list cannot be fetched are logged and skipped.
 *
 * @brief Check if local site is removed
 * @return result whose removed flag is set only when an active remote site no
 *         longer lists the local site; otherwise a freshly constructed result
 */
@GET
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Path("/islocalsiteremoved")
public SiteRemoved isLocalSiteRemoved() {
    SiteRemoved result = new SiteRemoved();
    Site self = drUtil.getLocalSite();
    if (self.getState() == SiteState.ACTIVE) {
        // An active site is by definition still part of the configuration.
        return result;
    }
    for (Site peer : drUtil.listSites()) {
        String peerUuid = peer.getUuid();
        if (peerUuid.equals(self.getUuid())) {
            // Skip ourselves; only remote sites are consulted.
            continue;
        }
        try (InternalSiteServiceClient peerClient = new InternalSiteServiceClient(peer, coordinator, apiSignatureGenerator)) {
            SiteList peerView = peerClient.getSiteList();
            if (isActiveSite(peerUuid, peerView)) {
                // The active site's view is authoritative, so answer right away.
                if (!isSiteContainedBy(self.getUuid(), peerView)) {
                    log.info("According returned result from current active site {}, local site {} has been removed", peerUuid, self.getUuid());
                    result.setIsRemoved(true);
                }
                return result;
            }
        } catch (Exception e) {
            // Best effort: an unreachable site must not fail the whole check.
            log.warn("Error happened when fetching site list from site {}", peerUuid, e);
        }
    }
    return result;
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteList(com.emc.storageos.model.dr.SiteList) SiteRemoved(com.emc.storageos.model.dr.SiteRemoved) InternalSiteServiceClient(com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 62 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method commonPrecheck.

/**
 * Common precheck logic for DR operations.
 *
 * <p>Rejects the operation when it is issued on a standby site or when the
 * local cluster is not stable. It then verifies that the control nodes of
 * every standby site are stable, except for sites that are explicitly excluded
 * or that are in STANDBY_PAUSED / ACTIVE_DEGRADED state.
 *
 * @param excludedSiteIds site ids to exclude from the cluster state precheck
 * @throws APIException if the operation runs on a standby site, the local
 *         cluster is not stable, or a checked standby site is not stable
 */
private void commonPrecheck(List<String> excludedSiteIds) {
    if (drUtil.isStandby()) {
        throw APIException.badRequests.operationOnlyAllowedOnActiveSite();
    }
    if (!isClusterStable()) {
        throw APIException.serviceUnavailable.clusterStateNotStable();
    }
    for (Site site : drUtil.listStandbySites()) {
        if (excludedSiteIds.contains(site.getUuid())) {
            continue;
        }
        // Don't check node state for paused or degraded sites. Identity comparison
        // matches the other state checks in this class and tolerates a null state.
        SiteState siteState = site.getState();
        if (siteState == SiteState.STANDBY_PAUSED || siteState == SiteState.ACTIVE_DEGRADED) {
            continue;
        }
        ClusterInfo.ClusterState state = coordinator.getControlNodesState(site.getUuid());
        // state could be null, hence the constant-first equals and Objects.toString
        if (!ClusterInfo.ClusterState.STABLE.equals(state)) {
            log.error("Site {} is not stable {}", site.getUuid(), Objects.toString(state));
            throw APIException.serviceUnavailable.siteClusterStateNotStable(site.getName(), Objects.toString(state));
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) ClusterInfo(com.emc.vipr.model.sys.ClusterInfo)

Example 63 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method precheckForFailoverLocally.

/**
 * Internal check of whether failover to the local standby site is allowed.
 *
 * <p>The request must target the local site, the active site (when its uuid is
 * known) must have BROKEN network health, and the local site must already be
 * in STANDBY_PAUSED state. The shared {@code precheckForFailover()} runs last.
 *
 * @param standbyUuid uuid of the site the failover request was addressed to
 */
private void precheckForFailoverLocally(String standbyUuid) {
    Site localSite = drUtil.getLocalSite();
    String siteName = localSite.getName();

    // API should be only send to local site
    String localUuid = localSite.getUuid();
    if (!localUuid.equals(standbyUuid)) {
        String reason = String.format("Failover can only be executed in local site. Local site uuid %s is not matched with uuid %s", localUuid, standbyUuid);
        throw APIException.internalServerErrors.failoverPrecheckFailed(siteName, reason);
    }

    // Failover only makes sense when the active site cannot be reached anymore.
    String activeUuid = drUtil.getActiveSite().getUuid();
    boolean activeSiteKnown = !StringUtils.isEmpty(activeUuid);
    if (activeSiteKnown && drUtil.getSiteNetworkState(activeUuid).getNetworkHealth() != NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(siteName, "Active site is still available");
    }

    // Don't allow failover to site of ACTIVE_DEGRADED state in X-wing
    if (SiteState.STANDBY_PAUSED != localSite.getState()) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(siteName, "Please wait for this site to recognize the Active site is down and automatically switch to a Paused state before failing over.");
    }

    precheckForFailover();
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState)

Example 64 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method failover.

/**
 * This is internal API to do failover.
 *
 * <p>Removes the old active site (when its uuid is supplied), marks the new
 * active site as STANDBY_FAILING_OVER, persists it and bumps the vdc target
 * version for the local site. Any failure is audited and rethrown as a
 * failover-failed internal server error.
 *
 * @param newActiveSiteUUID uuid of the site that becomes the active site
 * @param oldActiveSiteUUID uuid of the previous active site; may be empty
 * @param vdcTargetVersion vdc config version to apply, as a decimal long
 * @return return response with ACCEPTED status once the failover is triggered
 */
@POST
@Path("/internal/failover")
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
public Response failover(@QueryParam("newActiveSiteUUid") String newActiveSiteUUID, @QueryParam("oldActiveSiteUUid") String oldActiveSiteUUID, @QueryParam("vdcVersion") String vdcTargetVersion) {
    log.info("Begin to failover internally with newActiveSiteUUid {}, oldActiveSiteUUid {}", newActiveSiteUUID, oldActiveSiteUUID);
    Site currentSite = drUtil.getLocalSite();
    String uuid = currentSite.getUuid();
    try {
        // Validate the version before touching any site state, so a malformed
        // value cannot leave the old active site removed with no version update.
        long targetVersion = Long.parseLong(vdcTargetVersion);

        // set state
        Site oldActiveSite = new Site();
        if (StringUtils.isEmpty(oldActiveSiteUUID)) {
            log.info("Cant't find active site id, go on to do failover");
        } else {
            oldActiveSite = drUtil.getSiteFromLocalVdc(oldActiveSiteUUID);
            drUtil.removeSite(oldActiveSite);
        }
        Site newActiveSite = drUtil.getSiteFromLocalVdc(newActiveSiteUUID);
        newActiveSite.setState(SiteState.STANDBY_FAILING_OVER);
        coordinator.persistServiceConfiguration(newActiveSite.toConfiguration());
        drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_FAILOVER, targetVersion, oldActiveSite.getUuid(), uuid);
        auditDisasterRecoveryOps(OperationTypeEnum.FAILOVER, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, oldActiveSite.toBriefString(), newActiveSite.toBriefString());
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        // SLF4J substitutes "{}" placeholders only; "%s" would be logged verbatim.
        log.error("Error happened when failover at site {}", uuid, e);
        auditDisasterRecoveryOps(OperationTypeEnum.FAILOVER, AuditLogManager.AUDITLOG_FAILURE, null, uuid, currentSite.getVipEndPoint(), currentSite.getName());
        throw APIException.internalServerErrors.failoverFailed(currentSite.getName(), e.getMessage());
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces)

Example 65 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method resumeStandby.

/**
 * Resume data replication for a paused standby site
 *
 * <p>The target site must be in STANDBY_PAUSED or ACTIVE_DEGRADED state and its
 * network health must not be BROKEN. After the local and remote prechecks pass,
 * the target standby is re-initialized and moved to STANDBY_RESUMING, and the
 * vdc target version is updated for every standby site and finally for the
 * local site, all inside one coordinator transaction.
 *
 * @param uuid site UUID
 * @brief Resume data replication for a paused standby site
 * @return updated standby site representation
 * @throws APIException if the site is in the wrong state, the connection is
 *         broken, a precheck fails, or the resume operation itself fails
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN }, blockProxies = true)
@Path("/{uuid}/resume")
public SiteRestRep resumeStandby(@PathParam("uuid") String uuid) {
    log.info("Begin to resume data sync to standby site identified by uuid: {}", uuid);
    Site standby = validateSiteConfig(uuid);
    SiteState state = standby.getState();
    if (!state.equals(SiteState.STANDBY_PAUSED) && !state.equals(SiteState.ACTIVE_DEGRADED)) {
        log.error("site {} is in state {}, should be STANDBY_PAUSED or ACTIVE_DEGRADED", uuid, standby.getState());
        throw APIException.badRequests.operationOnlyAllowedOnPausedSite(standby.getName(), standby.getState().toString());
    }
    SiteNetworkState networkState = drUtil.getSiteNetworkState(uuid);
    if (networkState.getNetworkHealth() == NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.siteConnectionBroken(standby.getName(), "Network health state is broken.");
    }
    try (InternalSiteServiceClient client = createInternalSiteServiceClient(standby)) {
        commonPrecheck();
        client.setCoordinatorClient(coordinator);
        client.setKeyGenerator(apiSignatureGenerator);
        client.resumePrecheck();
    } catch (APIException e) {
        // Already a well-formed API error; pass it through unchanged.
        throw e;
    } catch (Exception e) {
        throw APIException.internalServerErrors.resumeStandbyPrecheckFailed(standby.getName(), e.getMessage());
    }
    // Do this before tx get started which might write key to zk.
    SecretKey secretKey = apiSignatureGenerator.getSignatureKey(SignatureKeyType.INTERVDC_API);
    InterProcessLock lock = drUtil.getDROperationLock();
    long vdcTargetVersion = DrUtil.newVdcConfigVersion();
    try {
        coordinator.startTransaction();
        for (Site site : drUtil.listStandbySites()) {
            if (site.getUuid().equals(uuid)) {
                // Normal progress message: info level, with a placeholder so the uuid is logged.
                log.info("Re-init the target standby {}", uuid);
                // init the to-be resumed standby site
                long dataRevision = vdcTargetVersion;
                List<Site> standbySites = drUtil.listStandbySites();
                SiteConfigParam configParam = prepareSiteConfigParam(standbySites, ipsecConfig.getPreSharedKey(), uuid, dataRevision, vdcTargetVersion, secretKey);
                try (InternalSiteServiceClient internalSiteServiceClient = new InternalSiteServiceClient()) {
                    internalSiteServiceClient.setCoordinatorClient(coordinator);
                    internalSiteServiceClient.setServer(site.getVipEndPoint());
                    internalSiteServiceClient.initStandby(configParam);
                }
                site.setState(SiteState.STANDBY_RESUMING);
                coordinator.persistServiceConfiguration(site.toConfiguration());
                drUtil.recordDrOperationStatus(site.getUuid(), InterState.RESUMING_STANDBY);
                drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_CHANGE_DATA_REVISION, vdcTargetVersion, dataRevision);
            } else {
                drUtil.updateVdcTargetVersion(site.getUuid(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
            }
        }
        // update the local(active) site last
        drUtil.updateVdcTargetVersion(coordinator.getSiteId(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, standby.toBriefString());
        return siteMapper.map(standby);
    } catch (Exception e) {
        log.error("Error resuming site {}", uuid, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, standby.toBriefString());
        InternalServerErrorException resumeStandbyFailedException = APIException.internalServerErrors.resumeStandbyFailed(standby.getName(), e.getMessage());
        throw resumeStandbyFailedException;
    } finally {
        try {
            lock.release();
        } catch (Exception releaseFailure) {
            // A failed release must not mask the operation result, but keep the cause in the log.
            log.error("Lock release failed when resuming standby site: {}", uuid, releaseFailure);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SecretKey(javax.crypto.SecretKey) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) SiteState(com.emc.storageos.coordinator.client.model.SiteState) InternalSiteServiceClient(com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient) SiteNetworkState(com.emc.storageos.coordinator.client.model.SiteNetworkState) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) SiteConfigParam(com.emc.storageos.model.dr.SiteConfigParam) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Aggregations

Site (com.emc.storageos.coordinator.client.model.Site)79 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)21 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)21 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)20 UnknownHostException (java.net.UnknownHostException)18 Produces (javax.ws.rs.Produces)17 InternalServerErrorException (com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException)16 Path (javax.ws.rs.Path)15 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)14 ArrayList (java.util.ArrayList)14 DrUtil (com.emc.storageos.coordinator.client.service.DrUtil)11 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)11 InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock)11 SiteInfo (com.emc.storageos.coordinator.client.model.SiteInfo)10 POST (javax.ws.rs.POST)10 SiteState (com.emc.storageos.coordinator.client.model.SiteState)9 Configuration (com.emc.storageos.coordinator.common.Configuration)8 VirtualDataCenter (com.emc.storageos.db.client.model.VirtualDataCenter)8 Consumes (javax.ws.rs.Consumes)8 ClusterInfo (com.emc.vipr.model.sys.ClusterInfo)6