use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method isLocalSiteRemoved.
/**
 * Check if Local Site Removed
 *
 * When the local site is in ACTIVE state the freshly built response is returned as-is.
 * Otherwise every other known site is asked for its site list; the first one that
 * can be reached and is reported as active by its own list decides the outcome:
 * the response is flagged as removed only when that list no longer contains the
 * local site. Sites that cannot be queried are skipped after logging a warning.
 *
 * @brief Check if local site is removed
 * @return result that indicates whether local site is removed
 */
@GET
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Path("/islocalsiteremoved")
public SiteRemoved isLocalSiteRemoved() {
    SiteRemoved result = new SiteRemoved();
    Site self = drUtil.getLocalSite();

    // An active local site answers with the untouched response.
    if (self.getState() == SiteState.ACTIVE) {
        return result;
    }

    for (Site candidate : drUtil.listSites()) {
        boolean isSelf = candidate.getUuid().equals(self.getUuid());
        if (isSelf) {
            continue;
        }

        try (InternalSiteServiceClient siteClient = new InternalSiteServiceClient(candidate, coordinator, apiSignatureGenerator)) {
            SiteList reportedSites = siteClient.getSiteList();

            // Only the site that is active according to its own list is authoritative.
            if (isActiveSite(candidate.getUuid(), reportedSites)) {
                if (!isSiteContainedBy(self.getUuid(), reportedSites)) {
                    log.info("According returned result from current active site {}, local site {} has been removed", candidate.getUuid(), self.getUuid());
                    result.setIsRemoved(true);
                }
                return result;
            }
        } catch (Exception e) {
            // Best effort: an unreachable site is skipped and the next one is tried.
            log.warn("Error happened when fetching site list from site {}", candidate.getUuid(), e);
        }
    }

    // No reachable active site gave an answer.
    return result;
}
use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method commonPrecheck.
/**
 * Common precheck logic for DR operations.
 *
 * Rejects the operation when it is issued on a standby site or when the local
 * cluster is not stable, then verifies that the control nodes of every standby
 * site are in STABLE state. Sites listed in {@code excludedSiteIds} and sites in
 * STANDBY_PAUSED or ACTIVE_DEGRADED state are not checked.
 *
 * @param excludedSiteIds site ids to exclude from the cluster state precheck;
 *            a null list is treated as "exclude nothing"
 */
private void commonPrecheck(List<String> excludedSiteIds) {
    if (drUtil.isStandby()) {
        throw APIException.badRequests.operationOnlyAllowedOnActiveSite();
    }
    if (!isClusterStable()) {
        throw APIException.serviceUnavailable.clusterStateNotStable();
    }
    for (Site site : drUtil.listStandbySites()) {
        if (excludedSiteIds != null && excludedSiteIds.contains(site.getUuid())) {
            continue;
        }
        // don't check node state for paused or degraded sites.
        // Enum identity comparison keeps this safe even if the site has no state set.
        SiteState siteState = site.getState();
        if (siteState == SiteState.STANDBY_PAUSED || siteState == SiteState.ACTIVE_DEGRADED) {
            continue;
        }
        ClusterInfo.ClusterState state = coordinator.getControlNodesState(site.getUuid());
        // state could be null, so compare from the constant side
        if (!ClusterInfo.ClusterState.STABLE.equals(state)) {
            log.error("Site {} is not stable {}", site.getUuid(), Objects.toString(state));
            throw APIException.serviceUnavailable.siteClusterStateNotStable(site.getName(), Objects.toString(state));
        }
    }
}
use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method precheckForFailoverLocally.
/*
 * Internal method to check whether failover to standby is allowed.
 *
 * The request must target the local site, the currently recorded active site (if
 * any) must have a BROKEN network health, and the local site must already be in
 * STANDBY_PAUSED state. The generic failover precheck runs last.
 */
private void precheckForFailoverLocally(String standbyUuid) {
    Site localSite = drUtil.getLocalSite();

    // API should be only send to local site
    boolean targetsLocalSite = localSite.getUuid().equals(standbyUuid);
    if (!targetsLocalSite) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(
                localSite.getName(),
                String.format("Failover can only be executed in local site. Local site uuid %s is not matched with uuid %s", localSite.getUuid(), standbyUuid));
    }

    // Failover is refused while the recorded active site is still reachable.
    String activeSiteId = drUtil.getActiveSite().getUuid();
    if (!StringUtils.isEmpty(activeSiteId)
            && drUtil.getSiteNetworkState(activeSiteId).getNetworkHealth() != NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(localSite.getName(), "Active site is still available");
    }

    // Don't allow failover to site of ACTIVE_DEGRADED state in X-wing
    boolean paused = localSite.getState() == SiteState.STANDBY_PAUSED;
    if (!paused) {
        throw APIException.internalServerErrors.failoverPrecheckFailed(localSite.getName(), "Please wait for this site to recognize the Active site is down and automatically switch to a Paused state before failing over.");
    }

    precheckForFailover();
}
use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method failover.
/**
 * This is internal API to do failover
 *
 * Removes the old active site (when its id is supplied), marks the new active
 * site as STANDBY_FAILING_OVER, persists it and updates the vdc target version of
 * the local site with the DR_OP_FAILOVER action. Success and failure are both
 * recorded in the audit log.
 *
 * @param newActiveSiteUUID uuid of the site that becomes the new active site
 * @param oldActiveSiteUUID uuid of the previous active site; may be empty, in which
 *            case no site is removed
 * @param vdcTargetVersion vdc config version to apply, as a decimal long
 * @return return response with error message and service code
 */
@POST
@Path("/internal/failover")
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
public Response failover(@QueryParam("newActiveSiteUUid") String newActiveSiteUUID, @QueryParam("oldActiveSiteUUid") String oldActiveSiteUUID, @QueryParam("vdcVersion") String vdcTargetVersion) {
    log.info("Begin to failover internally with newActiveSiteUUid {}, oldActiveSiteUUid {}", newActiveSiteUUID, oldActiveSiteUUID);
    Site currentSite = drUtil.getLocalSite();
    String uuid = currentSite.getUuid();
    try {
        // Parse the version first so a missing or malformed value is rejected
        // before any site configuration has been removed or rewritten.
        long targetVersion = Long.parseLong(vdcTargetVersion);

        // set state
        Site oldActiveSite = new Site();
        if (StringUtils.isEmpty(oldActiveSiteUUID)) {
            log.info("Cant't find active site id, go on to do failover");
        } else {
            oldActiveSite = drUtil.getSiteFromLocalVdc(oldActiveSiteUUID);
            drUtil.removeSite(oldActiveSite);
        }
        Site newActiveSite = drUtil.getSiteFromLocalVdc(newActiveSiteUUID);
        newActiveSite.setState(SiteState.STANDBY_FAILING_OVER);
        coordinator.persistServiceConfiguration(newActiveSite.toConfiguration());
        drUtil.updateVdcTargetVersion(currentSite.getUuid(), SiteInfo.DR_OP_FAILOVER, targetVersion, oldActiveSite.getUuid(), currentSite.getUuid());
        auditDisasterRecoveryOps(OperationTypeEnum.FAILOVER, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, oldActiveSite.toBriefString(), newActiveSite.toBriefString());
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        // Parameterized logging needs "{}"; the trailing exception is logged with its stack trace.
        log.error("Error happened when failover at site {}", uuid, e);
        auditDisasterRecoveryOps(OperationTypeEnum.FAILOVER, AuditLogManager.AUDITLOG_FAILURE, null, uuid, currentSite.getVipEndPoint(), currentSite.getName());
        throw APIException.internalServerErrors.failoverFailed(currentSite.getName(), e.getMessage());
    }
}
use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.
the class DisasterRecoveryService method resumeStandby.
/**
 * Resume data replication for a paused standby site
 *
 * The target site must be in STANDBY_PAUSED or ACTIVE_DEGRADED state and its
 * network health must not be BROKEN. After the local and remote prechecks pass,
 * the DR operation lock is taken and, inside one coordinator transaction, the
 * target standby is re-initialized and moved to STANDBY_RESUMING while the vdc
 * target version of every other standby and finally of the local site is updated.
 * The transaction is discarded on any failure and the lock is always released.
 *
 * @param uuid site UUID
 * @brief Resume data replication for a paused standby site
 * @return updated standby site representation
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN }, blockProxies = true)
@Path("/{uuid}/resume")
public SiteRestRep resumeStandby(@PathParam("uuid") String uuid) {
    log.info("Begin to resume data sync to standby site identified by uuid: {}", uuid);
    Site standby = validateSiteConfig(uuid);
    SiteState state = standby.getState();
    if (!state.equals(SiteState.STANDBY_PAUSED) && !state.equals(SiteState.ACTIVE_DEGRADED)) {
        log.error("site {} is in state {}, should be STANDBY_PAUSED or ACTIVE_DEGRADED", uuid, standby.getState());
        throw APIException.badRequests.operationOnlyAllowedOnPausedSite(standby.getName(), standby.getState().toString());
    }
    SiteNetworkState networkState = drUtil.getSiteNetworkState(uuid);
    if (networkState.getNetworkHealth() == NetworkHealth.BROKEN) {
        throw APIException.internalServerErrors.siteConnectionBroken(standby.getName(), "Network health state is broken.");
    }
    try (InternalSiteServiceClient client = createInternalSiteServiceClient(standby)) {
        commonPrecheck();
        client.setCoordinatorClient(coordinator);
        client.setKeyGenerator(apiSignatureGenerator);
        client.resumePrecheck();
    } catch (APIException e) {
        // Precheck failures that are already API errors are passed through unchanged.
        throw e;
    } catch (Exception e) {
        throw APIException.internalServerErrors.resumeStandbyPrecheckFailed(standby.getName(), e.getMessage());
    }
    // Do this before tx get started which might write key to zk.
    SecretKey secretKey = apiSignatureGenerator.getSignatureKey(SignatureKeyType.INTERVDC_API);
    InterProcessLock lock = drUtil.getDROperationLock();
    long vdcTargetVersion = DrUtil.newVdcConfigVersion();
    try {
        coordinator.startTransaction();
        for (Site site : drUtil.listStandbySites()) {
            if (site.getUuid().equals(uuid)) {
                // Normal progress message: include the uuid and log at info level.
                log.info("Re-init the target standby {}", uuid);
                // init the to-be resumed standby site
                long dataRevision = vdcTargetVersion;
                List<Site> standbySites = drUtil.listStandbySites();
                SiteConfigParam configParam = prepareSiteConfigParam(standbySites, ipsecConfig.getPreSharedKey(), uuid, dataRevision, vdcTargetVersion, secretKey);
                try (InternalSiteServiceClient internalSiteServiceClient = new InternalSiteServiceClient()) {
                    internalSiteServiceClient.setCoordinatorClient(coordinator);
                    internalSiteServiceClient.setServer(site.getVipEndPoint());
                    internalSiteServiceClient.initStandby(configParam);
                }
                site.setState(SiteState.STANDBY_RESUMING);
                coordinator.persistServiceConfiguration(site.toConfiguration());
                drUtil.recordDrOperationStatus(site.getUuid(), InterState.RESUMING_STANDBY);
                drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_CHANGE_DATA_REVISION, vdcTargetVersion, dataRevision);
            } else {
                drUtil.updateVdcTargetVersion(site.getUuid(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
            }
        }
        // update the local(active) site last
        drUtil.updateVdcTargetVersion(coordinator.getSiteId(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, standby.toBriefString());
        return siteMapper.map(standby);
    } catch (Exception e) {
        log.error("Error resuming site {}", uuid, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, standby.toBriefString());
        throw APIException.internalServerErrors.resumeStandbyFailed(standby.getName(), e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            // Releasing is best effort, but keep the cause so a stuck lock can be diagnosed.
            log.error("Lock release failed when resuming standby site: {}", uuid, e);
        }
    }
}
Aggregations