Use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
The class DisasterRecoveryService, method resumeStandby.
/**
* Resume data replication for a paused standby site
*
* @param uuid site UUID
* @brief Resume data replication for a paused standby site
* @return updated standby site representation
*/
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN, Role.SYSTEM_ADMIN, Role.RESTRICTED_SYSTEM_ADMIN }, blockProxies = true)
@Path("/{uuid}/resume")
public SiteRestRep resumeStandby(@PathParam("uuid") String uuid) {
log.info("Begin to resume data sync to standby site identified by uuid: {}", uuid);
Site standby = validateSiteConfig(uuid);
SiteState state = standby.getState();
if (!state.equals(SiteState.STANDBY_PAUSED) && !state.equals(SiteState.ACTIVE_DEGRADED)) {
log.error("site {} is in state {}, should be STANDBY_PAUSED or ACTIVE_DEGRADED", uuid, standby.getState());
throw APIException.badRequests.operationOnlyAllowedOnPausedSite(standby.getName(), standby.getState().toString());
}
SiteNetworkState networkState = drUtil.getSiteNetworkState(uuid);
if (networkState.getNetworkHealth() == NetworkHealth.BROKEN) {
throw APIException.internalServerErrors.siteConnectionBroken(standby.getName(), "Network health state is broken.");
}
try (InternalSiteServiceClient client = createInternalSiteServiceClient(standby)) {
commonPrecheck();
client.setCoordinatorClient(coordinator);
client.setKeyGenerator(apiSignatureGenerator);
client.resumePrecheck();
} catch (APIException e) {
throw e;
} catch (Exception e) {
throw APIException.internalServerErrors.resumeStandbyPrecheckFailed(standby.getName(), e.getMessage());
}
// Do this before tx get started which might write key to zk.
SecretKey secretKey = apiSignatureGenerator.getSignatureKey(SignatureKeyType.INTERVDC_API);
InterProcessLock lock = drUtil.getDROperationLock();
long vdcTargetVersion = DrUtil.newVdcConfigVersion();
try {
coordinator.startTransaction();
for (Site site : drUtil.listStandbySites()) {
if (site.getUuid().equals(uuid)) {
log.error("Re-init the target standby", uuid);
// init the to-be resumed standby site
long dataRevision = vdcTargetVersion;
List<Site> standbySites = drUtil.listStandbySites();
SiteConfigParam configParam = prepareSiteConfigParam(standbySites, ipsecConfig.getPreSharedKey(), uuid, dataRevision, vdcTargetVersion, secretKey);
try (InternalSiteServiceClient internalSiteServiceClient = new InternalSiteServiceClient()) {
internalSiteServiceClient.setCoordinatorClient(coordinator);
internalSiteServiceClient.setServer(site.getVipEndPoint());
internalSiteServiceClient.initStandby(configParam);
}
site.setState(SiteState.STANDBY_RESUMING);
coordinator.persistServiceConfiguration(site.toConfiguration());
drUtil.recordDrOperationStatus(site.getUuid(), InterState.RESUMING_STANDBY);
drUtil.updateVdcTargetVersion(uuid, SiteInfo.DR_OP_CHANGE_DATA_REVISION, vdcTargetVersion, dataRevision);
} else {
drUtil.updateVdcTargetVersion(site.getUuid(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
}
}
// update the local(active) site last
drUtil.updateVdcTargetVersion(coordinator.getSiteId(), SiteInfo.DR_OP_RESUME_STANDBY, vdcTargetVersion);
coordinator.commitTransaction();
auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, standby.toBriefString());
return siteMapper.map(standby);
} catch (Exception e) {
log.error("Error resuming site {}", uuid, e);
coordinator.discardTransaction();
auditDisasterRecoveryOps(OperationTypeEnum.RESUME_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, standby.toBriefString());
InternalServerErrorException resumeStandbyFailedException = APIException.internalServerErrors.resumeStandbyFailed(standby.getName(), e.getMessage());
throw resumeStandbyFailedException;
} finally {
try {
lock.release();
} catch (Exception ignore) {
log.error(String.format("Lock release failed when resuming standby site: %s", uuid));
}
}
}
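For orientation, here is a minimal client-side sketch of calling this endpoint. It assumes the service's class-level path is /site (so the resource would be POST /site/{uuid}/resume on the API port), a previously obtained X-SDS-AUTH-TOKEN, and placeholder VIP and UUID values; TLS trust setup for the appliance certificate is omitted.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ResumeStandbyClientSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical values: active-site VIP on the API port and the paused standby's site UUID.
        String activeVip = "https://vipr.example.com:4443";
        String standbyUuid = "00000000-0000-0000-0000-000000000000";

        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(activeVip + "/site/" + standbyUuid + "/resume"))
                .header("X-SDS-AUTH-TOKEN", "<token from a prior authentication>")
                .header("Accept", "application/json")
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();

        // On success the response body is the updated SiteRestRep and the standby
        // moves to STANDBY_RESUMING while data replication is re-established.
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + ": " + response.body());
    }
}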
Use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
The class DrPostFailoverHandler, method run.
/**
* Run the handler. The handler runs on a single node only; if it fails, the current service should quit so another node can take over and retry.
*/
public void run() {
    try {
        SiteState siteState = drUtil.getLocalSite().getState();
        if (!siteState.equals(SiteState.STANDBY_FAILING_OVER)) {
            log.info("Ignore DR post failover handler for site state {}", siteState);
            return;
        }
        log.info("Acquiring lock {}", POST_FAILOVER_HANDLER_LOCK);
        InterProcessLock lock = coordinator.getLock(POST_FAILOVER_HANDLER_LOCK);
        lock.acquire();
        log.info("Acquired lock {}", POST_FAILOVER_HANDLER_LOCK);
        try {
            // check site state again after acquiring lock
            Site site = drUtil.getLocalSite();
            siteState = site.getState();
            if (!siteState.equals(SiteState.STANDBY_FAILING_OVER)) {
                log.info("Ignore DR post failover handler for site state {}", siteState);
                return;
            }
            boolean isExecuted = isCompleted();
            if (!isExecuted) {
                log.info("Start post failover processing {}", name);
                updateStatus(Status.EXECUTING);
                execute();
                updateStatus(Status.COMPLETED);
            } else {
                log.info("Handler {} was completed on other node", name);
            }
            if (isAllHandlersCompleted()) {
                log.info("All handlers successfully completed. Change site state to ACTIVE");
                site.setState(SiteState.ACTIVE);
                coordinator.persistServiceConfiguration(site.toConfiguration());
            }
        } finally {
            lock.release();
            log.info("Released lock {}", POST_FAILOVER_HANDLER_LOCK);
        }
    } catch (Exception e) {
        log.error("Failed to execute DR failover handler", e);
        throw new IllegalStateException(e);
    }
}
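The run() method above is a template: it guards on SiteState, serializes handlers cluster-wide with POST_FAILOVER_HANDLER_LOCK, and delegates the actual post-failover work to execute(). As a rough sketch only, assuming execute() is the hook that concrete handlers implement (the class name, logger, and the cleanup performed below are hypothetical):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hypothetical handler: the class name and the work done in execute() are illustrative only.
public class ExampleCacheRebuildHandler extends DrPostFailoverHandler {
    private static final Logger log = LoggerFactory.getLogger(ExampleCacheRebuildHandler.class);

    @Override
    public void execute() {
        // Called by run() exactly once across the cluster after a failover, while the
        // local site is still STANDBY_FAILING_OVER; once every registered handler has
        // completed, run() flips the site state to ACTIVE.
        log.info("Rebuilding local caches after failover");
        // site-specific cleanup would go here
    }
}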
Use of com.emc.storageos.coordinator.client.model.SiteState in project coprhd-controller by CoprHD.
The class CoordinatorClientImpl, method getControlNodesState.
/**
* Get all control nodes' state
*
* @param targetGiven
* target repository
* @param infos
* control nodes' repository
* @param targetPropertiesGiven
* target property
* @param configVersions
* control nodes' configVersions
* @param vdcConfigVersions
* control nodes' vdc config versions
* @param targetPowerOffState
* target poweroff state
* @param targetDrivers
* target driver list
* @param drivers
* control nodes' driver lists
* @param siteId
* UUID of the site whose control nodes are inspected
* @return Control nodes' state
*/
private ClusterInfo.ClusterState getControlNodesState(final RepositoryInfo targetGiven, final Map<Service, RepositoryInfo> infos,
        final PropertyInfoRestRep targetPropertiesGiven, final Map<Service, ConfigVersion> configVersions,
        final Map<Service, VdcConfigVersion> vdcConfigVersions, final PowerOffState targetPowerOffState,
        final StorageDriversInfo targetDrivers, final Map<Service, StorageDriversInfo> drivers, String siteId) {
    if (targetGiven == null || targetPropertiesGiven == null || targetPowerOffState == null) {
        // only for first time target initializing
        return ClusterInfo.ClusterState.INITIALIZING;
    }
    DrUtil drUtil = new DrUtil(this);
    Site site = drUtil.getSiteFromLocalVdc(siteId);
    SiteState siteState = site.getState();
    int siteNodeCount = site.getNodeCount();
    if (infos == null || infos.size() != siteNodeCount || configVersions == null || configVersions.size() != siteNodeCount) {
        return ClusterInfo.ClusterState.DEGRADED;
    }
    if (siteState == SiteState.STANDBY_ERROR) {
        log.info("Control nodes' state DEGRADED since DR site state is STANDBY_ERROR");
        return ClusterInfo.ClusterState.DEGRADED;
    }
    // 1st. Find nodes whose currents and versions differ from the target's
    List<String> differentCurrents = getDifferentCurrentsCommon(targetGiven, infos);
    List<String> differentVersions = getDifferentVersionsCommon(targetGiven, infos);
    // 2nd. Find nodes whose configVersions differ from the target's
    // Note: the config version is used to judge whether properties on a node are synced with the target's.
    List<String> differentConfigVersions = getDifferentConfigVersionCommon(targetPropertiesGiven, configVersions);
    List<String> differentVdcConfigVersions = getDifferentVdcConfigVersionCommon(vdcConfigVersions);
    if (targetPowerOffState.getPowerOffState() != PowerOffState.State.NONE) {
        log.info("Control nodes' state POWERINGOFF");
        return ClusterInfo.ClusterState.POWERINGOFF;
    } else if (!differentConfigVersions.isEmpty()) {
        log.info("Control nodes' state UPDATING: {}", Strings.repr(targetPropertiesGiven));
        return ClusterInfo.ClusterState.UPDATING;
    } else if (!differentVdcConfigVersions.isEmpty()) {
        log.info("Control nodes' state UPDATING vdc config version: {}", Strings.repr(differentVdcConfigVersions));
        return ClusterInfo.ClusterState.UPDATING;
    } else if (siteState.isDROperationOngoing()) {
        log.info("Control nodes' state UPDATING since DR operation ongoing: {}", siteState);
        return ClusterInfo.ClusterState.UPDATING;
    } else if (!isControlNodesDriversSynced(targetDrivers, drivers)) {
        log.info("Control nodes' state UPDATING since not all nodes' drivers are synced with target");
        return ClusterInfo.ClusterState.UPDATING;
    } else if (differentCurrents.isEmpty() && differentVersions.isEmpty()) {
        // check for the extra upgrading states
        if (isDbSchemaVersionChanged()) {
            MigrationStatus status = getMigrationStatus();
            if (status == null) {
                log.info("Control nodes state is UPGRADING_PREP_DB ");
                return ClusterInfo.ClusterState.UPGRADING_PREP_DB;
            }
            log.info("Control nodes state is {}", status);
            switch (status) {
                case RUNNING:
                    return ClusterInfo.ClusterState.UPGRADING_CONVERT_DB;
                case FAILED:
                    return ClusterInfo.ClusterState.UPGRADING_FAILED;
                case DONE:
                    break;
                default:
                    log.error("The current db schema version doesn't match the target db schema version, "
                            + "but the current migration status is {} ", status);
            }
        }
        log.info("Control nodes' state STABLE");
        return ClusterInfo.ClusterState.STABLE;
    } else if (differentCurrents.isEmpty()) {
        log.info("Control nodes' state SYNCING: {}", Strings.repr(differentVersions));
        return ClusterInfo.ClusterState.SYNCING;
    } else if (differentVersions.isEmpty()) {
        log.info("Control nodes' state UPGRADING: {}", Strings.repr(differentCurrents));
        return ClusterInfo.ClusterState.UPGRADING;
    } else {
        log.error("Control nodes' in an UNKNOWN state. Target given: {} {}", targetGiven, Strings.repr(infos));
        return ClusterInfo.ClusterState.UNKNOWN;
    }
}
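Distilled, the SiteState checks sit near the top of the precedence: a DR error state forces DEGRADED before any upgrade evaluation, and an in-flight DR operation forces UPDATING before repository versions are compared. A condensed sketch of that ordering, with helper booleans standing in for the per-node comparisons (this is not the production logic, which also weighs power-off state, driver sync, and db migration, and distinguishes SYNCING from UPGRADING):

// Condensed precedence sketch for illustration only.
static ClusterInfo.ClusterState sketchControlNodesState(SiteState siteState, boolean nodesMissing,
        boolean configDiffers, boolean versionsDiffer) {
    if (nodesMissing || siteState == SiteState.STANDBY_ERROR) {
        return ClusterInfo.ClusterState.DEGRADED;   // missing nodes or DR error wins first
    }
    if (configDiffers || siteState.isDROperationOngoing()) {
        return ClusterInfo.ClusterState.UPDATING;   // property push or DR operation in flight
    }
    return versionsDiffer ? ClusterInfo.ClusterState.UPGRADING : ClusterInfo.ClusterState.STABLE;
}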