Search in sources :

Example 66 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method updateSite.

/**
 * Update site information. Only name and description can be updated.
 *
 * The site is looked up by UUID in the local VDC, the requested name is validated and
 * checked against the names of all other sites, and the updated site configuration is
 * then persisted through the coordinator. Success and failure of the persist step are
 * both written to the audit log.
 *
 * @param uuid target site uuid
 * @param siteParam site information
 * @brief Update Site information
 * @return Response with status ACCEPTED once the new configuration has been persisted
 */
@PUT
@Path("/{uuid}")
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN }, blockProxies = true)
public Response updateSite(@PathParam("uuid") String uuid, SiteUpdateParam siteParam) {
    log.info("Begin to update site information for {}", uuid);
    Site site = null;
    try {
        site = drUtil.getSiteFromLocalVdc(uuid);
    } catch (RetryableCoordinatorException e) {
        log.error("Can't find site with specified site UUID {}", uuid);
        throw APIException.badRequests.siteIdNotFound();
    }
    // Read the requested name once; it is used for validation, the duplicate check and the update.
    String requestedName = siteParam.getName();
    if (!validSiteName(requestedName)) {
        throw APIException.internalServerErrors.updateSiteFailed(site.getName(), String.format("Site name should not be empty or longer than %d characters.", SITE_NAME_LENGTH_LIMIT));
    }
    // The new name must not collide with any site other than the one being updated.
    for (Site eachSite : drUtil.listSites()) {
        if (eachSite.getUuid().equals(uuid)) {
            continue;
        }
        if (eachSite.getName().equals(requestedName)) {
            throw APIException.internalServerErrors.addStandbyPrecheckFailed("Duplicate site name");
        }
    }
    try {
        site.setName(requestedName);
        site.setDescription(siteParam.getDescription());
        coordinator.persistServiceConfiguration(site.toConfiguration());
        auditDisasterRecoveryOps(OperationTypeEnum.UPDATE_SITE, AuditLogManager.AUDITLOG_SUCCESS, null, site.getName(), site.getVipEndPoint(), site.getUuid());
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        // SLF4J substitutes only "{}" placeholders (not printf-style "%s"); the trailing
        // Throwable argument is logged together with its stack trace.
        log.error("Error happened when update site {}", uuid, e);
        auditDisasterRecoveryOps(OperationTypeEnum.UPDATE_SITE, AuditLogManager.AUDITLOG_FAILURE, null, site.getName(), site.getVipEndPoint(), site.getUuid());
        throw APIException.internalServerErrors.updateSiteFailed(site.getName(), e.getMessage());
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) PUT(javax.ws.rs.PUT) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 67 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method doSwitchover.

/**
 * Do Site Switchover
 * This API will do switchover to the target new active site according to the passed-in site UUID. After switchover, the old
 * active site will work as a normal standby site and the target site will be promoted to active. All sites will update
 * properties to trigger reconfig.
 *
 * Before any state is changed, a precheck is run locally and on every other standby site that is in STANDBY_PAUSED
 * state. The state changes are then made inside a coordinator transaction which is committed on success and
 * discarded on any failure. The DR operation lock obtained at the start is always released before returning.
 *
 * @param uuid target new active site UUID
 * @brief Do site switchover
 * @return return accepted response if operation is successful
 */
@POST
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Path("/{uuid}/switchover")
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN }, blockProxies = true)
public Response doSwitchover(@PathParam("uuid") String uuid) {
    log.info("Begin to switchover for standby UUID {}", uuid);
    precheckForSwitchoverForActiveSite(uuid);
    // Run the remote precheck on every paused standby other than the switchover target.
    List<Site> allStandbySites = drUtil.listStandbySites();
    for (Site site : allStandbySites) {
        if (!site.getUuid().equals(uuid) && site.getState() == SiteState.STANDBY_PAUSED) {
            try (InternalSiteServiceClient client = new InternalSiteServiceClient(site)) {
                client.setCoordinatorClient(coordinator);
                client.setKeyGenerator(apiSignatureGenerator);
                client.switchoverPrecheck();
            }
        }
    }
    String oldActiveUUID = drUtil.getActiveSite().getUuid();
    InterProcessLock lock = drUtil.getDROperationLock();
    Site newActiveSite = null;
    Site oldActiveSite = null;
    try {
        newActiveSite = drUtil.getSiteFromLocalVdc(uuid);
        // Set old active site's state, short id and key
        oldActiveSite = drUtil.getSiteFromLocalVdc(oldActiveUUID);
        if (StringUtils.isEmpty(oldActiveSite.getSiteShortId())) {
            oldActiveSite.setSiteShortId(newActiveSite.getVdcShortId());
        }
        coordinator.startTransaction();
        oldActiveSite.setState(SiteState.ACTIVE_SWITCHING_OVER);
        coordinator.persistServiceConfiguration(oldActiveSite.toConfiguration());
        // this barrier is set when begin switchover and will be removed by new active site. Old active site will wait and reboot after
        // barrier is removed
        DistributedBarrier restartBarrier = coordinator.getDistributedBarrier(String.format("%s/%s/%s", ZkPath.SITES, oldActiveSite.getUuid(), Constants.SWITCHOVER_BARRIER_RESTART));
        restartBarrier.setBarrier();
        drUtil.recordDrOperationStatus(oldActiveSite.getUuid(), InterState.SWITCHINGOVER_ACTIVE);
        // trigger reconfig
        // a version for all sites.
        long vdcConfigVersion = DrUtil.newVdcConfigVersion();
        for (Site eachSite : drUtil.listSites()) {
            if (!eachSite.getUuid().equals(uuid) && eachSite.getState() == SiteState.STANDBY_PAUSED) {
                // Paused standbys (other than the target) are notified through the internal site client.
                try (InternalSiteServiceClient client = new InternalSiteServiceClient(eachSite)) {
                    client.setCoordinatorClient(coordinator);
                    client.setKeyGenerator(apiSignatureGenerator);
                    client.switchover(newActiveSite.getUuid(), vdcConfigVersion);
                }
            } else {
                drUtil.updateVdcTargetVersion(eachSite.getUuid(), SiteInfo.DR_OP_SWITCHOVER, vdcConfigVersion, oldActiveSite.getUuid(), newActiveSite.getUuid());
            }
        }
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.SWITCHOVER, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, oldActiveSite.toBriefString(), newActiveSite.toBriefString());
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        log.error(String.format("Error happened when switchover from site %s to site %s", oldActiveUUID, uuid), e);
        coordinator.discardTransaction();
        // Either site lookup may have thrown before its variable was assigned. Fall back to the UUIDs so the
        // original failure is reported instead of a NullPointerException raised from this handler.
        String newActiveName = newActiveSite == null ? uuid : newActiveSite.getName();
        String oldActiveName = oldActiveSite == null ? oldActiveUUID : oldActiveSite.getName();
        auditDisasterRecoveryOps(OperationTypeEnum.SWITCHOVER, AuditLogManager.AUDITLOG_FAILURE, null, newActiveName, newActiveSite == null ? null : newActiveSite.getVipEndPoint());
        throw APIException.internalServerErrors.switchoverFailed(oldActiveName, newActiveName, e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception releaseError) {
            // Best effort: a failed release must not replace the result of the operation, but keep the cause in the log.
            log.error(String.format("Lock release failed when switchover from %s to %s", oldActiveUUID, uuid), releaseError);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) InternalSiteServiceClient(com.emc.storageos.api.service.impl.resource.utils.InternalSiteServiceClient) DistributedBarrier(org.apache.curator.framework.recipes.barriers.DistributedBarrier) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 68 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method checkIsActive.

/**
 * Check if current site is active site
 *
 * Reads the local site from {@code drUtil} and reports whether its state is ACTIVE, together with the
 * local site's name and UUID and whether the deployment is multi-site.
 * Any failure while reading this information is logged and reported as a "site id not found" bad request.
 *
 * @brief Check if current site is active
 * @return SiteActive true if current site is active else false
 */
@GET
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Path("/active")
public SiteActive checkIsActive() {
    log.info("Begin to check if site Active or Standby");
    SiteActive isActiveSite = new SiteActive();
    try {
        Site localSite = drUtil.getLocalSite();
        isActiveSite.setIsActive(localSite.getState() == SiteState.ACTIVE);
        isActiveSite.setLocalSiteName(localSite.getName());
        isActiveSite.setLocalUuid(localSite.getUuid());
        isActiveSite.setIsMultiSite(drUtil.isMultisite());
        return isActiveSite;
    } catch (Exception e) {
        // Pass the exception to the logger so the root cause is not lost when it is translated below.
        log.error("Can't get site is Active or Standby", e);
        throw APIException.badRequests.siteIdNotFound();
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteActive(com.emc.storageos.model.dr.SiteActive) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 69 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method remove.

/**
 * Remove multiple standby sites. After successfully done, it stops data replication to those sites
 *
 * Every requested site is first loaded and validated: the request is rejected if a site cannot be loaded,
 * if it is the active site, or if it has a DR operation ongoing other than STANDBY_SYNCING. After the common
 * precheck, all sites are marked STANDBY_REMOVING and every site is notified to reconfigure, inside a single
 * coordinator transaction that is discarded on failure. The DR operation lock is always released before returning.
 *
 * @param idList site uuid list to be removed
 * @brief Remove a list of standby sites
 * @return Response with status ACCEPTED once the removal has been initiated
 */
@POST
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN }, blockProxies = true)
@Path("/remove")
public Response remove(SiteIdListParam idList) {
    List<String> siteIdList = idList.getIds();
    String siteIdStr = StringUtils.join(siteIdList, ",");
    log.info("Begin to remove standby site from local vdc by uuid: {}", siteIdStr);
    List<Site> toBeRemovedSites = new ArrayList<>();
    // Site names are collected alongside the sites for more human-readable Exception error messages
    List<String> siteNames = new ArrayList<>();
    for (String siteId : siteIdList) {
        Site site;
        try {
            site = drUtil.getSiteFromLocalVdc(siteId);
        } catch (Exception ex) {
            // Keep the cause in the log; the client only sees the generic "site id not found" error.
            log.error("Can't load site {} from ZK", siteId, ex);
            throw APIException.badRequests.siteIdNotFound();
        }
        if (site.getState().equals(SiteState.ACTIVE)) {
            log.error("Unable to remove this site {}. It is active", siteId);
            throw APIException.badRequests.operationNotAllowedOnActiveSite();
        }
        if (site.getState().isDROperationOngoing() && !site.getState().equals(SiteState.STANDBY_SYNCING)) {
            log.error("Unable to remove this site {} in state {}. " + "DR operation other than STANDBY_SYNCING is ongoing", siteId, site.getState().name());
            throw APIException.internalServerErrors.concurrentDROperationNotAllowed(site.getName(), site.getState().toString());
        }
        toBeRemovedSites.add(site);
        siteNames.add(site.getName());
    }
    String siteNamesStr = StringUtils.join(siteNames, ", ");
    try {
        commonPrecheck(siteIdList);
    } catch (APIException e) {
        // Already a well-formed API error; pass it through unchanged.
        throw e;
    } catch (Exception e) {
        throw APIException.internalServerErrors.removeStandbyPrecheckFailed(siteNamesStr, e.getMessage());
    }
    InterProcessLock lock = drUtil.getDROperationLock(false);
    List<String> sitesString = new ArrayList<>();
    try {
        log.info("Removing sites");
        coordinator.startTransaction();
        for (Site site : toBeRemovedSites) {
            site.setState(SiteState.STANDBY_REMOVING);
            coordinator.persistServiceConfiguration(site.toConfiguration());
            drUtil.recordDrOperationStatus(site.getUuid(), InterState.REMOVING_STANDBY);
            sitesString.add(site.toBriefString());
        }
        log.info("Notify all sites for reconfig");
        long vdcTargetVersion = DrUtil.newVdcConfigVersion();
        for (Site standbySite : drUtil.listSites()) {
            drUtil.updateVdcTargetVersion(standbySite.getUuid(), SiteInfo.DR_OP_REMOVE_STANDBY, vdcTargetVersion);
        }
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.REMOVE_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN, StringUtils.join(sitesString, ','));
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        log.error("Failed to remove site {}", siteIdStr, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.REMOVE_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null, StringUtils.join(sitesString, ','));
        throw APIException.internalServerErrors.removeStandbyFailed(siteNamesStr, e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception releaseError) {
            // Best effort: a failed release must not replace the result of the operation, but keep the cause in the log.
            log.error(String.format("Lock release failed when removing standby sites: %s", siteIdStr), releaseError);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) ArrayList(java.util.ArrayList) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 70 with Site

use of com.emc.storageos.coordinator.client.model.Site in project coprhd-controller by CoprHD.

the class DbClientContext method checkAndResetConsistencyLevel.

/**
 * Re-evaluates the default write consistency level and raises it back to EACH_QUORUM when possible.
 *
 * If retry-with-local-quorum is enabled in a multi-VDC configuration, that retry is switched off and
 * nothing else is done. Otherwise, when the current default write level is not already EACH_QUORUM,
 * every standby site that is neither STANDBY_PAUSED nor STANDBY_DEGRADED must have more than half of
 * its nodes running the given service; only then is the default write level set to EACH_QUORUM.
 *
 * @param drUtil helper used to query VDC/site topology and live service counts
 * @param svcName name of the service whose live instances are counted on each standby site
 */
private void checkAndResetConsistencyLevel(DrUtil drUtil, String svcName) {
    boolean retryOnLocalQuorum = isRetryFailedWriteWithLocalQuorum();
    if (retryOnLocalQuorum && drUtil.isMultivdc()) {
        log.info("Disable retry for write failure in multiple vdc configuration");
        setRetryFailedWriteWithLocalQuorum(false);
        return;
    }

    ConsistencyLevel writeLevel = getKeyspace().getConfig().getDefaultWriteConsistencyLevel();
    if (writeLevel.equals(ConsistencyLevel.CL_EACH_QUORUM)) {
        log.debug("Write consistency level is EACH_QUORUM. No need adjust");
        return;
    }

    log.info("Db consistency level for {} is downgraded as LOCAL_QUORUM. Check if we need reset it back", svcName);
    for (Site standby : drUtil.listStandbySites()) {
        // Paused and degraded standby sites do not take part in the quorum check.
        boolean excluded = standby.getState().equals(SiteState.STANDBY_PAUSED)
                || standby.getState().equals(SiteState.STANDBY_DEGRADED);
        if (!excluded) {
            String standbyUuid = standby.getUuid();
            int liveServices = drUtil.getNumberOfLiveServices(standbyUuid, svcName);
            // A site needs strictly more than half of its nodes up to count as having quorum.
            if (liveServices <= standby.getNodeCount() / 2) {
                log.info("Service {} of quorum nodes on site {} is down. Still keep write consistency level to LOCAL_QUORUM", svcName, standbyUuid);
                return;
            }
        }
    }

    log.info("Service {} of quorum nodes on all standby sites are up. Reset default write consistency level back to EACH_QUORUM", svcName);
    ((AstyanaxConfigurationImpl) keyspaceContext.getAstyanaxConfiguration())
            .setDefaultWriteConsistencyLevel(ConsistencyLevel.CL_EACH_QUORUM);
}
Also used : ConsistencyLevel(com.netflix.astyanax.model.ConsistencyLevel) Site(com.emc.storageos.coordinator.client.model.Site) AstyanaxConfigurationImpl(com.netflix.astyanax.impl.AstyanaxConfigurationImpl)

Aggregations

Site (com.emc.storageos.coordinator.client.model.Site)79 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)21 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)21 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)20 UnknownHostException (java.net.UnknownHostException)18 Produces (javax.ws.rs.Produces)17 InternalServerErrorException (com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException)16 Path (javax.ws.rs.Path)15 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)14 ArrayList (java.util.ArrayList)14 DrUtil (com.emc.storageos.coordinator.client.service.DrUtil)11 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)11 InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock)11 SiteInfo (com.emc.storageos.coordinator.client.model.SiteInfo)10 POST (javax.ws.rs.POST)10 SiteState (com.emc.storageos.coordinator.client.model.SiteState)9 Configuration (com.emc.storageos.coordinator.common.Configuration)8 VirtualDataCenter (com.emc.storageos.db.client.model.VirtualDataCenter)8 Consumes (javax.ws.rs.Consumes)8 ClusterInfo (com.emc.vipr.model.sys.ClusterInfo)6