Search in sources :

Example 1 with RetryableCoordinatorException

use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.

In class VdcConfigUtil, the method genSiteProperties:

/**
 * Writes the per-site properties of one VDC into {@code vdcConfig}.
 *
 * <p>The sites are sorted by creation time, and for every site that is not skipped the
 * node addresses (IPv4 and IPv6), the node count and the VIPs are stored under keys built
 * from the {@code VDC_SITE_*} patterns. For the local VDC the site id list, the standby
 * flag and the active site id are stored as well.
 *
 * @param vdcConfig map that receives the generated properties
 * @param vdcShortId short id of the VDC the given sites belong to
 * @param sites sites to describe; note that this list is sorted in place by creation time
 */
private void genSiteProperties(Map<String, String> vdcConfig, String vdcShortId, List<Site> sites) {
    String activeSiteId = null;
    try {
        activeSiteId = drUtil.getActiveSite().getUuid();
    } catch (RetryableCoordinatorException e) {
        // Deliberately best-effort: during a switchover there may be no active site in ZK.
        log.warn("Failed to find active site id from ZK, go on since it maybe switchover case");
    }
    SiteInfo siteInfo = coordinator.getTargetInfo(SiteInfo.class);
    Site localSite = drUtil.getLocalSite();
    if (StringUtils.isEmpty(activeSiteId) && SiteInfo.DR_OP_SWITCHOVER.equals(siteInfo.getActionRequired())) {
        // No active site found and a switchover is requested: use the switchover target instead.
        activeSiteId = drUtil.getSiteFromLocalVdc(siteInfo.getTargetSiteUUID()).getUuid();
    }
    Collections.sort(sites, new Comparator<Site>() {

        @Override
        public int compare(Site a, Site b) {
            // Never narrow a difference to int: the cast can flip the sign (or yield 0 for
            // distinct values) once the two creation times are far enough apart, which makes
            // the ordering wrong and the comparator inconsistent.
            return Long.compare(a.getCreationTime(), b.getCreationTime());
        }
    });
    List<String> shortIds = new ArrayList<>();
    for (Site site : sites) {
        if (shouldExcludeFromConfig(site)) {
            log.info("Ignore site {} of vdc {}", site.getSiteShortId(), site.getVdcShortId());
            continue;
        }
        // Skip remote sites that are pausing, paused, being removed, failing over or degraded.
        // Original note: this will make it easier to resume the data replication.
        SiteState siteState = site.getState();
        if (!drUtil.isLocalSite(site) && (siteState.equals(SiteState.STANDBY_PAUSING) || siteState.equals(SiteState.STANDBY_PAUSED) || siteState.equals(SiteState.STANDBY_REMOVING) || siteState.equals(SiteState.ACTIVE_FAILING_OVER) || siteState.equals(SiteState.ACTIVE_DEGRADED))) {
            continue;
        }
        int siteNodeCnt = 0;
        Map<String, String> siteIPv4Addrs = site.getHostIPv4AddressMap();
        Map<String, String> siteIPv6Addrs = site.getHostIPv6AddressMap();
        List<String> siteHosts = getHostsFromIPAddrMap(siteIPv4Addrs, siteIPv6Addrs);
        String siteShortId = site.getSiteShortId();
        // sort the host names as vipr1, vipr2 ...
        Collections.sort(siteHosts);
        for (String hostName : siteHosts) {
            siteNodeCnt++;
            // A host missing from one address family is written as an empty string.
            String address = siteIPv4Addrs.get(hostName);
            vdcConfig.put(String.format(VDC_SITE_IPADDR_PTN, vdcShortId, siteShortId, siteNodeCnt), address == null ? "" : address);
            address = siteIPv6Addrs.get(hostName);
            vdcConfig.put(String.format(VDC_SITE_IPADDR6_PTN, vdcShortId, siteShortId, siteNodeCnt), address == null ? "" : address);
        }
        vdcConfig.put(String.format(VDC_SITE_NODE_COUNT_PTN, vdcShortId, siteShortId), String.valueOf(siteNodeCnt));
        vdcConfig.put(String.format(VDC_SITE_VIP_PTN, vdcShortId, siteShortId), site.getVip());
        vdcConfig.put(String.format(VDC_SITE_VIP6_PTN, vdcShortId, siteShortId), site.getVip6());
        if (drUtil.isLocalSite(site)) {
            vdcConfig.put(SITE_MYID, siteShortId);
            vdcConfig.put(SITE_MY_UUID, site.getUuid());
        }
        shortIds.add(siteShortId);
    }
    Collections.sort(shortIds);
    if (drUtil.getLocalVdcShortId().equals(vdcShortId)) {
        // right now we assume that SITE_IDS and SITE_IS_STANDBY only makes sense for local VDC
        // moving forward this may or may not be the case.
        vdcConfig.put(SITE_IDS, StringUtils.join(shortIds, ','));
        vdcConfig.put(SITE_IS_STANDBY, String.valueOf(!localSite.getUuid().equals(activeSiteId)));
        vdcConfig.put(SITE_ACTIVE_ID, StringUtils.isEmpty(activeSiteId) ? DEFAULT_ACTIVE_SITE_ID : drUtil.getSiteFromLocalVdc(activeSiteId).getSiteShortId());
    }
    vdcConfig.put(String.format(VDC_SITE_IDS, vdcShortId), StringUtils.join(shortIds, ','));
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteInfo(com.emc.storageos.coordinator.client.model.SiteInfo) ArrayList(java.util.ArrayList) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)

Example 2 with RetryableCoordinatorException

use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.

In class CoordinatorClientImpl, the method getSemaphoreLock:

/**
 * Creates a semaphore mutex for the given lock name under the {@code ZkPath.MUTEX} root.
 *
 * <p>The root path is ensured first; any failure while doing so is rethrown as a
 * {@link RetryableCoordinatorException} carrying the original cause.
 *
 * @param name lock name, used as the child node under the mutex root
 * @return a new mutex bound to the path of the lock
 * @throws CoordinatorException if the mutex root path cannot be ensured
 */
@Override
public InterProcessSemaphoreMutex getSemaphoreLock(String name) throws CoordinatorException {
    final String mutexRoot = ZkPath.MUTEX.toString();
    final EnsurePath rootGuard = new EnsurePath(mutexRoot);
    try {
        rootGuard.ensure(_zkConnection.curator().getZookeeperClient());
    } catch (Exception e) {
        throw new RetryableCoordinatorException(ServiceCode.COORDINATOR_SVC_NOT_FOUND, e, "Unable to get lock {0}. Caused by: {1}", new Object[] { name, e.getMessage() });
    }
    return new InterProcessSemaphoreMutex(_zkConnection.curator(), ZKPaths.makePath(mutexRoot, name));
}
Also used : EnsurePath(org.apache.curator.utils.EnsurePath) InterProcessSemaphoreMutex(org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) PropertyInfoMapper.decodeFromString(com.emc.storageos.coordinator.mapper.PropertyInfoMapper.decodeFromString) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException)

Example 3 with RetryableCoordinatorException

use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.

In class VdcManager, the method auditCompletedDrOperation:

/**
 * Audits DR operations that are no longer in progress.
 *
 * <p>Runs only on the active site and only while holding the site-local audit lock. For each
 * recorded DR operation, an audit entry (success or failure) is written once the site has
 * left the ongoing state, and the operation record is then removed from the coordinator.
 * Any exception is logged rather than propagated, and the lock is released if it was acquired.
 */
private void auditCompletedDrOperation() {
    if (!drUtil.isActiveSite()) {
        return;
    }
    InterProcessLock auditLock = coordinator.getCoordinatorClient().getSiteLocalLock(AUDIT_DR_OPERATION_LOCK);
    boolean lockHeld = false;
    try {
        lockHeld = auditLock.acquire(AUDIT_LOCK_WAIT_TIME_SEC, TimeUnit.SECONDS);
        if (!lockHeld) {
            return;
        }
        log.info("Local site is active, local node acquired lock, starting audit complete DR operations ...");
        List<Configuration> pendingOps = coordinator.getCoordinatorClient().queryAllConfiguration(DrOperationStatus.CONFIG_KIND);
        if (pendingOps == null || pendingOps.isEmpty()) {
            return;
        }
        for (Configuration pendingOp : pendingOps) {
            DrOperationStatus operation = new DrOperationStatus(pendingOp);
            String siteId = operation.getSiteUuid();
            InterState interState = operation.getInterState();
            Site site;
            try {
                site = drUtil.getSiteFromLocalVdc(siteId);
            } catch (RetryableCoordinatorException e) {
                // Only a missing site during standby removal is expected; anything else is rethrown.
                if (!interState.equals(InterState.REMOVING_STANDBY) || e.getServiceCode() != ServiceCode.COORDINATOR_SITE_NOT_FOUND) {
                    throw e;
                }
                // Under this situation, just record audit log and clear DR operation status
                this.auditMgr.recordAuditLog(null, null, EVENT_SERVICE_TYPE, getOperationType(interState), System.currentTimeMillis(), AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_END, siteId);
                coordinator.getCoordinatorClient().removeServiceConfiguration(pendingOp);
                log.info("DR operation status has been cleared: {}", operation);
                continue;
            }
            SiteState currentState = site.getState();
            boolean failed = currentState.equals(SiteState.STANDBY_ERROR);
            if (!failed && currentState.isDROperationOngoing()) {
                // Still ongoing, do nothing
                continue;
            }
            // Error state => failure; any other non-ongoing state => success.
            this.auditMgr.recordAuditLog(null, null, EVENT_SERVICE_TYPE, getOperationType(interState), System.currentTimeMillis(), failed ? AuditLogManager.AUDITLOG_FAILURE : AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_END, site.toBriefString());
            String transition = String.format("Site %s state has transformed from %s to %s", siteId, interState, currentState);
            log.info(transition);
            // clear this operation status
            coordinator.getCoordinatorClient().removeServiceConfiguration(pendingOp);
            log.info("DR operation status has been cleared: {}", operation);
        }
    } catch (Exception e) {
        log.error("Auditing DR operation failed with exception", e);
    } finally {
        try {
            if (lockHeld) {
                auditLock.release();
            }
        } catch (Exception e) {
            log.error("Failed to release DR operation audit lock", e);
        }
    }
}
Also used : InterState(com.emc.storageos.coordinator.client.model.DrOperationStatus.InterState) Configuration(com.emc.storageos.coordinator.common.Configuration) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) InvalidLockOwnerException(com.emc.storageos.systemservices.exceptions.InvalidLockOwnerException) CoordinatorClientException(com.emc.storageos.systemservices.exceptions.CoordinatorClientException)

Example 4 with RetryableCoordinatorException

use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.

In class DbSvcRunner, the method isStarted:

/**
 * Check if service is started, by looking the service up through the coordinator.
 *
 * @return {@code true} if at least one service instance was located; {@code false} if none
 *         was found or the lookup failed (failures are logged, not thrown)
 */
public boolean isStarted() {
    boolean started = false;
    try {
        List<Service> located = getCoordinator().locateAllServices(serviceName, SVC_VERSION, null, null);
        if (!located.isEmpty()) {
            // Only the first located instance is inspected and logged.
            Service first = located.get(0);
            URI endpoint = first.getEndpoint();
            log.info("Found " + first.getName() + "; host = " + endpoint.getHost() + "; port = " + endpoint.getPort());
            started = true;
        }
    } catch (RetryableCoordinatorException e) {
        log.warn("no {} instance running. Coordinator exception message: {}", serviceName, e.getMessage());
    } catch (Exception e) {
        log.error("service lookup failure", e);
    }
    return started;
}
Also used : CoordinatorClient(com.emc.storageos.coordinator.client.service.CoordinatorClient) DbService(com.emc.storageos.db.server.DbService) Service(com.emc.storageos.coordinator.common.Service) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) URI(java.net.URI) URISyntaxException(java.net.URISyntaxException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) IOException(java.io.IOException)

Example 5 with RetryableCoordinatorException

use of com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException in project coprhd-controller by CoprHD.

In class DisasterRecoveryService, the method updateSite:

/**
 * Update site information. Only name and description can be updated.
 *
 * <p>The new name is validated and must not duplicate the name of any other site. On success
 * the updated site is persisted through the coordinator and a success audit entry is written;
 * on failure a failure audit entry is written and an internal server error is raised.
 *
 * @param uuid target site uuid
 * @param siteParam site information
 * @brief Update Site information
 * @return Response with status ACCEPTED when the update has been persisted
 */
@PUT
@Path("/{uuid}")
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN }, blockProxies = true)
public Response updateSite(@PathParam("uuid") String uuid, SiteUpdateParam siteParam) {
    log.info("Begin to update site information for {}", uuid);
    Site site = null;
    try {
        site = drUtil.getSiteFromLocalVdc(uuid);
    } catch (RetryableCoordinatorException e) {
        log.error("Can't find site with specified site UUID {}", uuid);
        throw APIException.badRequests.siteIdNotFound();
    }
    if (!validSiteName(siteParam.getName())) {
        throw APIException.internalServerErrors.updateSiteFailed(site.getName(), String.format("Site name should not be empty or longer than %d characters.", SITE_NAME_LENGTH_LIMIT));
    }
    // Reject a name that is already used by any other site.
    for (Site eachSite : drUtil.listSites()) {
        if (eachSite.getUuid().equals(uuid)) {
            continue;
        }
        if (eachSite.getName().equals(siteParam.getName())) {
            throw APIException.internalServerErrors.addStandbyPrecheckFailed("Duplicate site name");
        }
    }
    try {
        site.setName(siteParam.getName());
        site.setDescription(siteParam.getDescription());
        coordinator.persistServiceConfiguration(site.toConfiguration());
        auditDisasterRecoveryOps(OperationTypeEnum.UPDATE_SITE, AuditLogManager.AUDITLOG_SUCCESS, null, site.getName(), site.getVipEndPoint(), site.getUuid());
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        // The logger uses "{}" placeholders (as in the other log calls here); a printf-style
        // "%s" is never substituted, so the uuid would be missing from the message.
        log.error("Error happened when update site {}", uuid, e);
        auditDisasterRecoveryOps(OperationTypeEnum.UPDATE_SITE, AuditLogManager.AUDITLOG_FAILURE, null, site.getName(), site.getVipEndPoint(), site.getUuid());
        throw APIException.internalServerErrors.updateSiteFailed(site.getName(), e.getMessage());
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) PUT(javax.ws.rs.PUT) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Aggregations

RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)6 Site (com.emc.storageos.coordinator.client.model.Site)2 Service (com.emc.storageos.coordinator.common.Service)2 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)2 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)2 IOException (java.io.IOException)2 URI (java.net.URI)2 UnknownHostException (java.net.UnknownHostException)2 InterState (com.emc.storageos.coordinator.client.model.DrOperationStatus.InterState)1 SiteInfo (com.emc.storageos.coordinator.client.model.SiteInfo)1 CoordinatorClient (com.emc.storageos.coordinator.client.service.CoordinatorClient)1 Configuration (com.emc.storageos.coordinator.common.Configuration)1 ZkPath (com.emc.storageos.coordinator.common.impl.ZkPath)1 PropertyInfoMapper.decodeFromString (com.emc.storageos.coordinator.mapper.PropertyInfoMapper.decodeFromString)1 DbService (com.emc.storageos.db.server.DbService)1 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)1 InternalServerErrorException (com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException)1 CoordinatorClientException (com.emc.storageos.systemservices.exceptions.CoordinatorClientException)1 InvalidLockOwnerException (com.emc.storageos.systemservices.exceptions.InvalidLockOwnerException)1 Host (com.netflix.astyanax.connectionpool.Host)1