Search in sources:

Example 86 with InterProcessLock

use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.

the class DisasterRecoveryService method remove.

/**
 * Remove multiple standby sites. After successfully done, it stops data replication to those sites.
 * <p>
 * Each requested site is loaded and validated first: an ACTIVE site, or a site with a DR
 * operation ongoing other than STANDBY_SYNCING, is rejected. After the common precheck passes,
 * all sites are marked STANDBY_REMOVING inside a single coordinator transaction and every site
 * returned by {@code drUtil.listSites()} gets a new vdc target version for the reconfiguration.
 *
 * @param idList site uuid list to be removed
 * @brief Remove a list of standby sites
 * @return Response with status ACCEPTED once the removal has been committed
 */
@POST
@Consumes({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@Produces({ MediaType.APPLICATION_XML, MediaType.APPLICATION_JSON })
@CheckPermission(roles = { Role.SECURITY_ADMIN, Role.RESTRICTED_SECURITY_ADMIN }, blockProxies = true)
@Path("/remove")
public Response remove(SiteIdListParam idList) {
    List<String> siteIdList = idList.getIds();
    String siteIdStr = StringUtils.join(siteIdList, ",");
    log.info("Begin to remove standby site from local vdc by uuid: {}", siteIdStr);
    List<Site> toBeRemovedSites = new ArrayList<>();
    for (String siteId : siteIdList) {
        Site site;
        try {
            site = drUtil.getSiteFromLocalVdc(siteId);
        } catch (Exception ex) {
            // Keep the root cause in the log; the API error returned to the caller carries no detail.
            log.error("Can't load site {} from ZK", siteId, ex);
            throw APIException.badRequests.siteIdNotFound();
        }
        if (site.getState().equals(SiteState.ACTIVE)) {
            log.error("Unable to remove this site {}. It is active", siteId);
            throw APIException.badRequests.operationNotAllowedOnActiveSite();
        }
        if (site.getState().isDROperationOngoing() && !site.getState().equals(SiteState.STANDBY_SYNCING)) {
            log.error("Unable to remove this site {} in state {}. DR operation other than STANDBY_SYNCING is ongoing", siteId,
                    site.getState().name());
            throw APIException.internalServerErrors.concurrentDROperationNotAllowed(site.getName(), site.getState().toString());
        }
        toBeRemovedSites.add(site);
    }
    // Build a site names' string for more human-readable Exception error message
    StringBuilder siteNamesSb = new StringBuilder();
    for (Site site : toBeRemovedSites) {
        if (siteNamesSb.length() != 0) {
            siteNamesSb.append(", ");
        }
        siteNamesSb.append(site.getName());
    }
    String siteNamesStr = siteNamesSb.toString();
    try {
        commonPrecheck(siteIdList);
    } catch (APIException e) {
        // Already a well-formed API error; pass it through untouched.
        throw e;
    } catch (Exception e) {
        throw APIException.internalServerErrors.removeStandbyPrecheckFailed(siteNamesStr, e.getMessage());
    }
    // NOTE(review): presumably getDROperationLock returns the lock already acquired, since this
    // method only ever releases it - confirm against DrUtil.
    InterProcessLock lock = drUtil.getDROperationLock(false);
    List<String> sitesString = new ArrayList<>();
    try {
        log.info("Removing sites");
        coordinator.startTransaction();
        for (Site site : toBeRemovedSites) {
            site.setState(SiteState.STANDBY_REMOVING);
            coordinator.persistServiceConfiguration(site.toConfiguration());
            drUtil.recordDrOperationStatus(site.getUuid(), InterState.REMOVING_STANDBY);
            sitesString.add(site.toBriefString());
        }
        log.info("Notify all sites for reconfig");
        long vdcTargetVersion = DrUtil.newVdcConfigVersion();
        for (Site standbySite : drUtil.listSites()) {
            drUtil.updateVdcTargetVersion(standbySite.getUuid(), SiteInfo.DR_OP_REMOVE_STANDBY, vdcTargetVersion);
        }
        coordinator.commitTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.REMOVE_STANDBY, AuditLogManager.AUDITLOG_SUCCESS, AuditLogManager.AUDITOP_BEGIN,
                StringUtils.join(sitesString, ','));
        return Response.status(Response.Status.ACCEPTED).build();
    } catch (Exception e) {
        log.error("Failed to remove site {}", siteIdStr, e);
        coordinator.discardTransaction();
        auditDisasterRecoveryOps(OperationTypeEnum.REMOVE_STANDBY, AuditLogManager.AUDITLOG_FAILURE, null,
                StringUtils.join(sitesString, ','));
        throw APIException.internalServerErrors.removeStandbyFailed(siteNamesStr, e.getMessage());
    } finally {
        try {
            lock.release();
        } catch (Exception ex) {
            // Best effort: a failed release must not mask the outcome of the removal,
            // but the cause is logged so a stuck lock can be diagnosed.
            log.error("Lock release failed when removing standby sites: {}", siteIdStr, ex);
        }
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) ArrayList(java.util.ArrayList) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) APIException(com.emc.storageos.svcs.errorhandling.resources.APIException) InternalServerErrorException(com.emc.storageos.svcs.errorhandling.resources.InternalServerErrorException) CoordinatorException(com.emc.storageos.coordinator.exceptions.CoordinatorException) RetryableCoordinatorException(com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException) UnknownHostException(java.net.UnknownHostException) Path(javax.ws.rs.Path) ZkPath(com.emc.storageos.coordinator.common.impl.ZkPath) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) CheckPermission(com.emc.storageos.security.authorization.CheckPermission)

Example 87 with InterProcessLock

use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.

the class WorkflowService method rollbackInnerWorkflow.

/**
 * Rolls back a workflow that is assumed to be a child of the given stepId.
 * <p>
 * A {@link NestedWorkflowRollbackHandler} is installed on the workflow (wrapping any handler
 * that was already present) so that a completer can be fired for the parent step when the
 * rollback finishes. Depending on the outcome the parent step is marked:
 * <ul>
 * <li>succeeded, when a previous rollback has already completed;</li>
 * <li>executing, when a previous rollback was resumed or a new one was initiated;</li>
 * <li>failed, when no rollback could be started.</li>
 * </ul>
 * NOTE: The current state of the child workflow is expected to be SUCCESS when this is
 * invoked. The overall state is computed below, but this method does not check it.
 *
 * @param workflow
 *            -- the Inner workflow
 * @param stepId
 *            -- assumed to be a stepId of the outer workflow
 */
private void rollbackInnerWorkflow(Workflow workflow, String stepId) {
    URI workflowUri = workflow.getWorkflowURI();
    _log.info(String.format("Rollback requested workflow: %s", workflowUri));

    // Get the workflow state. The result is not used further down in this method.
    String[] stateMessage = { "" };
    StepState state = Workflow.getOverallState(workflow.getStepStatusMap(), stateMessage);

    // Update the rollback handlers so that a completer can be fired at the end of the workflow.
    Object[] handlerArgs;
    if (workflow._rollbackHandler == null) {
        // No nested rollback handler: only our own arguments are needed.
        handlerArgs = new Object[NestedWorkflowRollbackHandler.NUMBER_OF_ADDED_ARGS];
    } else {
        // Nested rollback handler: keep the original arguments in front and leave room for ours.
        // Our rollback handler will call the nested handler.
        handlerArgs = new Object[workflow._rollbackHandlerArgs.length + NestedWorkflowRollbackHandler.NUMBER_OF_ADDED_ARGS];
        System.arraycopy(workflow._rollbackHandlerArgs, 0, handlerArgs, 0, workflow._rollbackHandlerArgs.length);
    }
    // Added arguments: the original handler (null when there was none) and the parent stepId for completion.
    handlerArgs[NestedWorkflowRollbackHandler.indexOfNestedHandler(handlerArgs)] = workflow._rollbackHandler;
    handlerArgs[NestedWorkflowRollbackHandler.indexOfParentStepId(handlerArgs)] = stepId;
    workflow._rollbackHandler = new NestedWorkflowRollbackHandler();
    workflow._rollbackHandlerArgs = handlerArgs;

    // If the workflow already completed a rollback there is nothing left to do.
    if (determineIfRollbackCompleted(workflow)) {
        _log.info(String.format("Rollback already completed workflow %s", workflow.getWorkflowURI()));
        WorkflowStepCompleter.stepSucceded(stepId);
        return;
    }

    InterProcessLock lock = null;
    try {
        lock = lockWorkflow(workflow);
        // First try to restart the error and cancelled steps of a previous rollback.
        boolean started = resumePreviousRollback(workflow);
        if (!started) {
            // Otherwise, attempt to initiate a new rollback.
            if (workflow._rollbackHandler != null) {
                workflow._rollbackHandler.initiatingRollback(workflow, workflow._rollbackHandlerArgs);
            }
            started = initiateRollback(workflow);
            if (started) {
                _log.info(String.format("New rollback initiated workflow %s", workflow.getWorkflowURI()));
            }
        } else {
            _log.info(String.format("Previous rollback resumed; errored/cancelled rollback steps queued; workflow %s",
                    workflow.getWorkflowURI()));
        }

        if (!started) {
            ServiceCoded notInitiated = WorkflowException.exceptions.workflowRollbackNotInitiated(workflowUri.toString());
            WorkflowStepCompleter.stepFailed(stepId, notInitiated);
        } else {
            // Rollback is running; the rollback completions fire the completer later.
            persistWorkflow(workflow);
            logWorkflow(workflow, true);
            WorkflowStepCompleter.stepExecuting(stepId);
        }
    } finally {
        unlockWorkflow(workflow, lock);
    }
}
Also used : StepState(com.emc.storageos.workflow.Workflow.StepState) ServiceCoded(com.emc.storageos.svcs.errorhandling.model.ServiceCoded) DataObject(com.emc.storageos.db.client.model.DataObject) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) URI(java.net.URI) AlternateIdConstraint(com.emc.storageos.db.client.constraint.AlternateIdConstraint)

Example 88 with InterProcessLock

use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.

the class WorkflowService method resumeWorkflow.

/**
 * Resumes a suspended workflow together with its child workflows.
 * <p>
 * The workflow is loaded from the given URI and must be in state SUSPENDED_ERROR or
 * SUSPENDED_NO_ERROR. Under the workflow lock, rollback steps are removed, the resume steps
 * are queued and each child workflow is resumed recursively (with a null task id).
 * A {@link WorkflowException} raised along the way is reported through the task completer
 * rather than propagated.
 *
 * @param uri URI of the workflow to resume
 * @param taskId task id handed to the completer that reports the outcome
 * @throws ControllerException declared by the overridden method
 */
@Override
public void resumeWorkflow(URI uri, String taskId) throws ControllerException {
    Workflow workflow = null;
    InterProcessLock lock = null;
    WorkflowTaskCompleter taskCompleter = new WorkflowTaskCompleter(uri, taskId);
    try {
        _log.info(String.format("Resume request workflow: %s", uri));
        workflow = loadWorkflowFromUri(uri);
        if (workflow == null) {
            // Cannot resume non-existent workflow
            throw WorkflowException.exceptions.workflowNotFound(uri.toString());
        }

        WorkflowState currentState = workflow.getWorkflowState();
        boolean suspended = currentState == WorkflowState.SUSPENDED_ERROR
                || currentState == WorkflowState.SUSPENDED_NO_ERROR;
        if (!suspended) {
            // Cannot resume a workflow that is not suspended
            _log.info(String.format("Workflow %s state %s is not suspended and will not be resumed", uri, currentState));
            throw WorkflowException.exceptions.workflowNotSuspended(uri.toString(), currentState.toString());
        }

        if (workflow._taskCompleter != null) {
            workflow._taskCompleter.statusPending(_dbClient, "Resuming workflow");
        }

        lock = lockWorkflow(workflow);
        Map<String, com.emc.storageos.db.client.model.Workflow> children = getChildWorkflowsMap(workflow);
        removeRollbackSteps(workflow);
        queueResumeSteps(workflow, children);
        // Resume the child workflows if applicable.
        for (com.emc.storageos.db.client.model.Workflow childWorkflow : children.values()) {
            resumeWorkflow(childWorkflow.getId(), null);
        }
        taskCompleter.ready(_dbClient);
    } catch (WorkflowException ex) {
        taskCompleter.error(_dbClient, ex);
    } finally {
        // workflow and lock may still be null here if loading or locking failed.
        unlockWorkflow(workflow, lock);
    }
}
Also used : InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock)

Example 89 with InterProcessLock

use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.

the class WorkflowService method lockWorkflow.

/**
 * Locks a Workflow using ZK.
 * <p>
 * Waits up to 60 minutes for the lock obtained from {@code getWorkflowLock(workflow)}.
 *
 * @param workflow the workflow to lock
 * @return the lock, which has been acquired when this method returns normally
 * @throws WorkflowException if acquiring the lock throws, or if the lock could not be
 *             acquired within the timeout
 */
private InterProcessLock lockWorkflow(Workflow workflow) throws WorkflowException {
    InterProcessLock lock = getWorkflowLock(workflow);
    boolean acquired;
    try {
        acquired = lock.acquire(60, TimeUnit.MINUTES);
    } catch (Exception ex) {
        if (ex instanceof InterruptedException) {
            // Restore the interrupt flag so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        String failure = "Exception locking workflow: " + workflow.getWorkflowURI().toString();
        _log.error(failure, ex);
        throw new WorkflowException(failure, ex);
    }
    if (!acquired) {
        // acquire() returned false: the timeout elapsed without obtaining the lock.
        String failure = "Unable to acquire workflow lock: " + workflow.getWorkflowURI().toString();
        _log.error(failure);
        throw new WorkflowException(failure);
    }
    return lock;
}
Also used : InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock) InternalException(com.emc.storageos.svcs.errorhandling.resources.InternalException) DatabaseException(com.emc.storageos.db.exceptions.DatabaseException) DeviceControllerException(com.emc.storageos.exceptions.DeviceControllerException) ControllerException(com.emc.storageos.volumecontroller.ControllerException) LockRetryException(com.emc.storageos.locking.LockRetryException)

Example 90 with InterProcessLock

use of org.apache.curator.framework.recipes.locks.InterProcessLock in project coprhd-controller by CoprHD.

the class DrPostFailoverHandler method run.

/**
 * Run the handler. The handler runs only on single node. If it fails, current service should quit and another node takes over
 * to retry.
 * <p>
 * Nothing is done unless the local site is in state STANDBY_FAILING_OVER; the state is checked
 * once before and once after taking the post failover handler lock. When this handler has not
 * been completed yet it is executed, and once all handlers are completed the local site is
 * switched to ACTIVE.
 *
 * @throws IllegalStateException wrapping any exception raised during processing
 */
public void run() {
    try {
        SiteState stateBeforeLock = drUtil.getLocalSite().getState();
        if (!stateBeforeLock.equals(SiteState.STANDBY_FAILING_OVER)) {
            log.info("Ignore DR post failover handler for site state {}", stateBeforeLock);
            return;
        }

        log.info("Acquiring lock {}", POST_FAILOVER_HANDLER_LOCK);
        InterProcessLock handlerLock = coordinator.getLock(POST_FAILOVER_HANDLER_LOCK);
        handlerLock.acquire();
        log.info("Acquired lock {}", POST_FAILOVER_HANDLER_LOCK);
        try {
            // check site state again after acquiring lock
            Site localSite = drUtil.getLocalSite();
            SiteState stateUnderLock = localSite.getState();
            if (!stateUnderLock.equals(SiteState.STANDBY_FAILING_OVER)) {
                log.info("Ignore DR post failover handler for site state {}", stateUnderLock);
                return;
            }

            boolean alreadyCompleted = isCompleted();
            if (alreadyCompleted) {
                log.info("Handler {} was completed on other node", name);
            } else {
                log.info("Start post failover processing {}", name);
                updateStatus(Status.EXECUTING);
                execute();
                updateStatus(Status.COMPLETED);
            }

            if (isAllHandlersCompleted()) {
                log.info("All handlers successfully completed. Change site state to ACTIVE");
                localSite.setState(SiteState.ACTIVE);
                coordinator.persistServiceConfiguration(localSite.toConfiguration());
            }
        } finally {
            // Always give the lock back, including on the early return above.
            handlerLock.release();
            log.info("Released lock {}", POST_FAILOVER_HANDLER_LOCK);
        }
    } catch (Exception e) {
        log.error("Failed to execute DR failover handler", e);
        throw new IllegalStateException(e);
    }
}
Also used : Site(com.emc.storageos.coordinator.client.model.Site) SiteState(com.emc.storageos.coordinator.client.model.SiteState) InterProcessLock(org.apache.curator.framework.recipes.locks.InterProcessLock)

Aggregations

InterProcessLock (org.apache.curator.framework.recipes.locks.InterProcessLock)98 APIException (com.emc.storageos.svcs.errorhandling.resources.APIException)25 DatabaseException (com.emc.storageos.db.exceptions.DatabaseException)21 DeviceControllerException (com.emc.storageos.exceptions.DeviceControllerException)15 IOException (java.io.IOException)15 ControllerException (com.emc.storageos.volumecontroller.ControllerException)14 Configuration (com.emc.storageos.coordinator.common.Configuration)12 CoordinatorException (com.emc.storageos.coordinator.exceptions.CoordinatorException)12 UnknownHostException (java.net.UnknownHostException)12 Site (com.emc.storageos.coordinator.client.model.Site)11 RetryableCoordinatorException (com.emc.storageos.coordinator.exceptions.RetryableCoordinatorException)11 NetworkDeviceControllerException (com.emc.storageos.networkcontroller.exceptions.NetworkDeviceControllerException)10 CheckPermission (com.emc.storageos.security.authorization.CheckPermission)9 ServiceError (com.emc.storageos.svcs.errorhandling.model.ServiceError)9 BiosCommandResult (com.emc.storageos.volumecontroller.impl.BiosCommandResult)9 ArrayList (java.util.ArrayList)9 POST (javax.ws.rs.POST)9 NetworkSystem (com.emc.storageos.db.client.model.NetworkSystem)8 Path (javax.ws.rs.Path)8 ConfigurationImpl (com.emc.storageos.coordinator.common.impl.ConfigurationImpl)6