use of org.apache.oozie.action.control.ControlNodeActionExecutor in project oozie by apache.
the class KillXCommand method execute.
@Override
protected Void execute() throws CommandException {
LOG.info("STARTED WorkflowKillXCommand for jobId=" + wfId);
wfJob.setEndTime(new Date());
if (wfJob.getStatus() != WorkflowJob.Status.FAILED) {
InstrumentUtils.incrJobCounter(getName(), 1, getInstrumentation());
wfJob.setStatus(WorkflowJob.Status.KILLED);
SLAEventBean slaEvent = SLADbXOperations.createStatusEvent(wfJob.getSlaXml(), wfJob.getId(), Status.KILLED, SlaAppType.WORKFLOW_JOB);
if (slaEvent != null) {
insertList.add(slaEvent);
}
try {
wfJob.getWorkflowInstance().kill();
} catch (WorkflowException e) {
throw new CommandException(ErrorCode.E0725, e.getMessage(), e);
}
WorkflowInstance wfInstance = wfJob.getWorkflowInstance();
((LiteWorkflowInstance) wfInstance).setStatus(WorkflowInstance.Status.KILLED);
wfJob.setWorkflowInstance(wfInstance);
}
try {
for (WorkflowActionBean action : actionList) {
if (action.getStatus() == WorkflowActionBean.Status.RUNNING || action.getStatus() == WorkflowActionBean.Status.DONE) {
if (!(actionService.getExecutor(action.getType()) instanceof ControlNodeActionExecutor)) {
action.setPending();
}
action.setStatus(WorkflowActionBean.Status.KILLED);
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_STATUS_PENDING, action));
queue(new ActionKillXCommand(action.getId(), action.getType()));
} else if (action.getStatus() == WorkflowActionBean.Status.PREP || action.getStatus() == WorkflowActionBean.Status.START_RETRY || action.getStatus() == WorkflowActionBean.Status.START_MANUAL || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL || action.getStatus() == WorkflowActionBean.Status.USER_RETRY) {
action.setStatus(WorkflowActionBean.Status.KILLED);
action.resetPending();
SLAEventBean slaEvent = SLADbXOperations.createStatusEvent(action.getSlaXml(), action.getId(), Status.KILLED, SlaAppType.WORKFLOW_ACTION);
if (slaEvent != null) {
insertList.add(slaEvent);
}
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_STATUS_PENDING, action));
if (EventHandlerService.isEnabled() && !(actionService.getExecutor(action.getType()) instanceof ControlNodeActionExecutor)) {
generateEvent(action, wfJob.getUser());
}
}
}
wfJob.setLastModifiedTime(new Date());
updateList.add(new UpdateEntry<WorkflowJobQuery>(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MOD_END, wfJob));
BatchQueryExecutor.getInstance().executeBatchInsertUpdateDelete(insertList, updateList, null);
if (EventHandlerService.isEnabled()) {
generateEvent(wfJob);
}
queue(new WorkflowNotificationXCommand(wfJob));
} catch (JPAExecutorException e) {
throw new CommandException(e);
} finally {
if (wfJob.getStatus() == WorkflowJob.Status.KILLED) {
// To delete the WF temp dir
new WfEndXCommand(wfJob).call();
}
updateParentIfNecessary(wfJob);
}
LOG.info("ENDED WorkflowKillXCommand for jobId=" + wfId);
return null;
}
use of org.apache.oozie.action.control.ControlNodeActionExecutor in project oozie by apache.
the class ActionEndXCommand method execute.
@Override
protected Void execute() throws CommandException {
LOG.debug("STARTED ActionEndXCommand for action " + actionId);
Configuration conf = wfJob.getWorkflowInstance().getConf();
int maxRetries = 0;
long retryInterval = 0;
if (!(executor instanceof ControlNodeActionExecutor)) {
maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
}
executor.setMaxRetries(maxRetries);
executor.setRetryInterval(retryInterval);
boolean isRetry = false;
if (wfAction.getStatus() == WorkflowActionBean.Status.END_RETRY || wfAction.getStatus() == WorkflowActionBean.Status.END_MANUAL) {
isRetry = true;
}
boolean isUserRetry = false;
ActionExecutorContext context = new ActionXCommand.ActionExecutorContext(wfJob, wfAction, isRetry, isUserRetry);
try {
LOG.debug("End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]", wfAction.getName(), wfAction.getType(), wfAction.getStatus(), wfAction.getExternalStatus(), wfAction.getSignalValue());
Instrumentation.Cron cron = new Instrumentation.Cron();
cron.start();
executor.end(context, wfAction);
cron.stop();
addActionCron(wfAction.getType(), cron);
incrActionCounter(wfAction.getType(), 1);
if (!context.isEnded()) {
LOG.warn(XLog.OPS, "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType());
wfAction.setErrorInfo(END_DATA_MISSING, "Execution Ended, but End Data Missing from Action");
failJob(context);
} else {
wfAction.setRetries(0);
wfAction.setEndTime(new Date());
boolean shouldHandleUserRetry = false;
Status slaStatus = null;
switch(wfAction.getStatus()) {
case OK:
slaStatus = Status.SUCCEEDED;
break;
case KILLED:
slaStatus = Status.KILLED;
break;
case FAILED:
slaStatus = Status.FAILED;
shouldHandleUserRetry = true;
break;
case ERROR:
LOG.info("ERROR is considered as FAILED for SLA");
slaStatus = Status.KILLED;
shouldHandleUserRetry = true;
break;
default:
slaStatus = Status.FAILED;
shouldHandleUserRetry = true;
break;
}
if (!shouldHandleUserRetry || !handleUserRetry(context, wfAction)) {
SLAEventBean slaEvent = SLADbXOperations.createStatusEvent(wfAction.getSlaXml(), wfAction.getId(), slaStatus, SlaAppType.WORKFLOW_ACTION);
if (slaEvent != null) {
insertList.add(slaEvent);
}
}
}
WorkflowInstance wfInstance = wfJob.getWorkflowInstance();
DagELFunctions.setActionInfo(wfInstance, wfAction);
wfJob.setWorkflowInstance(wfInstance);
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_END, wfAction));
wfJob.setLastModifiedTime(new Date());
updateList.add(new UpdateEntry<WorkflowJobQuery>(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MODIFIED, wfJob));
} catch (ActionExecutorException ex) {
LOG.warn("Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]", wfAction.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage());
wfAction.setErrorInfo(ex.getErrorCode(), ex.getMessage());
wfAction.setEndTime(null);
switch(ex.getErrorType()) {
case TRANSIENT:
if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) {
handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL);
wfAction.setPendingAge(new Date());
wfAction.setRetries(0);
}
wfAction.setEndTime(null);
break;
case NON_TRANSIENT:
handleNonTransient(context, executor, WorkflowAction.Status.END_MANUAL);
wfAction.setEndTime(null);
break;
case ERROR:
handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR);
break;
case FAILED:
failJob(context);
break;
}
WorkflowInstance wfInstance = wfJob.getWorkflowInstance();
DagELFunctions.setActionInfo(wfInstance, wfAction);
wfJob.setWorkflowInstance(wfInstance);
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_END, wfAction));
wfJob.setLastModifiedTime(new Date());
updateList.add(new UpdateEntry<WorkflowJobQuery>(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_INSTANCE_MODIFIED, wfJob));
} finally {
try {
BatchQueryExecutor.getInstance().executeBatchInsertUpdateDelete(insertList, updateList, null);
} catch (JPAExecutorException e) {
throw new CommandException(e);
}
if (!(executor instanceof ControlNodeActionExecutor) && EventHandlerService.isEnabled()) {
generateEvent(wfAction, wfJob.getUser());
}
new SignalXCommand(jobId, actionId).call();
}
LOG.debug("ENDED ActionEndXCommand for action " + actionId);
return null;
}
use of org.apache.oozie.action.control.ControlNodeActionExecutor in project oozie by apache.
the class ActionKillXCommand method execute.
@Override
protected Void execute() throws CommandException {
LOG.debug("STARTED WorkflowActionKillXCommand for action " + actionId);
if (wfAction.isPending()) {
ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(wfAction.getType());
if (executor != null) {
ActionExecutorContext context = null;
try {
boolean isRetry = false;
boolean isUserRetry = false;
context = new ActionXCommand.ActionExecutorContext(wfJob, wfAction, isRetry, isUserRetry);
incrActionCounter(wfAction.getType(), 1);
Instrumentation.Cron cron = new Instrumentation.Cron();
cron.start();
executor.kill(context, wfAction);
cron.stop();
addActionCron(wfAction.getType(), cron);
wfAction.resetPending();
wfAction.setStatus(WorkflowActionBean.Status.KILLED);
wfAction.setEndTime(new Date());
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_END, wfAction));
wfJob.setLastModifiedTime(new Date());
updateList.add(new UpdateEntry<WorkflowJobQuery>(WorkflowJobQuery.UPDATE_WORKFLOW_MODTIME, wfJob));
// Add SLA status event (KILLED) for WF_ACTION
SLAEventBean slaEvent = SLADbXOperations.createStatusEvent(wfAction.getSlaXml(), wfAction.getId(), Status.KILLED, SlaAppType.WORKFLOW_ACTION);
if (slaEvent != null) {
insertList.add(slaEvent);
}
queue(new WorkflowNotificationXCommand(wfJob, wfAction));
} catch (ActionExecutorException ex) {
wfAction.resetPending();
wfAction.setStatus(WorkflowActionBean.Status.FAILED);
wfAction.setErrorInfo(ex.getErrorCode().toString(), "KILL COMMAND FAILED - exception while executing job kill");
wfAction.setEndTime(new Date());
wfJob.setStatus(WorkflowJobBean.Status.KILLED);
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_END, wfAction));
wfJob.setLastModifiedTime(new Date());
updateList.add(new UpdateEntry<WorkflowJobQuery>(WorkflowJobQuery.UPDATE_WORKFLOW_STATUS_MODTIME, wfJob));
// What will happen to WF and COORD_ACTION, NOTIFICATION?
SLAEventBean slaEvent = SLADbXOperations.createStatusEvent(wfAction.getSlaXml(), wfAction.getId(), Status.FAILED, SlaAppType.WORKFLOW_ACTION);
if (slaEvent != null) {
insertList.add(slaEvent);
}
LOG.warn("Exception while executing kill(). Error Code [{0}], Message[{1}]", ex.getErrorCode(), ex.getMessage(), ex);
} finally {
try {
cleanupActionDir(context);
BatchQueryExecutor.getInstance().executeBatchInsertUpdateDelete(insertList, updateList, null);
if (!(executor instanceof ControlNodeActionExecutor) && EventHandlerService.isEnabled()) {
generateEvent(wfAction, wfJob.getUser());
}
} catch (JPAExecutorException e) {
throw new CommandException(e);
}
}
}
}
LOG.debug("ENDED WorkflowActionKillXCommand for action " + actionId);
return null;
}
use of org.apache.oozie.action.control.ControlNodeActionExecutor in project oozie by apache.
the class ActionStartXCommand method execute.
@Override
protected ActionExecutorContext execute() throws CommandException {
LOG.debug("STARTED ActionStartXCommand for wf actionId=" + actionId);
Configuration conf = wfJob.getWorkflowInstance().getConf();
int maxRetries = 0;
long retryInterval = 0;
boolean execSynchronous = false;
if (!(executor instanceof ControlNodeActionExecutor)) {
maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
}
executor.setMaxRetries(maxRetries);
executor.setRetryInterval(retryInterval);
try {
boolean isRetry = false;
if (wfAction.getStatus() == WorkflowActionBean.Status.START_RETRY || wfAction.getStatus() == WorkflowActionBean.Status.START_MANUAL) {
isRetry = true;
prepareForRetry(wfAction);
}
boolean isUserRetry = false;
if (wfAction.getStatus() == WorkflowActionBean.Status.USER_RETRY) {
isUserRetry = true;
prepareForRetry(wfAction);
}
context = getContext(isRetry, isUserRetry);
boolean caught = false;
try {
if (!(executor instanceof ControlNodeActionExecutor)) {
String tmpActionConf = XmlUtils.removeComments(wfAction.getConf());
String actionConf = context.getELEvaluator().evaluate(tmpActionConf, String.class);
wfAction.setConf(actionConf);
LOG.debug("Start, name [{0}] type [{1}] configuration{E}{E}{2}{E}", wfAction.getName(), wfAction.getType(), actionConf);
}
} catch (ELEvaluationException ex) {
caught = true;
throw new ActionExecutorException(ActionExecutorException.ErrorType.TRANSIENT, EL_EVAL_ERROR, ex.getMessage(), ex);
} catch (ELException ex) {
caught = true;
context.setErrorInfo(EL_ERROR, ex.getMessage());
LOG.warn("ELException in ActionStartXCommand ", ex.getMessage(), ex);
handleError(context, wfJob, wfAction);
} catch (org.jdom.JDOMException je) {
caught = true;
context.setErrorInfo("ParsingError", je.getMessage());
LOG.warn("JDOMException in ActionStartXCommand ", je.getMessage(), je);
handleError(context, wfJob, wfAction);
} catch (Exception ex) {
caught = true;
context.setErrorInfo(EL_ERROR, ex.getMessage());
LOG.warn("Exception in ActionStartXCommand ", ex.getMessage(), ex);
handleError(context, wfJob, wfAction);
}
if (!caught) {
wfAction.setErrorInfo(null, null);
incrActionCounter(wfAction.getType(), 1);
LOG.info("Start action [{0}] with user-retry state : userRetryCount [{1}], userRetryMax [{2}], userRetryInterval" + " [{3}]", wfAction.getId(), wfAction.getUserRetryCount(), wfAction.getUserRetryMax(), wfAction.getUserRetryInterval());
Instrumentation.Cron cron = new Instrumentation.Cron();
cron.start();
// do not override starttime for retries
if (wfAction.getStartTime() == null) {
context.setStartTime();
}
context.setVar(JobUtils.getRetryKey(wfAction, JsonTags.WORKFLOW_ACTION_START_TIME), String.valueOf(new Date().getTime()));
executor.start(context, wfAction);
cron.stop();
FaultInjection.activate("org.apache.oozie.command.SkipCommitFaultInjection");
addActionCron(wfAction.getType(), cron);
wfAction.setRetries(0);
if (wfAction.isExecutionComplete()) {
if (!context.isExecuted()) {
LOG.warn(XLog.OPS, "Action Completed, ActionExecutor [{0}] must call setExecutionData()", executor.getType());
wfAction.setErrorInfo(EXEC_DATA_MISSING, "Execution Complete, but Execution Data Missing from Action");
failJob(context);
} else {
wfAction.setPending();
if (!(executor instanceof ControlNodeActionExecutor)) {
queue(new ActionEndXCommand(wfAction.getId(), wfAction.getType()));
} else {
execSynchronous = true;
}
}
} else {
if (!context.isStarted()) {
LOG.warn(XLog.OPS, "Action Started, ActionExecutor [{0}] must call setStartData()", executor.getType());
wfAction.setErrorInfo(START_DATA_MISSING, "Execution Started, but Start Data Missing from Action");
failJob(context);
} else {
queue(new WorkflowNotificationXCommand(wfJob, wfAction));
}
}
LOG.info(XLog.STD, "[***" + wfAction.getId() + "***]" + "Action status=" + wfAction.getStatusStr());
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_START, wfAction));
updateJobLastModified();
// Add SLA status event (STARTED) for WF_ACTION
SLAEventBean slaEvent = SLADbXOperations.createStatusEvent(wfAction.getSlaXml(), wfAction.getId(), Status.STARTED, SlaAppType.WORKFLOW_ACTION);
if (slaEvent != null) {
insertList.add(slaEvent);
}
LOG.info(XLog.STD, "[***" + wfAction.getId() + "***]" + "Action updated in DB!");
}
} catch (ActionExecutorException ex) {
LOG.warn("Error starting action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]", wfAction.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage(), ex);
wfAction.setErrorInfo(ex.getErrorCode(), ex.getMessage());
switch(ex.getErrorType()) {
case TRANSIENT:
if (!handleTransient(context, executor, WorkflowAction.Status.START_RETRY)) {
handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL);
wfAction.setPendingAge(new Date());
wfAction.setRetries(0);
wfAction.setStartTime(null);
}
break;
case NON_TRANSIENT:
handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL);
break;
case ERROR:
handleError(context, executor, WorkflowAction.Status.ERROR.toString(), true, WorkflowAction.Status.DONE);
break;
case FAILED:
try {
failJob(context);
endWF();
SLAEventBean slaEvent1 = SLADbXOperations.createStatusEvent(wfAction.getSlaXml(), wfAction.getId(), Status.FAILED, SlaAppType.WORKFLOW_ACTION);
if (slaEvent1 != null) {
insertList.add(slaEvent1);
}
} catch (XException x) {
LOG.warn("ActionStartXCommand - case:FAILED ", x.getMessage());
}
break;
}
updateList.add(new UpdateEntry<WorkflowActionQuery>(WorkflowActionQuery.UPDATE_ACTION_START, wfAction));
updateJobLastModified();
} finally {
try {
BatchQueryExecutor.getInstance().executeBatchInsertUpdateDelete(insertList, updateList, null);
if (!(executor instanceof ControlNodeActionExecutor) && EventHandlerService.isEnabled()) {
generateEvent(wfAction, wfJob.getUser());
}
if (execSynchronous) {
// Changing to synchronous call from asynchronous queuing to prevent
// undue delay from ::start:: to action due to queuing
callActionEnd();
}
} catch (JPAExecutorException e) {
throw new CommandException(e);
}
}
LOG.debug("ENDED ActionStartXCommand for wf actionId=" + actionId + ", jobId=" + jobId);
return null;
}
Aggregations