Use of io.cdap.cdap.api.workflow.WorkflowActionNode in project cdap by cdapio.
The class WorkflowDriver, method executeNode:
private void executeNode(ApplicationSpecification appSpec, WorkflowNode node, InstantiatorFactory instantiator,
                         ClassLoader classLoader, WorkflowToken token) throws Exception {
  WorkflowNodeType nodeType = node.getType();
  ((BasicWorkflowToken) token).setCurrentNode(node.getNodeId());
  switch (nodeType) {
    case ACTION:
      WorkflowActionNode actionNode = (WorkflowActionNode) node;
      if (SchedulableProgramType.CUSTOM_ACTION == actionNode.getProgram().getProgramType()) {
        executeCustomAction(actionNode, instantiator, classLoader, token);
      } else {
        executeAction(actionNode, token);
      }
      break;
    case FORK:
      executeFork(appSpec, (WorkflowForkNode) node, instantiator, classLoader, token);
      break;
    case CONDITION:
      executeCondition(appSpec, (WorkflowConditionNode) node, instantiator, classLoader, token);
      break;
    default:
      break;
  }
}
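The ACTION branch above turns entirely on the node's ScheduleProgramInfo: custom actions run through executeCustomAction, everything else (MapReduce, Spark) through executeAction. A minimal sketch of that check in isolation, assuming the io.cdap.cdap.api import paths used elsewhere on this page; the helper name isCustomAction is hypothetical:

import io.cdap.cdap.api.schedule.SchedulableProgramType;
import io.cdap.cdap.api.workflow.WorkflowActionNode;

// Mirrors the dispatch in executeNode: only CUSTOM_ACTION nodes run as
// in-workflow custom actions; other program types launch as separate programs.
static boolean isCustomAction(WorkflowActionNode actionNode) {
  return SchedulableProgramType.CUSTOM_ACTION == actionNode.getProgram().getProgramType();
}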
Use of io.cdap.cdap.api.workflow.WorkflowActionNode in project cdap by caskdata.
The class ApplicationVerificationStage, method verifyWorkflowAction:
private void verifyWorkflowAction(ApplicationSpecification appSpec, WorkflowNode node) {
  WorkflowActionNode actionNode = (WorkflowActionNode) node;
  ScheduleProgramInfo program = actionNode.getProgram();
  switch (program.getProgramType()) {
    case MAPREDUCE:
      Preconditions.checkArgument(appSpec.getMapReduce().containsKey(program.getProgramName()),
          String.format("MapReduce program '%s' is not configured with the Application.", program.getProgramName()));
      break;
    case SPARK:
      Preconditions.checkArgument(appSpec.getSpark().containsKey(program.getProgramName()),
          String.format("Spark program '%s' is not configured with the Application.", program.getProgramName()));
      break;
    case CUSTOM_ACTION:
      // no-op
      break;
    default:
      throw new RuntimeException(String.format("Unknown Program '%s' in the Workflow.", program.getProgramName()));
  }
}
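For contrast, a node that this verifier would accept under the MAPREDUCE case can be built with the ScheduleProgramInfo constructor used in the test further down; "wordCount" is a hypothetical program name that would have to be present in appSpec.getMapReduce():

// Passes verifyWorkflowAction only if the application spec registers a
// MapReduce program named "wordCount"; otherwise checkArgument throws.
WorkflowNode node = new WorkflowActionNode("wordCount",
    new ScheduleProgramInfo(SchedulableProgramType.MAPREDUCE, "wordCount"));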
Use of io.cdap.cdap.api.workflow.WorkflowActionNode in project cdap by caskdata.
The class LineageAdmin, method extractAndAddInnerPrograms:
/**
 * Extracts inner programs and runs from the workflow run record; the run record's properties contain all the
 * inner program run ids. The workflow spec can then be used to determine what the inner programs are and to
 * create the program run ids for them.
 */
private void extractAndAddInnerPrograms(Set<ProgramId> toVisitPrograms,
                                        Map<ProgramRunId, ProgramRunId> programWorkflowMap,
                                        Map<ApplicationId, ApplicationSpecification> appSpecs,
                                        ProgramRunId programRunId, RunRecordDetail wfRunRecord) {
  ApplicationId appId = programRunId.getParent().getParent();
  WorkflowSpecification workflowSpec = appSpecs.get(appId).getWorkflows().get(programRunId.getProgram());
  Map<String, WorkflowNode> nodeIdMap = workflowSpec.getNodeIdMap();
  wfRunRecord.getProperties().forEach((key, value) -> {
    if (nodeIdMap.containsKey(key)) {
      WorkflowActionNode node = (WorkflowActionNode) nodeIdMap.get(key);
      ProgramType type = ProgramType.valueOf(node.getProgram().getProgramType().name());
      ProgramId program = appId.program(type, key);
      programWorkflowMap.put(program.run(value), programRunId);
      toVisitPrograms.add(program);
    }
  });
}
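One subtle step above is ProgramType.valueOf(node.getProgram().getProgramType().name()): SchedulableProgramType and ProgramType are distinct enums related only by constant name. A sketch of that bridge, assuming the second enum is io.cdap.cdap.proto.ProgramType; toProgramType is a hypothetical helper:

import io.cdap.cdap.api.schedule.SchedulableProgramType;
import io.cdap.cdap.proto.ProgramType;

// Bridges the two enums by constant name; valueOf throws
// IllegalArgumentException if ProgramType has no identically named constant.
static ProgramType toProgramType(SchedulableProgramType schedulable) {
  return ProgramType.valueOf(schedulable.name());
}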
Use of io.cdap.cdap.api.workflow.WorkflowActionNode in project cdap by caskdata.
The class WorkflowNodeCreator, method createWorkflowCustomActionNode:
static WorkflowNode createWorkflowCustomActionNode(CustomAction action, Id.Namespace deployNamespace,
                                                   Id.Artifact artifactId, PluginFinder pluginFinder,
                                                   PluginInstantiator pluginInstantiator,
                                                   @Nullable AppDeploymentRuntimeInfo runtimeInfo,
                                                   FeatureFlagsProvider featureFlagsProvider) {
  Preconditions.checkArgument(action != null, "CustomAction is null.");
  CustomActionSpecification spec = DefaultCustomActionConfigurer.configureAction(action, deployNamespace,
      artifactId, pluginFinder, pluginInstantiator, runtimeInfo, featureFlagsProvider);
  return new WorkflowActionNode(spec.getName(), spec);
}
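This shows the second of WorkflowActionNode's two shapes on this page: a custom action node wraps its full CustomActionSpecification, while the schedulable-program form (used in the test below) wraps only a name reference. A side-by-side sketch, where spec stands for the CustomActionSpecification configured above:

// Custom action: the node carries the complete specification.
WorkflowNode custom = new WorkflowActionNode(spec.getName(), spec);
// MapReduce/Spark: the node carries only a ScheduleProgramInfo reference,
// which ApplicationVerificationStage later resolves against the app spec.
WorkflowNode mapReduce = new WorkflowActionNode("mr1",
    new ScheduleProgramInfo(SchedulableProgramType.MAPREDUCE, "mr1"));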
Use of io.cdap.cdap.api.workflow.WorkflowActionNode in project cdap by caskdata.
The class LineageAdminTest, method testWorkflowLineage:
@Test
public void testWorkflowLineage() {
  TransactionRunner transactionRunner = getInjector().getInstance(TransactionRunner.class);
  LineageStoreReader lineageReader = new DefaultLineageStoreReader(transactionRunner);
  LineageWriter lineageWriter = new BasicLineageWriter(transactionRunner);
  ApplicationId testApp = NamespaceId.DEFAULT.app("testApp");
  ProgramId workflowId = testApp.workflow("wf1");
  // if the spark and mr jobs are inner jobs of the workflow, they should be in the same app
  ProgramId mrId = testApp.mr("mr1");
  ProgramId sparkId = testApp.spark("spark1");
  ImmutableList<WorkflowNode> nodes = ImmutableList.of(
      new WorkflowActionNode("mr1", new ScheduleProgramInfo(SchedulableProgramType.MAPREDUCE, "mr1")),
      new WorkflowActionNode("spark1", new ScheduleProgramInfo(SchedulableProgramType.SPARK, "spark1")));
  WorkflowSpecification wfSpec = new WorkflowSpecification("test", "wf1", "", Collections.emptyMap(), nodes,
      Collections.emptyMap(), Collections.emptyMap());
  ApplicationSpecification appSpec = new DefaultApplicationSpecification("testApp",
      ProjectInfo.getVersion().toString(), "dummy app", null,
      NamespaceId.DEFAULT.artifact("testArtifact", "1.0").toApiArtifactId(),
      Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
      ImmutableMap.of(workflowId.getProgram(), wfSpec),
      Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
  Store store = getInjector().getInstance(Store.class);
  store.addApplication(testApp, appSpec);
  LineageAdmin lineageAdmin = new LineageAdmin(lineageReader, store);
  // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
  //                                          |
  //                                          |-> P5
  // P1 and P2 are inner programs of the workflow
  // We need to use current time here as the metadata store stores access time using current time
  ProgramRunId run1 = mrId.run(RunIds.generate(System.currentTimeMillis()).getId());
  ProgramRunId run2 = sparkId.run(RunIds.generate(System.currentTimeMillis()).getId());
  ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
  ProgramRunId workflow = workflowId.run(RunIds.generate(System.currentTimeMillis()).getId());
  ProgramRunId run5 = program5.run(RunIds.generate(System.currentTimeMillis()).getId());
  addRuns(store, workflow);
  // only mr and spark can be inner programs
  addWorkflowRuns(store, workflow.getProgram(), workflow.getRun(), run1, run2);
  addRuns(store, run3);
  addRuns(store, run5);
  // It is okay to use current time here since access time is ignored during assertions
  lineageWriter.addAccess(run1, dataset1, AccessType.WRITE);
  lineageWriter.addAccess(run1, dataset2, AccessType.READ);
  lineageWriter.addAccess(run2, dataset2, AccessType.WRITE);
  lineageWriter.addAccess(run2, dataset3, AccessType.READ);
  lineageWriter.addAccess(run3, dataset1, AccessType.UNKNOWN, null);
  lineageWriter.addAccess(run5, dataset1, AccessType.READ, null);
  // The UNKNOWN access type will get filtered out if there is a READ/WRITE access. It will be preserved
  // if it is the only access type.
  Lineage expectedLineage = new Lineage(ImmutableSet.of(
      new Relation(dataset1, workflowId, AccessType.WRITE, twillRunId(workflow)),
      new Relation(dataset2, workflowId, AccessType.READ, twillRunId(workflow)),
      new Relation(dataset2, workflowId, AccessType.WRITE, twillRunId(workflow)),
      new Relation(dataset3, workflowId, AccessType.READ, twillRunId(workflow)),
      new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)),
      new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
  // Lineage for D1
  Lineage resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100,
                                                      "workflow");
  Assert.assertEquals(expectedLineage, resultLineage);
  // Lineage for D2
  resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, "workflow");
  Assert.assertEquals(expectedLineage, resultLineage);
  // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
  Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1,
                                                        "workflow");
  Assert.assertEquals(ImmutableSet.of(
      new Relation(dataset1, workflowId, AccessType.WRITE, twillRunId(workflow)),
      new Relation(dataset2, workflowId, AccessType.READ, twillRunId(workflow)),
      new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)),
      new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))),
      oneLevelLineage.getRelations());
  // Run tests without workflow parameter
  expectedLineage = new Lineage(ImmutableSet.of(
      new Relation(dataset1, mrId, AccessType.WRITE, twillRunId(run1)),
      new Relation(dataset2, mrId, AccessType.READ, twillRunId(run1)),
      new Relation(dataset2, sparkId, AccessType.WRITE, twillRunId(run2)),
      new Relation(dataset3, sparkId, AccessType.READ, twillRunId(run2)),
      new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)),
      new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
  // Lineage for D1
  resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, null);
  Assert.assertEquals(expectedLineage, resultLineage);
  // Lineage for D2
  resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, null);
  Assert.assertEquals(expectedLineage, resultLineage);
  // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
  oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, null);
  Assert.assertEquals(ImmutableSet.of(
      new Relation(dataset1, mrId, AccessType.WRITE, twillRunId(run1)),
      new Relation(dataset2, mrId, AccessType.READ, twillRunId(run1)),
      new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)),
      new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))),
      oneLevelLineage.getRelations());
  // Assert that in a different namespace both lineage and metadata should be empty
  NamespaceId customNamespace = new NamespaceId("custom_namespace");
  DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
  Assert.assertEquals(new Lineage(ImmutableSet.of()),
      lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
}
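The first half of the test exercises the rollup that extractAndAddInnerPrograms enables: when a workflow name is passed to computeLineage, relations recorded against the inner runs (run1, run2) are reported under the parent workflow run, while standalone runs (run3, run5) are unchanged. A sketch of that re-attribution, assuming a programWorkflowMap shaped like the one built above; attribute is a hypothetical helper, not the actual LineageAdmin implementation:

import io.cdap.cdap.proto.id.ProgramRunId;
import java.util.Map;

// Inner MapReduce/Spark runs collapse onto their parent workflow run;
// runs with no workflow parent are reported as themselves.
static ProgramRunId attribute(Map<ProgramRunId, ProgramRunId> programWorkflowMap, ProgramRunId run) {
  return programWorkflowMap.getOrDefault(run, run);
}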