Search in sources :

Example 6 with WorkflowNode

use of io.cdap.cdap.api.workflow.WorkflowNode in project cdap by caskdata.

the class DefaultWorkflowConfigurer method createConditionNodeWithId.

/**
 * Builds a new condition node from the given one, with the nodes of both branches
 * re-created through {@code createNodesWithId}.
 *
 * @param node the node to rebuild; it is cast to {@link WorkflowConditionNode}
 * @return a new {@link WorkflowConditionNode} constructed with the condition specification's
 *         name as its first argument (presumably the node id), the specification itself and
 *         the two re-created branches
 */
private WorkflowNode createConditionNodeWithId(WorkflowNode node) {
    WorkflowConditionNode original = (WorkflowConditionNode) node;
    // The "if" branch is processed before the "else" branch.
    List<WorkflowNode> ifNodes = Lists.newArrayList(createNodesWithId(original.getIfBranch()));
    List<WorkflowNode> elseNodes = Lists.newArrayList(createNodesWithId(original.getElseBranch()));
    ConditionSpecification conditionSpec = original.getConditionSpecification();
    return new WorkflowConditionNode(conditionSpec.getName(), conditionSpec, ifNodes, elseNodes);
}
Also used : WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode) WorkflowConditionNode(io.cdap.cdap.api.workflow.WorkflowConditionNode) DefaultConditionSpecification(io.cdap.cdap.internal.workflow.condition.DefaultConditionSpecification) ConditionSpecification(io.cdap.cdap.api.workflow.ConditionSpecification)

Example 7 with WorkflowNode

use of io.cdap.cdap.api.workflow.WorkflowNode in project cdap by caskdata.

the class ApplicationVerificationStage method verifyWorkflowNodeList.

/**
 * Verifies every node of the list in order, rejecting any node whose id has already been seen.
 *
 * @param appSpec the application specification, passed through to {@code verifyWorkflowNode}
 * @param workflowSpec the workflow being verified; its name is used in the error message
 * @param nodeList the nodes to verify
 * @param existingNodeNames ids seen so far; the id of each visited node is added to this set
 * @throws RuntimeException if a node id is already contained in {@code existingNodeNames}
 */
private void verifyWorkflowNodeList(ApplicationSpecification appSpec, WorkflowSpecification workflowSpec, List<WorkflowNode> nodeList, Set<String> existingNodeNames) {
    for (WorkflowNode node : nodeList) {
        String nodeId = node.getNodeId();
        // Set#add reports false when the id was already present, i.e. a duplicate node.
        if (!existingNodeNames.add(nodeId)) {
            throw new RuntimeException(String.format("Node '%s' already exists in workflow '%s'.", nodeId, workflowSpec.getName()));
        }
        verifyWorkflowNode(appSpec, workflowSpec, node, existingNodeNames);
    }
}
Also used : WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode)

Example 8 with WorkflowNode

use of io.cdap.cdap.api.workflow.WorkflowNode in project cdap by caskdata.

the class LineageAdminTest method testWorkflowLineage.

/**
 * Checks lineage computation for programs that run inside a workflow.
 *
 * <p>The test registers an application with one workflow ("wf1") containing two action nodes,
 * records runs and dataset accesses for them and for two further programs, and then calls
 * {@code LineageAdmin.computeLineage} twice over: once with {@code "workflow"} as the last
 * argument, where the expected relations reference the workflow id and workflow run, and once
 * with {@code null}, where the expected relations reference the individual program ids and runs.
 * Finally it asserts that a dataset with the same name in another namespace has empty lineage.
 *
 * <p>{@code dataset1}..{@code dataset3}, {@code program3}, {@code program5}, {@code addRuns},
 * {@code addWorkflowRuns} and {@code twillRunId} are declared outside this method.
 */
@Test
public void testWorkflowLineage() {
    TransactionRunner transactionRunner = getInjector().getInstance(TransactionRunner.class);
    LineageStoreReader lineageReader = new DefaultLineageStoreReader(transactionRunner);
    LineageWriter lineageWriter = new BasicLineageWriter(transactionRunner);
    ApplicationId testApp = NamespaceId.DEFAULT.app("testApp");
    ProgramId workflowId = testApp.workflow("wf1");
    // if the spark and mr job are inner jobs of workflow, they should be in the same app
    ProgramId mrId = testApp.mr("mr1");
    // NOTE(review): "spark1" is created via mr(...), while the workflow node below declares it as
    // SchedulableProgramType.SPARK -- confirm whether testApp.spark("spark1") was intended.
    ProgramId sparkId = testApp.mr("spark1");
    // The workflow specification lists both programs as action nodes of "wf1".
    ImmutableList<WorkflowNode> nodes = ImmutableList.of(new WorkflowActionNode("mr1", new ScheduleProgramInfo(SchedulableProgramType.MAPREDUCE, "mr1")), new WorkflowActionNode("spark1", new ScheduleProgramInfo(SchedulableProgramType.SPARK, "spark1")));
    WorkflowSpecification wfSpec = new WorkflowSpecification("test", "wf1", "", Collections.emptyMap(), nodes, Collections.emptyMap(), Collections.emptyMap());
    ApplicationSpecification appSpec = new DefaultApplicationSpecification("testApp", ProjectInfo.getVersion().toString(), "dummy app", null, NamespaceId.DEFAULT.artifact("testArtifact", "1.0").toApiArtifactId(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), ImmutableMap.of(workflowId.getProgram(), wfSpec), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
    // Register the application in the store that the LineageAdmin under test reads from.
    Store store = getInjector().getInstance(Store.class);
    store.addApplication(testApp, appSpec);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageReader, store);
    // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
    // |
    // |-> P5,
    // P1 and P2 are inner programs of the workflow
    // We need to use current time here as metadata store stores access time using current time
    ProgramRunId run1 = mrId.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run2 = sparkId.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId workflow = workflowId.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run5 = program5.run(RunIds.generate(System.currentTimeMillis()).getId());
    addRuns(store, workflow);
    // only mr and spark can be inner programs
    addWorkflowRuns(store, workflow.getProgram(), workflow.getRun(), run1, run2);
    addRuns(store, run3);
    addRuns(store, run5);
    // It is okay to use current time here since access time is ignored during assertions
    lineageWriter.addAccess(run1, dataset1, AccessType.WRITE);
    lineageWriter.addAccess(run1, dataset2, AccessType.READ);
    lineageWriter.addAccess(run2, dataset2, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset3, AccessType.READ);
    lineageWriter.addAccess(run3, dataset1, AccessType.UNKNOWN, null);
    lineageWriter.addAccess(run5, dataset1, AccessType.READ, null);
    // The UNKNOWN access type will get filtered out if there is READ/WRITE. It will be preserved if it is the
    // only access type
    // With the "workflow" argument, accesses of run1 and run2 are expected under the workflow id and run.
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, workflowId, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, workflowId, AccessType.READ, twillRunId(workflow)), new Relation(dataset2, workflowId, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset3, workflowId, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    Lineage resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, "workflow");
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, workflowId, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, workflowId, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Run tests without workflow parameter
    // Here the expected relations reference the inner programs (mrId/sparkId) and their own runs.
    expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, mrId, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, mrId, AccessType.READ, twillRunId(run1)), new Relation(dataset2, sparkId, AccessType.WRITE, twillRunId(run2)), new Relation(dataset3, sparkId, AccessType.READ, twillRunId(run2)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, null);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, mrId, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, mrId, AccessType.READ, twillRunId(run1)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Assert that in a different namespace both lineage and metadata should be empty
    NamespaceId customNamespace = new NamespaceId("custom_namespace");
    DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
    Assert.assertEquals(new Lineage(ImmutableSet.of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
}
Also used : DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) WorkflowActionNode(io.cdap.cdap.api.workflow.WorkflowActionNode) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) Store(io.cdap.cdap.app.store.Store) ProgramId(io.cdap.cdap.proto.id.ProgramId) WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode) DatasetId(io.cdap.cdap.proto.id.DatasetId) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) Test(org.junit.Test)

Example 9 with WorkflowNode

use of io.cdap.cdap.api.workflow.WorkflowNode in project cdap by caskdata.

the class WorkflowDriver method executeAll.

/**
 * Executes the nodes supplied by the iterator one after another, for as long as the iterator has
 * elements and {@code runningThread} is not {@code null}.
 *
 * <p>If a failure's root cause is an {@link InterruptedException}, the workflow state is set to
 * {@code KILLED} and the method returns normally without executing further nodes. For any other
 * failure the state is set to {@code FAILED} and the original throwable is rethrown.
 *
 * @param iterator source of the nodes to execute
 * @param appSpec passed through to {@code executeNode}
 * @param instantiator passed through to {@code executeNode}
 * @param classLoader passed through to {@code executeNode}
 * @param token passed through to {@code executeNode}
 * @throws Exception the failure raised while waiting on suspension or executing a node
 */
private void executeAll(Iterator<WorkflowNode> iterator, ApplicationSpecification appSpec, InstantiatorFactory instantiator, ClassLoader classLoader, WorkflowToken token) throws Exception {
    while (iterator.hasNext() && runningThread != null) {
        try {
            // Wait here first if the workflow is currently suspended.
            blockIfSuspended();
            executeNode(appSpec, iterator.next(), instantiator, classLoader, token);
        } catch (Throwable failure) {
            Throwable cause = Throwables.getRootCause(failure);
            if (!(cause instanceof InterruptedException)) {
                // Genuine failure: record it and propagate the original throwable unchanged.
                workflowContext.setState(new ProgramState(ProgramStatus.FAILED, Exceptions.condenseThrowableMessage(failure)));
                throw failure;
            }
            // Interruption is treated as an abort of the workflow: stop without propagating.
            LOG.debug("Workflow '{}' with run id '{}' aborted", workflowSpec.getName(), workflowRunId.getRun());
            workflowContext.setState(new ProgramState(ProgramStatus.KILLED, cause.getMessage()));
            return;
        }
    }
}
Also used : BasicThrowable(io.cdap.cdap.proto.BasicThrowable) ProgramState(io.cdap.cdap.api.ProgramState) WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode)

Example 10 with WorkflowNode

use of io.cdap.cdap.api.workflow.WorkflowNode in project cdap by caskdata.

the class WorkflowDriver method executeFork.

/**
 * Executes every branch of a fork node on its own executor thread and merges the token produced
 * by each branch back into {@code token}.
 *
 * <p>Each branch runs {@code executeAll} against a deep copy of {@code token}. The calling thread
 * collects branch results in completion order. If the calling thread is interrupted while
 * waiting, collection stops; if a branch fails, the cause of the failure is rethrown.
 *
 * <p>On every exit path the (possibly partially merged) token is written through
 * {@code workflowStateWriter}, the executor is shut down and this method waits for the executor
 * to terminate. The shutdown is performed even when writing the token throws, so branch threads
 * are not leaked.
 *
 * @param appSpec passed through to {@code executeAll}
 * @param fork the fork node whose branches are executed
 * @param instantiator passed through to {@code executeAll}
 * @param classLoader passed through to {@code executeAll}
 * @param token the workflow token; must be a {@code BasicWorkflowToken}, branch tokens are merged into it
 * @throws Exception the cause of a failed branch, a failure to write the token, or an
 *         {@link InterruptedException} while waiting for the executor to terminate
 */
private void executeFork(final ApplicationSpecification appSpec, WorkflowForkNode fork, final InstantiatorFactory instantiator, final ClassLoader classLoader, final WorkflowToken token) throws Exception {
    CountDownLatch executorTerminateLatch = new CountDownLatch(1);
    ExecutorService executorService = createExecutor(fork.getBranches().size(), executorTerminateLatch, "fork-" + fork.getNodeId() + "-%d");
    CompletionService<Map.Entry<String, WorkflowToken>> completionService = new ExecutorCompletionService<>(executorService);
    try {
        for (final List<WorkflowNode> branch : fork.getBranches()) {
            completionService.submit(new Callable<Map.Entry<String, WorkflowToken>>() {

                @Override
                public Map.Entry<String, WorkflowToken> call() throws Exception {
                    // Each branch works on its own copy so that branches do not see each other's updates.
                    WorkflowToken copiedToken = ((BasicWorkflowToken) token).deepCopy();
                    executeAll(branch.iterator(), appSpec, instantiator, classLoader, copiedToken);
                    return Maps.immutableEntry(branch.toString(), copiedToken);
                }
            });
        }
        for (int i = 0; i < fork.getBranches().size(); i++) {
            try {
                // take() blocks until some branch has finished, in completion order.
                Future<Map.Entry<String, WorkflowToken>> forkBranchResult = completionService.take();
                Map.Entry<String, WorkflowToken> retValue = forkBranchResult.get();
                String branchInfo = retValue.getKey();
                WorkflowToken branchToken = retValue.getValue();
                ((BasicWorkflowToken) token).mergeToken(branchToken);
                LOG.trace("Execution of branch {} for fork {} completed.", branchInfo, fork);
            } catch (InterruptedException e) {
                // Due to workflow abortion, so just break the loop
                break;
            } catch (ExecutionException e) {
                // Unwrap the cause
                Throwables.propagateIfPossible(e.getCause(), Exception.class);
                throw Throwables.propagate(e.getCause());
            }
        }
    } finally {
        try {
            // Update the WorkflowToken after the execution of the FORK node completes.
            workflowStateWriter.setWorkflowToken(workflowRunId, token);
        } finally {
            // Always stop the branch threads, even if persisting the token failed;
            // otherwise the executor and its threads would be leaked.
            executorService.shutdownNow();
            // Wait for the executor termination
            executorTerminateLatch.await();
        }
    }
}
Also used : WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) ExecutorCompletionService(java.util.concurrent.ExecutorCompletionService) CountDownLatch(java.util.concurrent.CountDownLatch) WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode) DatasetManagementException(io.cdap.cdap.api.dataset.DatasetManagementException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ExecutorService(java.util.concurrent.ExecutorService) ExecutionException(java.util.concurrent.ExecutionException) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Aggregations

WorkflowNode (io.cdap.cdap.api.workflow.WorkflowNode)38 WorkflowSpecification (io.cdap.cdap.api.workflow.WorkflowSpecification)22 WorkflowActionNode (io.cdap.cdap.api.workflow.WorkflowActionNode)16 ScheduleProgramInfo (io.cdap.cdap.api.workflow.ScheduleProgramInfo)12 WorkflowForkNode (io.cdap.cdap.api.workflow.WorkflowForkNode)10 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)10 ProgramId (io.cdap.cdap.proto.id.ProgramId)10 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)8 ProgramType (io.cdap.cdap.proto.ProgramType)8 Map (java.util.Map)8 ImmutableMap (com.google.common.collect.ImmutableMap)6 WorkflowConditionNode (io.cdap.cdap.api.workflow.WorkflowConditionNode)6 Store (io.cdap.cdap.app.store.Store)6 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)5 TransactionRunner (io.cdap.cdap.spi.data.transaction.TransactionRunner)5 SchedulableProgramType (io.cdap.cdap.api.schedule.SchedulableProgramType)4 DefaultLineageStoreReader (io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader)4 Lineage (io.cdap.cdap.data2.metadata.lineage.Lineage)4 LineageStoreReader (io.cdap.cdap.data2.metadata.lineage.LineageStoreReader)4 Relation (io.cdap.cdap.data2.metadata.lineage.Relation)4