Search in sources :

Example 6 with WorkflowSpecification

use of io.cdap.cdap.api.workflow.WorkflowSpecification in project cdap by caskdata.

the class LineageAdminTest method testWorkflowLineage.

/**
 * Verifies lineage computation for programs that run inside a workflow.
 *
 * <p>The test registers application "testApp" in the default namespace with a workflow "wf1" that has two
 * action nodes ("mr1" declared as MAPREDUCE and "spark1" declared as SPARK), records runs and dataset
 * accesses, and then computes lineage twice: once with the rollup argument "workflow", where the expected
 * relations reference the workflow instead of its inner programs, and once with a null rollup, where the
 * expected relations reference the inner programs directly.
 *
 * <p>{@code dataset1}, {@code dataset2}, {@code dataset3}, {@code program3}, {@code program5},
 * {@code addRuns}, {@code addWorkflowRuns} and {@code twillRunId} are not defined in this method;
 * presumably they are fields and helpers of the enclosing test class.
 */
@Test
public void testWorkflowLineage() {
    TransactionRunner transactionRunner = getInjector().getInstance(TransactionRunner.class);
    LineageStoreReader lineageReader = new DefaultLineageStoreReader(transactionRunner);
    LineageWriter lineageWriter = new BasicLineageWriter(transactionRunner);
    ApplicationId testApp = NamespaceId.DEFAULT.app("testApp");
    ProgramId workflowId = testApp.workflow("wf1");
    // if the spark and mr job are inner jobs of workflow, they should be in the same app
    ProgramId mrId = testApp.mr("mr1");
    // NOTE(review): this id is created with mr(...) although the workflow node below declares "spark1" as
    // SchedulableProgramType.SPARK -- confirm whether this is intentional before changing it.
    ProgramId sparkId = testApp.mr("spark1");
    // The workflow consists of two action nodes: "mr1" followed by "spark1"
    ImmutableList<WorkflowNode> nodes = ImmutableList.of(new WorkflowActionNode("mr1", new ScheduleProgramInfo(SchedulableProgramType.MAPREDUCE, "mr1")), new WorkflowActionNode("spark1", new ScheduleProgramInfo(SchedulableProgramType.SPARK, "spark1")));
    WorkflowSpecification wfSpec = new WorkflowSpecification("test", "wf1", "", Collections.emptyMap(), nodes, Collections.emptyMap(), Collections.emptyMap());
    // The application spec contains only the workflow; every other map argument is empty
    ApplicationSpecification appSpec = new DefaultApplicationSpecification("testApp", ProjectInfo.getVersion().toString(), "dummy app", null, NamespaceId.DEFAULT.artifact("testArtifact", "1.0").toApiArtifactId(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), ImmutableMap.of(workflowId.getProgram(), wfSpec), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
    Store store = getInjector().getInstance(Store.class);
    store.addApplication(testApp, appSpec);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageReader, store);
    // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
    //                                          |
    //                                          |-> P5,
    // P1 and P2 are inner programs of the workflow
    // We need to use current time here as metadata store stores access time using current time
    ProgramRunId run1 = mrId.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run2 = sparkId.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId workflow = workflowId.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run5 = program5.run(RunIds.generate(System.currentTimeMillis()).getId());
    addRuns(store, workflow);
    // only mr and spark can be inner programs
    addWorkflowRuns(store, workflow.getProgram(), workflow.getRun(), run1, run2);
    addRuns(store, run3);
    addRuns(store, run5);
    // It is okay to use current time here since access time is ignored during assertions
    lineageWriter.addAccess(run1, dataset1, AccessType.WRITE);
    lineageWriter.addAccess(run1, dataset2, AccessType.READ);
    lineageWriter.addAccess(run2, dataset2, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset3, AccessType.READ);
    lineageWriter.addAccess(run3, dataset1, AccessType.UNKNOWN, null);
    lineageWriter.addAccess(run5, dataset1, AccessType.READ, null);
    // The UNKNOWN access type will get filtered out if there is READ/WRITE. It will be preserved if it is the
    // only access type
    // With the "workflow" rollup, the accesses of run1 and run2 are expected to be reported against the
    // workflow program and the workflow run
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, workflowId, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, workflowId, AccessType.READ, twillRunId(workflow)), new Relation(dataset2, workflowId, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset3, workflowId, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    Lineage resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3 plus D1 -> P5, with P1 reported as the
    // workflow
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, "workflow");
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, workflowId, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, workflowId, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Run tests without workflow parameter: relations are expected against the inner programs and their runs
    expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, mrId, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, mrId, AccessType.READ, twillRunId(run1)), new Relation(dataset2, sparkId, AccessType.WRITE, twillRunId(run2)), new Relation(dataset3, sparkId, AccessType.READ, twillRunId(run2)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3 plus D1 -> P5
    oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, null);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, mrId, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, mrId, AccessType.READ, twillRunId(run1)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Assert that lineage is empty for a dataset with the same name in a different namespace
    NamespaceId customNamespace = new NamespaceId("custom_namespace");
    DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
    Assert.assertEquals(new Lineage(ImmutableSet.of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
}
Also used : DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) WorkflowActionNode(io.cdap.cdap.api.workflow.WorkflowActionNode) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) Store(io.cdap.cdap.app.store.Store) ProgramId(io.cdap.cdap.proto.id.ProgramId) WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode) DatasetId(io.cdap.cdap.proto.id.DatasetId) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ScheduleProgramInfo(io.cdap.cdap.api.workflow.ScheduleProgramInfo) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) Test(org.junit.Test)

Example 7 with WorkflowSpecification

use of io.cdap.cdap.api.workflow.WorkflowSpecification in project cdap by caskdata.

the class ApplicationLifecycleService method deleteAppVersion.

/**
 * Removes a single application version. No check is made that the programs of the application
 * are stopped before deletion.
 *
 * <p>The schedules of the application are deleted first, then the schedules triggered by each of
 * its workflows are updated, and finally the application is removed from the store.
 *
 * @param appId the id of the application to delete
 * @param spec the spec of the application to delete
 */
private void deleteAppVersion(ApplicationId appId, ApplicationSpecification spec) {
    // Schedules owned by the application go first
    scheduler.deleteSchedules(appId);
    // Then adjust the schedules that were triggered by any workflow of this application
    spec.getWorkflows().values().stream()
        .map(workflow -> appId.workflow(workflow.getName()))
        .forEach(deletedWorkflow -> scheduler.modifySchedulesTriggeredByDeletedProgram(deletedWorkflow));
    store.removeApplication(appId);
}
Also used : WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification)

Example 8 with WorkflowSpecification

use of io.cdap.cdap.api.workflow.WorkflowSpecification in project cdap by caskdata.

the class ApplicationLifecycleService method deleteApp.

/**
 * Delete the specified application without performing checks that its programs are stopped
 * or that no programs are running.
 *
 * <p>Steps, in order: delete the application's schedules, update schedules triggered by its workflows,
 * delete metrics, preferences and application metadata, delete workflow statistics, remove the application
 * from the store, delete the owner, unregister usage, and publish the application deletion event.
 * Failures while deleting the owner or unregistering usage are logged as warnings and do not abort
 * the deletion.
 *
 * @param appId the id of the application to delete
 * @param spec the spec of the application to delete
 * @throws Exception if any step other than owner deletion or usage unregistration fails
 */
private void deleteApp(ApplicationId appId, ApplicationSpecification spec) throws Exception {
    // Delete the schedules
    scheduler.deleteSchedules(appId);
    for (WorkflowSpecification workflowSpec : spec.getWorkflows().values()) {
        scheduler.modifySchedulesTriggeredByDeletedProgram(appId.workflow(workflowSpec.getName()));
    }
    deleteMetrics(appId, spec);
    // Delete all preferences of the application and of all its programs
    deletePreferences(appId, spec);
    deleteAppMetadata(appId, spec);
    store.deleteWorkflowStats(appId);
    store.removeApplication(appId);
    try {
        // delete the owner as it has already been determined that this is the only version of the app
        ownerAdmin.delete(appId);
    } catch (Exception e) {
        // Best effort: keep going, but pass the exception to the logger so the cause is not lost
        LOG.warn("Failed to delete app owner principal for application {} if one existed while deleting the "
                   + "application.", appId, e);
    }
    try {
        usageRegistry.unregister(appId);
    } catch (Exception e) {
        // Best effort: a failure to unregister usage must not fail the deletion
        LOG.warn("Failed to unregister usage of app: {}", appId, e);
    }
    // make sure the program profile metadata is removed
    adminEventPublisher.publishAppDeletion(appId, spec);
}
Also used : WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) ApplicationNotFoundException(io.cdap.cdap.common.ApplicationNotFoundException) CapabilityNotAvailableException(io.cdap.cdap.internal.capability.CapabilityNotAvailableException) IOException(java.io.IOException) CannotBeDeletedException(io.cdap.cdap.common.CannotBeDeletedException) ExecutionException(java.util.concurrent.ExecutionException) AccessException(io.cdap.cdap.api.security.AccessException) JsonIOException(com.google.gson.JsonIOException) InvalidArtifactException(io.cdap.cdap.common.InvalidArtifactException) ArtifactAlreadyExistsException(io.cdap.cdap.common.ArtifactAlreadyExistsException) NotFoundException(io.cdap.cdap.common.NotFoundException) ArtifactNotFoundException(io.cdap.cdap.common.ArtifactNotFoundException)

Example 9 with WorkflowSpecification

use of io.cdap.cdap.api.workflow.WorkflowSpecification in project cdap by caskdata.

the class WorkflowHttpHandler method getWorkflowNodeStates.

/**
 * Responds with the node states of a workflow run, serialized as a JSON object keyed by node id.
 *
 * <p>The application, the workflow within the application spec, and the run are each looked up in the
 * store before the node states are read. The "namespace-id" path parameter does not appear in this
 * method's own path; presumably it comes from a class-level path -- verify against the handler class.
 *
 * @param request the incoming HTTP request (not read by this method)
 * @param responder used to send the JSON response with status OK
 * @param namespaceId the namespace of the application
 * @param applicationId the name of the application
 * @param workflowId the name of the workflow
 * @param runId the id of the workflow run
 * @throws NotFoundException if the application, the workflow, or the run cannot be found
 */
@GET
@Path("/apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/nodes/state")
public void getWorkflowNodeStates(HttpRequest request, HttpResponder responder,
                                  @PathParam("namespace-id") String namespaceId,
                                  @PathParam("app-id") String applicationId,
                                  @PathParam("workflow-id") String workflowId,
                                  @PathParam("run-id") String runId) throws NotFoundException {
    // The application must exist
    ApplicationId targetApp = Ids.namespace(namespaceId).app(applicationId);
    ApplicationSpecification targetAppSpec = store.getApplication(targetApp);
    if (targetAppSpec == null) {
        throw new ApplicationNotFoundException(targetApp);
    }

    // The workflow must be part of the application spec
    ProgramId workflowProgram = targetApp.workflow(workflowId);
    if (targetAppSpec.getWorkflows().get(workflowProgram.getProgram()) == null) {
        throw new ProgramNotFoundException(workflowProgram);
    }

    // The run must be known to the store
    ProgramRunId targetRun = workflowProgram.run(runId);
    if (store.getRun(targetRun) == null) {
        throw new NotFoundException(targetRun);
    }

    // Index the node states by node id; a later entry with the same id replaces an earlier one
    Map<String, WorkflowNodeStateDetail> statesByNodeId = new HashMap<>();
    for (WorkflowNodeStateDetail detail : store.getWorkflowNodeStates(targetRun)) {
        statesByNodeId.put(detail.getNodeId(), detail);
    }
    responder.sendJson(HttpResponseStatus.OK, GSON.toJson(statesByNodeId, STRING_TO_NODESTATEDETAIL_MAP_TYPE));
}
Also used : ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) HashMap(java.util.HashMap) ApplicationNotFoundException(io.cdap.cdap.common.ApplicationNotFoundException) InstanceNotFoundException(io.cdap.cdap.api.dataset.InstanceNotFoundException) NotFoundException(io.cdap.cdap.common.NotFoundException) ProgramNotFoundException(io.cdap.cdap.common.ProgramNotFoundException) ProgramId(io.cdap.cdap.proto.id.ProgramId) WorkflowNodeStateDetail(io.cdap.cdap.proto.WorkflowNodeStateDetail) ApplicationNotFoundException(io.cdap.cdap.common.ApplicationNotFoundException) WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ProgramNotFoundException(io.cdap.cdap.common.ProgramNotFoundException) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 10 with WorkflowSpecification

use of io.cdap.cdap.api.workflow.WorkflowSpecification in project cdap by caskdata.

the class WorkflowHttpHandler method deleteWorkflowLocalDatasets.

/**
 * Deletes the local datasets of a workflow run on a best-effort basis.
 *
 * <p>For every local dataset declared in the workflow spec, the instance named
 * {@code <local dataset name>.<run id>} is deleted from the given namespace. An instance that no longer
 * exists is treated as already deleted. Any other failure is logged and collected, and deletion continues
 * with the remaining datasets. Responds with status OK only if no deletion failed.
 *
 * @param request the incoming HTTP request (not read by this method)
 * @param responder used to send the OK status when all deletions succeed
 * @param namespaceId the namespace of the application
 * @param applicationId the name of the application
 * @param workflowId the name of the workflow
 * @param runId the id of the workflow run whose local datasets are deleted
 * @throws NotFoundException declared by this handler; presumably raised by
 *     {@code getWorkflowSpecForValidRun} when the workflow or run is missing -- verify against that helper
 * @throws RuntimeException if one or more local datasets could not be deleted; the message lists them
 */
@DELETE
@Path("/apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/localdatasets")
public void deleteWorkflowLocalDatasets(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("app-id") String applicationId, @PathParam("workflow-id") String workflowId, @PathParam("run-id") String runId) throws NotFoundException {
    WorkflowSpecification workflowSpec = getWorkflowSpecForValidRun(namespaceId, applicationId, workflowId, runId);
    Set<String> errorOnDelete = new HashSet<>();
    // Only the local dataset names are needed, so iterate over the keys
    for (String localDatasetName : workflowSpec.getLocalDatasetSpecs().keySet()) {
        String mappedDatasetName = localDatasetName + "." + runId;
        // try best to delete the local datasets.
        try {
            datasetFramework.deleteInstance(new DatasetId(namespaceId, mappedDatasetName));
        } catch (InstanceNotFoundException ignored) {
            // Dataset instance is already deleted, so it's a no-op.
        } catch (Throwable t) {
            // Throwable is caught deliberately so one failure does not stop the remaining deletions.
            errorOnDelete.add(mappedDatasetName);
            // Pass the throwable as the last argument so the stack trace is logged, not just the message
            LOG.error("Failed to delete the Workflow local dataset {}. Reason - {}", mappedDatasetName, t.getMessage(), t);
        }
    }
    if (errorOnDelete.isEmpty()) {
        responder.sendStatus(HttpResponseStatus.OK);
        return;
    }
    String errorMessage = "Failed to delete Workflow local datasets - " + Joiner.on(",").join(errorOnDelete);
    throw new RuntimeException(errorMessage);
}
Also used : InstanceNotFoundException(io.cdap.cdap.api.dataset.InstanceNotFoundException) DatasetCreationSpec(io.cdap.cdap.internal.dataset.DatasetCreationSpec) WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) Map(java.util.Map) HashMap(java.util.HashMap) HashSet(java.util.HashSet) DatasetId(io.cdap.cdap.proto.id.DatasetId) Path(javax.ws.rs.Path) DELETE(javax.ws.rs.DELETE)

Aggregations

WorkflowSpecification (io.cdap.cdap.api.workflow.WorkflowSpecification)50 WorkflowNode (io.cdap.cdap.api.workflow.WorkflowNode)24 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)18 WorkflowActionNode (io.cdap.cdap.api.workflow.WorkflowActionNode)18 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)18 ProgramId (io.cdap.cdap.proto.id.ProgramId)18 ScheduleProgramInfo (io.cdap.cdap.api.workflow.ScheduleProgramInfo)14 Map (java.util.Map)14 ProgramType (io.cdap.cdap.proto.ProgramType)12 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)12 DatasetCreationSpec (io.cdap.cdap.internal.dataset.DatasetCreationSpec)10 DatasetId (io.cdap.cdap.proto.id.DatasetId)8 ArrayList (java.util.ArrayList)8 HashMap (java.util.HashMap)8 ImmutableMap (com.google.common.collect.ImmutableMap)6 InstanceNotFoundException (io.cdap.cdap.api.dataset.InstanceNotFoundException)6 WorkflowForkNode (io.cdap.cdap.api.workflow.WorkflowForkNode)6 ApplicationNotFoundException (io.cdap.cdap.common.ApplicationNotFoundException)6 NotFoundException (io.cdap.cdap.common.NotFoundException)6 Path (javax.ws.rs.Path)6