Search in sources :

Example 41 with ProgramRunId

use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

the class LineageAdminTest method testWorkflowLineage.

/**
 * Exercises {@code LineageAdmin.computeLineage} for a set of program runs, three of which
 * (run1, run2, run3) are registered as belonging to a workflow run.
 *
 * <p>Lineage is computed twice: once passing {@code "workflow"} as the last argument, where the
 * expected relations reference the workflow program (program6) instead of the individual
 * programs, and once passing {@code null}, where the expected relations reference the individual
 * programs. The test also checks the metadata returned for run1 and that a different namespace
 * yields empty lineage and metadata.
 */
@Test
public void testWorkflowLineage() throws Exception {
    // Lineage for D3 -> P2 -> D2 -> P1 -> D1
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testWorkflowLineage"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Define metadata
    MetadataRecord run1AppMeta = new MetadataRecord(program1.getParent(), MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1"));
    MetadataRecord run1ProgramMeta = new MetadataRecord(program1, MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1"));
    MetadataRecord run1Data1Meta = new MetadataRecord(dataset1, MetadataScope.USER, toMap("dk1", "dk1"), toSet("dt1"));
    MetadataRecord run1Data2Meta = new MetadataRecord(dataset2, MetadataScope.USER, toMap("dk2", "dk2"), toSet("dt2"));
    // Add metadata: for each entity, store the properties and tags of the record defined above
    metadataStore.setProperties(MetadataScope.USER, program1.getParent(), run1AppMeta.getProperties());
    //noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, program1.getParent(), run1AppMeta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, program1, run1ProgramMeta.getProperties());
    //noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, program1, run1ProgramMeta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, dataset1, run1Data1Meta.getProperties());
    //noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, dataset1, run1Data1Meta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, dataset2, run1Data2Meta.getProperties());
    //noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, dataset2, run1Data2Meta.getTags().toArray(new String[0]));
    // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
    // We need to use current time here as metadata store stores access time using current time
    ProgramRunId run1 = program1.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run2 = program2.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId workflow = program6.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run5 = program5.run(RunIds.generate(System.currentTimeMillis()).getId());
    // Associate run1, run2 and run3 with the workflow run, then record the workflow run itself
    // and run5 through addRuns.
    addWorkflowRuns(store, workflow.getProgram(), workflow.getRun(), run1, run2, run3);
    addRuns(store, workflow);
    addRuns(store, run5);
    // It is okay to use current time here since access time is ignored during assertions
    lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    // NOTE(review): the same run1/dataset1 WRITE access is recorded a second time; the expected
    // lineage below contains it only once, so this presumably checks that repeats collapse - confirm.
    lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run2, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset3, AccessType.READ, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run3, dataset1, AccessType.UNKNOWN, System.currentTimeMillis());
    lineageStore.addAccess(run5, dataset1, AccessType.READ, System.currentTimeMillis());
    // The UNKNOWN access type will get filtered out if there is READ/WRITE. It will be preserved if it is the
    // only access type
    // Expected relations when computing with the "workflow" argument: accesses made by run1-run3
    // are expressed against program6 and the workflow run; run5 keeps its own program.
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program6, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, program6, AccessType.READ, twillRunId(workflow)), new Relation(dataset2, program6, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset3, program6, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program6, AccessType.UNKNOWN, twillRunId(workflow)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    Lineage resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, "workflow");
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, program6, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, program6, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program6, AccessType.UNKNOWN, twillRunId(workflow))), oneLevelLineage.getRelations());
    // Run tests without workflow parameter: relations now reference the individual programs and runs
    expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, null);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Assert metadata
    Assert.assertEquals(toSet(run1AppMeta, run1ProgramMeta, run1Data1Meta, run1Data2Meta), lineageAdmin.getMetadataForRun(run1));
    // Assert that in a different namespace both lineage and metadata should be empty
    NamespaceId customNamespace = new NamespaceId("custom_namespace");
    DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
    ProgramRunId customRun1 = customNamespace.app(program1.getApplication()).program(program1.getType(), program1.getEntityName()).run(run1.getEntityName());
    Assert.assertEquals(new Lineage(ImmutableSet.<Relation>of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
    Assert.assertEquals(ImmutableSet.<MetadataRecord>of(), lineageAdmin.getMetadataForRun(customRun1));
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) NamespaceId(co.cask.cdap.proto.id.NamespaceId) MetadataRecord(co.cask.cdap.proto.metadata.MetadataRecord) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 42 with ProgramRunId

use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

the class LineageAdminTest method addWorkflowRuns.

/**
 * Records the start of every given program run in the store and attaches the identity of the
 * workflow run they belong to.
 *
 * @param store store instance
 * @param workflowName name of the workflow
 * @param workflowRunId run ID associated with all program runs
 * @param runs list of runs to be added
 */
private void addWorkflowRuns(Store store, String workflowName, String workflowRunId, ProgramRunId... runs) {
    // Workflow identity shared by every run added below.
    Map<String, String> workflowInfo = new HashMap<>();
    workflowInfo.put(ProgramOptionConstants.WORKFLOW_NAME, workflowName);
    workflowInfo.put(ProgramOptionConstants.WORKFLOW_NODE_ID, "workflowNodeId");
    workflowInfo.put(ProgramOptionConstants.WORKFLOW_RUN_ID, workflowRunId);
    Map<String, String> noRuntimeArgs = ImmutableMap.of();
    for (ProgramRunId programRun : runs) {
        String runName = programRun.getEntityName();
        // The start time (in seconds) is derived from the run id itself.
        long startSecs = RunIds.getTime(RunIds.fromString(runName), TimeUnit.SECONDS);
        store.setStart(programRun.getParent(), runName, startSecs, null, noRuntimeArgs, workflowInfo);
    }
}
Also used : HashMap(java.util.HashMap) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId)

Example 43 with ProgramRunId

use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

the class DefaultStoreTest method testConcurrentStopStart.

/**
 * Starts two runs of the same program with identical start times, stops both at the same
 * time, and checks that the store reports two separate run records.
 */
@Test
public void testConcurrentStopStart() throws Exception {
    ProgramId concurrentFlow = new ProgramId("account1", "concurrentApp", ProgramType.FLOW, "concurrentFlow");
    long nowMillis = System.currentTimeMillis();
    long startMillis = nowMillis - 10000;
    long stopSecs = TimeUnit.MILLISECONDS.toSeconds(nowMillis);
    ProgramRunStatus completed = ProgramController.State.COMPLETED.getRunStatus();

    // Both runs are generated from the same start timestamp.
    RunId firstRun = RunIds.generate(startMillis);
    store.setStart(concurrentFlow, firstRun.getId(), runIdToSecs(firstRun));
    RunId secondRun = RunIds.generate(startMillis);
    store.setStart(concurrentFlow, secondRun.getId(), runIdToSecs(secondRun));

    // Both runs stop at the same time as well.
    store.setStop(concurrentFlow, firstRun.getId(), stopSecs, completed);
    store.setStop(concurrentFlow, secondRun.getId(), stopSecs, completed);

    // Each run must show up as its own history entry.
    Map<ProgramRunId, RunRecordMeta> runHistory =
        store.getRuns(concurrentFlow, ProgramRunStatus.ALL, 0, Long.MAX_VALUE, Integer.MAX_VALUE);
    Assert.assertEquals(2, runHistory.size());
}
Also used : ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramId(co.cask.cdap.proto.id.ProgramId) RunId(org.apache.twill.api.RunId) Test(org.junit.Test)

Example 44 with ProgramRunId

use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

the class DefaultStoreTest method testRuntimeArgsDeletion.

/**
 * Checks that runtime arguments stored for runs of a flow, a MapReduce program and a workflow
 * can be read back, and that they are no longer returned once the application is removed.
 */
@Test
public void testRuntimeArgsDeletion() throws Exception {
    ApplicationSpecification appSpec = Specifications.from(new AllProgramsApp());
    ApplicationId applicationId = new ApplicationId("testDeleteRuntimeArgs", appSpec.getName());
    store.addApplication(applicationId, appSpec);
    Assert.assertNotNull(store.getApplication(applicationId));

    ProgramId flow = applicationId.flow("NoOpFlow");
    ProgramId mapreduce = applicationId.mr("NoOpMR");
    ProgramId workflow = applicationId.workflow("NoOpWorkflow");

    String flowRun = RunIds.generate().getId();
    String mapreduceRun = RunIds.generate().getId();
    String workflowRun = RunIds.generate().getId();

    ProgramRunId flowRunId = flow.run(flowRun);
    ProgramRunId mapreduceRunId = mapreduce.run(mapreduceRun);
    ProgramRunId workflowRunId = workflow.run(workflowRun);

    // Start one run per program, each with a single distinct runtime argument.
    // NOTE(review): start times are given as System.currentTimeMillis() here, while
    // testConcurrentStopStart passes seconds - confirm which unit setStart expects.
    store.setStart(flow, flowRun, System.currentTimeMillis(), null, ImmutableMap.of("model", "click"), null);
    store.setStart(mapreduce, mapreduceRun, System.currentTimeMillis(), null, ImmutableMap.of("path", "/data"), null);
    store.setStart(workflow, workflowRun, System.currentTimeMillis(), null, ImmutableMap.of("whitelist", "cask"), null);

    // Each run reports exactly the argument it was started with.
    Map<String, String> flowArgs = store.getRuntimeArguments(flowRunId);
    Assert.assertEquals(1, flowArgs.size());
    Assert.assertEquals("click", flowArgs.get("model"));

    Map<String, String> mapreduceArgs = store.getRuntimeArguments(mapreduceRunId);
    Assert.assertEquals(1, mapreduceArgs.size());
    Assert.assertEquals("/data", mapreduceArgs.get("path"));

    Map<String, String> workflowArgs = store.getRuntimeArguments(workflowRunId);
    Assert.assertEquals(1, workflowArgs.size());
    Assert.assertEquals("cask", workflowArgs.get("whitelist"));

    // Removing the application must also remove the arguments of all its runs.
    store.removeApplication(applicationId);

    Assert.assertEquals(0, store.getRuntimeArguments(flowRunId).size());
    Assert.assertEquals(0, store.getRuntimeArguments(mapreduceRunId).size());
    Assert.assertEquals(0, store.getRuntimeArguments(workflowRunId).size());
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) AllProgramsApp(co.cask.cdap.AllProgramsApp) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ProgramId(co.cask.cdap.proto.id.ProgramId) Test(org.junit.Test)

Example 45 with ProgramRunId

use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

the class DefaultStoreTest method testRunsLimit.

/**
 * Checks that the limit argument of {@code store.getRuns} caps the number of run records
 * returned: with one completed run and one active run stored, a limit of 1 yields one record.
 */
@Test
public void testRunsLimit() throws Exception {
    ApplicationSpecification appSpec = Specifications.from(new AllProgramsApp());
    ApplicationId applicationId = new ApplicationId("testRunsLimit", appSpec.getName());
    store.addApplication(applicationId, appSpec);
    ProgramId noOpFlow = new ProgramId("testRunsLimit", appSpec.getName(), ProgramType.FLOW, "NoOpFlow");
    Assert.assertNotNull(store.getApplication(applicationId));

    long currentMillis = System.currentTimeMillis();
    // First run: started and then completed.
    store.setStart(noOpFlow, "flowRun1", currentMillis - 3000);
    store.setStop(noOpFlow, "flowRun1", currentMillis - 100, ProgramController.State.COMPLETED.getRunStatus());
    // Second run: started and left active.
    store.setStart(noOpFlow, "flowRun2", currentMillis - 2000);

    // Even though there are two separate run records (one complete, one active), only one
    // should be returned by the query, because a limit of 1 is passed in.
    int limit = 1;
    Map<ProgramRunId, RunRecordMeta> limitedRuns =
        store.getRuns(noOpFlow, ProgramRunStatus.ALL, 0, Long.MAX_VALUE, limit);
    Assert.assertEquals(limit, limitedRuns.size());
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) AllProgramsApp(co.cask.cdap.AllProgramsApp) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ProgramId(co.cask.cdap.proto.id.ProgramId) Test(org.junit.Test)

Aggregations

ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)53 ProgramId (co.cask.cdap.proto.id.ProgramId)23 Test (org.junit.Test)22 ApplicationId (co.cask.cdap.proto.id.ApplicationId)12 RunRecordMeta (co.cask.cdap.internal.app.store.RunRecordMeta)10 Path (javax.ws.rs.Path)10 RunId (org.apache.twill.api.RunId)10 DatasetId (co.cask.cdap.proto.id.DatasetId)9 Relation (co.cask.cdap.data2.metadata.lineage.Relation)7 HashSet (java.util.HashSet)7 NotFoundException (co.cask.cdap.common.NotFoundException)6 WorkflowNodeStateDetail (co.cask.cdap.proto.WorkflowNodeStateDetail)6 HashMap (java.util.HashMap)6 GET (javax.ws.rs.GET)6 ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification)5 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)5 StreamId (co.cask.cdap.proto.id.StreamId)5 Map (java.util.Map)5 CommandInputError (co.cask.cdap.cli.exception.CommandInputError)4 MethodArgument (co.cask.cdap.common.internal.remote.MethodArgument)4