Search in sources :

Example 36 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdminTest method testBranchLineage.

@Test
public void testBranchLineage() throws Exception {
    // Lineage for:
    // 
    // ->D4        -> D5 -> P3 -> D6
    // |           |
    // |           |
    // D1 -> P1 -> D2 -> P2 -> D3
    // |     |           |
    // |     |           |
    // S1 -->|     ---------------> P4 -> D7
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testBranchLineage"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Add accesses
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, stream1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset4, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset5, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run3, dataset5, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run3, dataset6, AccessType.WRITE, System.currentTimeMillis());
    lineageStore.addAccess(run4, dataset2, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run4, dataset3, AccessType.READ, System.currentTimeMillis());
    lineageStore.addAccess(run4, dataset7, AccessType.WRITE, System.currentTimeMillis());
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet()), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet()), new Relation(dataset2, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset3, program4, AccessType.READ, twillRunId(run4), emptySet()), new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4), emptySet())));
    // Lineage for D7
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset7, 500, 20000, 100));
    // Lineage for D6
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset6, 500, 20000, 100));
    // Lineage for D3
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset3, 500, 20000, 100));
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Test(org.junit.Test)

Example 37 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdminTest method testDirectCycle.

@Test
public void testDirectCycle() throws Exception {
    // Lineage for:
    // 
    // D1 <-> P1
    // 
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testDirectCycle"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Add accesses
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1))));
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100));
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Test(org.junit.Test)

Example 38 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdminTest method testWorkflowLineage.

@Test
public void testWorkflowLineage() throws Exception {
    // Lineage for D3 -> P2 -> D2 -> P1 -> D1
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testWorkflowLineage"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Define metadata
    MetadataRecord run1AppMeta = new MetadataRecord(program1.getParent(), MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1"));
    MetadataRecord run1ProgramMeta = new MetadataRecord(program1, MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1"));
    MetadataRecord run1Data1Meta = new MetadataRecord(dataset1, MetadataScope.USER, toMap("dk1", "dk1"), toSet("dt1"));
    MetadataRecord run1Data2Meta = new MetadataRecord(dataset2, MetadataScope.USER, toMap("dk2", "dk2"), toSet("dt2"));
    // Add metadata
    metadataStore.setProperties(MetadataScope.USER, program1.getParent(), run1AppMeta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, program1.getParent(), run1AppMeta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, program1, run1ProgramMeta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, program1, run1ProgramMeta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, dataset1, run1Data1Meta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, dataset1, run1Data1Meta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, dataset2, run1Data2Meta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, dataset2, run1Data2Meta.getTags().toArray(new String[0]));
    // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
    // We need to use current time here as metadata store stores access time using current time
    ProgramRunId run1 = program1.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run2 = program2.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId workflow = program6.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run5 = program5.run(RunIds.generate(System.currentTimeMillis()).getId());
    addWorkflowRuns(store, workflow.getProgram(), workflow.getRun(), run1, run2, run3);
    addRuns(store, workflow);
    addRuns(store, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run2, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset3, AccessType.READ, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run3, dataset1, AccessType.UNKNOWN, System.currentTimeMillis());
    lineageStore.addAccess(run5, dataset1, AccessType.READ, System.currentTimeMillis());
    // The UNKNOWN access type will get filtered out if there is READ/WRITE. It will be preserved if it is the
    // only access type
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program6, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, program6, AccessType.READ, twillRunId(workflow)), new Relation(dataset2, program6, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset3, program6, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program6, AccessType.UNKNOWN, twillRunId(workflow)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    Lineage resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, "workflow");
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, "workflow");
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, program6, AccessType.WRITE, twillRunId(workflow)), new Relation(dataset2, program6, AccessType.READ, twillRunId(workflow)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program6, AccessType.UNKNOWN, twillRunId(workflow))), oneLevelLineage.getRelations());
    // Run tests without workflow parameter
    expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5))));
    resultLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D1
    Assert.assertEquals(expectedLineage, resultLineage);
    resultLineage = lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100, null);
    // Lineage for D2
    Assert.assertEquals(expectedLineage, resultLineage);
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1, null);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Assert metadata
    Assert.assertEquals(toSet(run1AppMeta, run1ProgramMeta, run1Data1Meta, run1Data2Meta), lineageAdmin.getMetadataForRun(run1));
    // Assert that in a different namespace both lineage and metadata should be empty
    NamespaceId customNamespace = new NamespaceId("custom_namespace");
    DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
    ProgramRunId customRun1 = customNamespace.app(program1.getApplication()).program(program1.getType(), program1.getEntityName()).run(run1.getEntityName());
    Assert.assertEquals(new Lineage(ImmutableSet.<Relation>of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
    Assert.assertEquals(ImmutableSet.<MetadataRecord>of(), lineageAdmin.getMetadataForRun(customRun1));
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) NamespaceId(co.cask.cdap.proto.id.NamespaceId) MetadataRecord(co.cask.cdap.common.metadata.MetadataRecord) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 39 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdminTest method testSimpleLineage.

@Test
public void testSimpleLineage() throws Exception {
    // Lineage for D3 -> P2 -> D2 -> P1 -> D1
    LineageStore lineageStore = new LineageStore(getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testSimpleLineage"));
    Store store = getInjector().getInstance(Store.class);
    MetadataStore metadataStore = getInjector().getInstance(MetadataStore.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageStore, store, metadataStore, new NoOpEntityExistenceVerifier());
    // Define metadata
    MetadataRecord run1AppMeta = new MetadataRecord(program1.getParent(), MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1"));
    MetadataRecord run1ProgramMeta = new MetadataRecord(program1, MetadataScope.USER, toMap("pk1", "pk1"), toSet("pt1"));
    MetadataRecord run1Data1Meta = new MetadataRecord(dataset1, MetadataScope.USER, toMap("dk1", "dk1"), toSet("dt1"));
    MetadataRecord run1Data2Meta = new MetadataRecord(dataset2, MetadataScope.USER, toMap("dk2", "dk2"), toSet("dt2"));
    // Add metadata
    metadataStore.setProperties(MetadataScope.USER, program1.getParent(), run1AppMeta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, program1.getParent(), run1AppMeta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, program1, run1ProgramMeta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, program1, run1ProgramMeta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, dataset1, run1Data1Meta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, dataset1, run1Data1Meta.getTags().toArray(new String[0]));
    metadataStore.setProperties(MetadataScope.USER, dataset2, run1Data2Meta.getProperties());
    // noinspection ToArrayCallWithZeroLengthArrayArgument
    metadataStore.addTags(MetadataScope.USER, dataset2, run1Data2Meta.getTags().toArray(new String[0]));
    // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
    // We need to use current time here as metadata store stores access time using current time
    ProgramRunId run1 = program1.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run2 = program2.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
    addRuns(store, run1, run2, run3);
    // It is okay to use current time here since access time is ignore during assertions
    lineageStore.addAccess(run1, dataset1, AccessType.UNKNOWN, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run1, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet1);
    lineageStore.addAccess(run2, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run2, dataset3, AccessType.READ, System.currentTimeMillis(), flowlet2);
    lineageStore.addAccess(run3, dataset1, AccessType.UNKNOWN, System.currentTimeMillis());
    // The UNKNOWN access type will get filtered out if there is READ/WRITE. It will be preserved if it is the
    // only access type
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2)), new Relation(dataset3, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))));
    // Lineage for D1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100));
    // Lineage for D2
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100));
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Assert metadata
    Assert.assertEquals(toSet(run1AppMeta, run1ProgramMeta, run1Data1Meta, run1Data2Meta), lineageAdmin.getMetadataForRun(run1));
    // Assert that in a different namespace both lineage and metadata should be empty
    NamespaceId customNamespace = new NamespaceId("custom_namespace");
    DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
    ProgramRunId customRun1 = customNamespace.app(program1.getApplication()).program(program1.getType(), program1.getEntityName()).run(run1.getEntityName());
    Assert.assertEquals(new Lineage(ImmutableSet.<Relation>of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
    Assert.assertEquals(ImmutableSet.<MetadataRecord>of(), lineageAdmin.getMetadataForRun(customRun1));
}
Also used : MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) Store(co.cask.cdap.app.store.Store) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) NamespaceId(co.cask.cdap.proto.id.NamespaceId) MetadataRecord(co.cask.cdap.common.metadata.MetadataRecord) DatasetId(co.cask.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 40 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageCollapserTest method testCollapseCombinations.

@Test
public void testCollapseCombinations() throws Exception {
    Set<Relation> relations = ImmutableSet.of(// First run
    new Relation(data1, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet11)), new Relation(data1, flow1, AccessType.WRITE, runId1, ImmutableSet.of(flowlet11)), new Relation(data1, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet12)), // Second run
    new Relation(data1, flow1, AccessType.READ, runId2, ImmutableSet.of(flowlet11)), new Relation(data1, flow1, AccessType.WRITE, runId2, ImmutableSet.of(flowlet11)), new Relation(data1, flow1, AccessType.READ, runId2, ImmutableSet.of(flowlet12)), new Relation(data1, flow1, AccessType.UNKNOWN, runId2, ImmutableSet.of(flowlet12)), // Third run
    new Relation(data1, flow1, AccessType.READ, runId3, ImmutableSet.of(flowlet12)), new Relation(data1, flow1, AccessType.UNKNOWN, runId3, ImmutableSet.of(flowlet12)));
    // Collapse on access type, run
    Assert.assertEquals(toSet(new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1, runId2), toSet(flowlet11)), new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.UNKNOWN), toSet(runId1, runId2, runId3), toSet(flowlet12))), LineageCollapser.collapseRelations(relations, toSet(CollapseType.ACCESS, CollapseType.RUN)));
    // Collapse on access type, component
    Assert.assertEquals(toSet(new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1), toSet(flowlet11, flowlet12)), new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.WRITE, AccessType.UNKNOWN), toSet(runId2), toSet(flowlet11, flowlet12)), new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.UNKNOWN), toSet(runId3), toSet(flowlet12))), LineageCollapser.collapseRelations(relations, toSet(CollapseType.ACCESS, CollapseType.COMPONENT)));
    // Collapse on component, run
    Assert.assertEquals(toSet(new CollapsedRelation(data1, flow1, toSet(AccessType.READ), toSet(runId1, runId2, runId3), toSet(flowlet11, flowlet12)), new CollapsedRelation(data1, flow1, toSet(AccessType.WRITE), toSet(runId1, runId2), toSet(flowlet11)), new CollapsedRelation(data1, flow1, toSet(AccessType.UNKNOWN), toSet(runId2, runId3), toSet(flowlet12))), LineageCollapser.collapseRelations(relations, toSet(CollapseType.COMPONENT, CollapseType.RUN)));
    // Collapse on all three
    Assert.assertEquals(toSet(new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.WRITE, AccessType.UNKNOWN), toSet(runId1, runId2, runId3), toSet(flowlet11, flowlet12))), LineageCollapser.collapseRelations(relations, toSet(CollapseType.COMPONENT, CollapseType.RUN, CollapseType.ACCESS)));
}
Also used : CollapsedRelation(co.cask.cdap.data2.metadata.lineage.CollapsedRelation) CollapsedRelation(co.cask.cdap.data2.metadata.lineage.CollapsedRelation) Relation(co.cask.cdap.data2.metadata.lineage.Relation) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)45 Relation (io.cdap.cdap.data2.metadata.lineage.Relation)38 Lineage (io.cdap.cdap.data2.metadata.lineage.Lineage)26 Relation (co.cask.cdap.data2.metadata.lineage.Relation)20 Store (io.cdap.cdap.app.store.Store)20 DefaultLineageStoreReader (io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader)20 LineageStoreReader (io.cdap.cdap.data2.metadata.lineage.LineageStoreReader)20 BasicLineageWriter (io.cdap.cdap.data2.metadata.writer.BasicLineageWriter)16 LineageWriter (io.cdap.cdap.data2.metadata.writer.LineageWriter)16 TransactionRunner (io.cdap.cdap.spi.data.transaction.TransactionRunner)16 RunId (org.apache.twill.api.RunId)15 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)14 DatasetId (io.cdap.cdap.proto.id.DatasetId)14 ProgramId (io.cdap.cdap.proto.id.ProgramId)14 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)14 HashSet (java.util.HashSet)12 Lineage (co.cask.cdap.data2.metadata.lineage.Lineage)10 CollapsedRelation (io.cdap.cdap.data2.metadata.lineage.CollapsedRelation)10 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)9 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)8