Search in sources :

Example 16 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdminTest method testBranchLoopLineage.

@Test
public void testBranchLoopLineage() {
    // Lineage for:
    // 
    // |-------------------------------------|
    // |                                     |
    // |                                     |
    // |    -> D4       -> D5 -> P3 -> D6 -> P5
    // |    |           |                    ^
    // V    |           |                    |
    // D1 -> P1 -> D2 -> P2 -> D3 ----------->|
    // |     |           |
    // |     |           |
    // S1 -->|     ---------------> P4 -> D7
    TransactionRunner transactionRunner = getInjector().getInstance(TransactionRunner.class);
    LineageStoreReader lineageReader = new DefaultLineageStoreReader(transactionRunner);
    LineageWriter lineageWriter = new BasicLineageWriter(transactionRunner);
    Store store = getInjector().getInstance(Store.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageReader, store);
    // Add accesses
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageWriter.addAccess(run1, dataset1, AccessType.READ);
    lineageWriter.addAccess(run1, dataset2, AccessType.WRITE);
    lineageWriter.addAccess(run1, dataset4, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset2, AccessType.READ);
    lineageWriter.addAccess(run2, dataset3, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset5, AccessType.WRITE);
    lineageWriter.addAccess(run3, dataset5, AccessType.READ, null);
    lineageWriter.addAccess(run3, dataset6, AccessType.WRITE, null);
    lineageWriter.addAccess(run4, dataset2, AccessType.READ, null);
    lineageWriter.addAccess(run4, dataset3, AccessType.READ, null);
    lineageWriter.addAccess(run4, dataset7, AccessType.WRITE, null);
    lineageWriter.addAccess(run5, dataset3, AccessType.READ, null);
    lineageWriter.addAccess(run5, dataset6, AccessType.READ, null);
    lineageWriter.addAccess(run5, dataset1, AccessType.WRITE, null);
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.READ, twillRunId(run1)), new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1)), new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3)), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3)), new Relation(dataset2, program4, AccessType.READ, twillRunId(run4)), new Relation(dataset3, program4, AccessType.READ, twillRunId(run4)), new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4)), new Relation(dataset3, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset6, program5, AccessType.READ, twillRunId(run5)), new Relation(dataset1, program5, AccessType.WRITE, twillRunId(run5))));
    // Lineage for D1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100));
    // Lineage for D5
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset5, 500, 20000, 100));
    // Lineage for D7
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset7, 500, 20000, 100));
    // Lineage for D5 for one level
    // -> D5 -> P3 -> D6
    // |
    // |
    // D2 -> P2 -> D3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset5, 500, 20000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset2, program2, AccessType.READ, twillRunId(run2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset5, program3, AccessType.READ, twillRunId(run3)), new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3))), oneLevelLineage.getRelations());
}
Also used : Relation(io.cdap.cdap.data2.metadata.lineage.Relation) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) Store(io.cdap.cdap.app.store.Store) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) Test(org.junit.Test)

Example 17 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageCollapserTest method testCollapseCombinations.

@Test
public void testCollapseCombinations() {
    Set<Relation> relations = ImmutableSet.of(// First run
    new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service1, AccessType.WRITE, runId1), new Relation(data1, service1, AccessType.READ, runId1), // Second run
    new Relation(data1, service1, AccessType.READ, runId2), new Relation(data1, service1, AccessType.WRITE, runId2), new Relation(data1, service1, AccessType.READ, runId2), new Relation(data1, service1, AccessType.UNKNOWN, runId2), // Third run
    new Relation(data1, service1, AccessType.READ, runId3), new Relation(data1, service1, AccessType.UNKNOWN, runId3));
    // Collapse on access type, run
    Assert.assertEquals(toSet(new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.WRITE, AccessType.UNKNOWN), toSet(runId1, runId2, runId3), Collections.emptySet())), LineageCollapser.collapseRelations(relations, toSet(CollapseType.ACCESS, CollapseType.RUN)));
    // Collapse on access type, component
    Assert.assertEquals(toSet(new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.WRITE, AccessType.UNKNOWN), toSet(runId2), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.UNKNOWN), toSet(runId3), Collections.emptySet())), LineageCollapser.collapseRelations(relations, toSet(CollapseType.ACCESS, CollapseType.COMPONENT)));
    // Collapse on component, run
    Assert.assertEquals(toSet(new CollapsedRelation(data1, service1, toSet(AccessType.READ), toSet(runId1, runId2, runId3), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.WRITE), toSet(runId1, runId2), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.UNKNOWN), toSet(runId2, runId3), Collections.emptySet())), LineageCollapser.collapseRelations(relations, toSet(CollapseType.COMPONENT, CollapseType.RUN)));
    // Collapse on all three
    Assert.assertEquals(toSet(new CollapsedRelation(data1, service1, toSet(AccessType.READ, AccessType.WRITE, AccessType.UNKNOWN), toSet(runId1, runId2, runId3), Collections.emptySet())), LineageCollapser.collapseRelations(relations, toSet(CollapseType.COMPONENT, CollapseType.RUN, CollapseType.ACCESS)));
}
Also used : CollapsedRelation(io.cdap.cdap.data2.metadata.lineage.CollapsedRelation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) CollapsedRelation(io.cdap.cdap.data2.metadata.lineage.CollapsedRelation) Test(org.junit.Test)

Example 18 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageCollapserTest method testCollapseRun.

@Test
public void testCollapseRun() {
    Set<Relation> relations = ImmutableSet.of(new Relation(data1, service1, AccessType.READ, runId1), new Relation(data1, service1, AccessType.WRITE, runId1), new Relation(data1, service1, AccessType.READ, runId2));
    // Collapse on run
    Assert.assertEquals(toSet(new CollapsedRelation(data1, service1, toSet(AccessType.READ), toSet(runId1, runId2), Collections.emptySet()), new CollapsedRelation(data1, service1, toSet(AccessType.WRITE), toSet(runId1), Collections.emptySet())), LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.RUN)));
}
Also used : CollapsedRelation(io.cdap.cdap.data2.metadata.lineage.CollapsedRelation) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) CollapsedRelation(io.cdap.cdap.data2.metadata.lineage.CollapsedRelation) Test(org.junit.Test)

Example 19 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.

the class LineageAdminTest method testSimpleLoopLineage.

@Test
public void testSimpleLoopLineage() {
    // Lineage for D1 -> P1 -> D2 -> P2 -> D3 -> P3 -> D4
    // |                 |
    // |                 V
    // |<-----------------
    // 
    TransactionRunner transactionRunner = getInjector().getInstance(TransactionRunner.class);
    LineageStoreReader lineageReader = new DefaultLineageStoreReader(transactionRunner);
    LineageWriter lineageWriter = new BasicLineageWriter(transactionRunner);
    Store store = getInjector().getInstance(Store.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageReader, store);
    // Add access
    addRuns(store, run1, run2, run3, run4, run5);
    // It is okay to use current time here since access time is ignore during assertions
    lineageWriter.addAccess(run1, dataset1, AccessType.READ);
    lineageWriter.addAccess(run1, dataset2, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset2, AccessType.READ);
    lineageWriter.addAccess(run2, dataset1, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset3, AccessType.WRITE);
    lineageWriter.addAccess(run3, dataset3, AccessType.READ, null);
    lineageWriter.addAccess(run3, dataset4, AccessType.WRITE, null);
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1)), new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset4, program3, AccessType.WRITE, twillRunId(run3)), new Relation(dataset3, program3, AccessType.READ, twillRunId(run3))));
    // Lineage for D1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, 20000, 100));
    // Lineage for D2
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset2, 500, 20000, 100));
    // Lineage for D1 for one level D1 -> P1 -> D2 -> P2 -> D3
    // |                 |
    // |                 V
    // |<-----------------
    // 
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, 20000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1)), new Relation(dataset1, program1, AccessType.READ, twillRunId(run1)), new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset2, program2, AccessType.READ, twillRunId(run2)), new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2))), oneLevelLineage.getRelations());
}
Also used : Relation(io.cdap.cdap.data2.metadata.lineage.Relation) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) Store(io.cdap.cdap.app.store.Store) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) Test(org.junit.Test)

Example 20 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by cdapio.

the class LineageAdminTest method testSimpleLineage.

@Test
public void testSimpleLineage() {
    // Lineage for D3 -> P2 -> D2 -> P1 -> D1
    TransactionRunner transactionRunner = getInjector().getInstance(TransactionRunner.class);
    LineageStoreReader lineageReader = new DefaultLineageStoreReader(transactionRunner);
    LineageWriter lineageWriter = new BasicLineageWriter(transactionRunner);
    Store store = getInjector().getInstance(Store.class);
    LineageAdmin lineageAdmin = new LineageAdmin(lineageReader, store);
    // Add accesses for D3 -> P2 -> D2 -> P1 -> D1 <-> P3
    // We need to use current time here as metadata store stores access time using current time
    ProgramRunId run1 = program1.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run2 = program2.run(RunIds.generate(System.currentTimeMillis()).getId());
    ProgramRunId run3 = program3.run(RunIds.generate(System.currentTimeMillis()).getId());
    addRuns(store, run1, run2, run3);
    // It is okay to use current time here since access time is ignore during assertions
    lineageWriter.addAccess(run1, dataset1, AccessType.UNKNOWN);
    lineageWriter.addAccess(run1, dataset1, AccessType.WRITE);
    lineageWriter.addAccess(run1, dataset2, AccessType.READ);
    lineageWriter.addAccess(run2, dataset2, AccessType.WRITE);
    lineageWriter.addAccess(run2, dataset3, AccessType.READ);
    lineageWriter.addAccess(run3, dataset1, AccessType.UNKNOWN, null);
    // The UNKNOWN access type will get filtered out if there is READ/WRITE. It will be preserved if it is the
    // only access type
    Lineage expectedLineage = new Lineage(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1)), new Relation(dataset2, program2, AccessType.WRITE, twillRunId(run2)), new Relation(dataset3, program2, AccessType.READ, twillRunId(run2)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))));
    // Lineage for D1
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 100));
    // Lineage for D2
    Assert.assertEquals(expectedLineage, lineageAdmin.computeLineage(dataset2, 500, System.currentTimeMillis() + 10000, 100));
    // Lineage for D1 for one level should be D2 -> P1 -> D1 <-> P3
    Lineage oneLevelLineage = lineageAdmin.computeLineage(dataset1, 500, System.currentTimeMillis() + 10000, 1);
    Assert.assertEquals(ImmutableSet.of(new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1)), new Relation(dataset2, program1, AccessType.READ, twillRunId(run1)), new Relation(dataset1, program3, AccessType.UNKNOWN, twillRunId(run3))), oneLevelLineage.getRelations());
    // Assert that in a different namespace both lineage and metadata should be empty
    NamespaceId customNamespace = new NamespaceId("custom_namespace");
    DatasetId customDataset1 = customNamespace.dataset(dataset1.getEntityName());
    Assert.assertEquals(new Lineage(ImmutableSet.of()), lineageAdmin.computeLineage(customDataset1, 500, System.currentTimeMillis() + 10000, 100));
}
Also used : Relation(io.cdap.cdap.data2.metadata.lineage.Relation) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) TransactionRunner(io.cdap.cdap.spi.data.transaction.TransactionRunner) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) Store(io.cdap.cdap.app.store.Store) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) BasicLineageWriter(io.cdap.cdap.data2.metadata.writer.BasicLineageWriter) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)45 Relation (io.cdap.cdap.data2.metadata.lineage.Relation)38 Lineage (io.cdap.cdap.data2.metadata.lineage.Lineage)26 Relation (co.cask.cdap.data2.metadata.lineage.Relation)20 Store (io.cdap.cdap.app.store.Store)20 DefaultLineageStoreReader (io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader)20 LineageStoreReader (io.cdap.cdap.data2.metadata.lineage.LineageStoreReader)20 BasicLineageWriter (io.cdap.cdap.data2.metadata.writer.BasicLineageWriter)16 LineageWriter (io.cdap.cdap.data2.metadata.writer.LineageWriter)16 TransactionRunner (io.cdap.cdap.spi.data.transaction.TransactionRunner)16 RunId (org.apache.twill.api.RunId)15 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)14 DatasetId (io.cdap.cdap.proto.id.DatasetId)14 ProgramId (io.cdap.cdap.proto.id.ProgramId)14 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)14 HashSet (java.util.HashSet)12 Lineage (co.cask.cdap.data2.metadata.lineage.Lineage)10 CollapsedRelation (io.cdap.cdap.data2.metadata.lineage.CollapsedRelation)10 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)9 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)8