use of co.cask.cdap.data2.metadata.lineage.LineageStore in project cdap by caskdata.
the class LineageAdminTest method testDirectCycleTwoRuns.
@Test
public void testDirectCycleTwoRuns() throws Exception {
  // Scenario: the same program touches the same dataset in two different runs.
  //
  //   run1: P1 reads  D1
  //   run2: P1 writes D1
  //
  LineageStore accessStore = new LineageStore(
    getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testDirectCycleTwoRuns"));
  Store appStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(accessStore, appStore, metadata, new NoOpEntityExistenceVerifier());

  // Add runs, then accesses
  addRuns(appStore, run1, run2, run3, run4, run5);

  // Using the current time is fine here: access time is ignored during assertions.
  accessStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);

  // The write belongs to a different run: run1's namespace/application/type/program
  // combined with run2's entity name.
  ProgramRunId writeRun = new ProgramRunId(
    run1.getNamespace(), run1.getApplication(), run1.getParent().getType(), run1.getProgram(),
    run2.getEntityName());
  accessStore.addAccess(writeRun, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);

  // Both relations point at program1, distinguished only by access type and run id.
  Relation readInRun1 = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation writeInRun2 = new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run2), toSet(flowlet1));
  Lineage expected = new Lineage(ImmutableSet.of(readInRun1, writeInRun2));

  Assert.assertEquals(expected, admin.computeLineage(dataset1, 500, 20000, 100));
}
use of co.cask.cdap.data2.metadata.lineage.LineageStore in project cdap by caskdata.
the class LineageAdminTest method testBranchLoopLineage.
@Test
public void testBranchLoopLineage() throws Exception {
  // Accesses recorded by this test (Pn is the program of run n):
  //
  //   P1: reads S1, D1       writes D2, D4
  //   P2: reads D2           writes D3, D5
  //   P3: reads D5           writes D6
  //   P4: reads D2, D3       writes D7
  //   P5: reads D3, D6       writes D1   (closes the loop back to D1)
  //
  LineageStore accessStore = new LineageStore(
    getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testBranchLoopLineage"));
  Store appStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(accessStore, appStore, metadata, new NoOpEntityExistenceVerifier());

  // Add runs, then accesses
  addRuns(appStore, run1, run2, run3, run4, run5);

  // Using the current time is fine here: access time is ignored during assertions.
  // run1 and run2 record accesses with a flowlet component; run3..run5 record none.
  accessStore.addAccess(run1, stream1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  accessStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  accessStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);
  accessStore.addAccess(run1, dataset4, AccessType.WRITE, System.currentTimeMillis(), flowlet1);

  accessStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
  accessStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
  accessStore.addAccess(run2, dataset5, AccessType.WRITE, System.currentTimeMillis(), flowlet2);

  accessStore.addAccess(run3, dataset5, AccessType.READ, System.currentTimeMillis());
  accessStore.addAccess(run3, dataset6, AccessType.WRITE, System.currentTimeMillis());

  accessStore.addAccess(run4, dataset2, AccessType.READ, System.currentTimeMillis());
  accessStore.addAccess(run4, dataset3, AccessType.READ, System.currentTimeMillis());
  accessStore.addAccess(run4, dataset7, AccessType.WRITE, System.currentTimeMillis());

  accessStore.addAccess(run5, dataset3, AccessType.READ, System.currentTimeMillis());
  accessStore.addAccess(run5, dataset6, AccessType.READ, System.currentTimeMillis());
  accessStore.addAccess(run5, dataset1, AccessType.WRITE, System.currentTimeMillis());

  // One expected relation per recorded access, declared in the same order as above.
  Relation p1ReadsS1 = new Relation(stream1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation p1ReadsD1 = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation p1WritesD2 = new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));
  Relation p1WritesD4 = new Relation(dataset4, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));

  Relation p2ReadsD2 = new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2));
  Relation p2WritesD3 = new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));
  Relation p2WritesD5 = new Relation(dataset5, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));

  Relation p3ReadsD5 = new Relation(dataset5, program3, AccessType.READ, twillRunId(run3), emptySet());
  Relation p3WritesD6 = new Relation(dataset6, program3, AccessType.WRITE, twillRunId(run3), emptySet());

  Relation p4ReadsD2 = new Relation(dataset2, program4, AccessType.READ, twillRunId(run4), emptySet());
  Relation p4ReadsD3 = new Relation(dataset3, program4, AccessType.READ, twillRunId(run4), emptySet());
  Relation p4WritesD7 = new Relation(dataset7, program4, AccessType.WRITE, twillRunId(run4), emptySet());

  Relation p5ReadsD3 = new Relation(dataset3, program5, AccessType.READ, twillRunId(run5), emptySet());
  Relation p5ReadsD6 = new Relation(dataset6, program5, AccessType.READ, twillRunId(run5), emptySet());
  Relation p5WritesD1 = new Relation(dataset1, program5, AccessType.WRITE, twillRunId(run5), emptySet());

  Lineage expected = new Lineage(ImmutableSet.of(
    p1ReadsS1, p1ReadsD1, p1WritesD2, p1WritesD4,
    p2ReadsD2, p2WritesD3, p2WritesD5,
    p3ReadsD5, p3WritesD6,
    p4ReadsD2, p4ReadsD3, p4WritesD7,
    p5ReadsD3, p5ReadsD6, p5WritesD1));

  // With 100 levels, every starting point below must yield the complete graph.
  // Lineage for D1
  Assert.assertEquals(expected, admin.computeLineage(dataset1, 500, 20000, 100));
  // Lineage for D5
  Assert.assertEquals(expected, admin.computeLineage(dataset5, 500, 20000, 100));
  // Lineage for D7
  Assert.assertEquals(expected, admin.computeLineage(dataset7, 500, 20000, 100));
  // Lineage for S1
  Assert.assertEquals(expected, admin.computeLineage(stream1, 500, 20000, 100));

  // One level around D5: its writer P2 (with P2's other accesses) and its reader P3
  // (with P3's output D6).
  Lineage d5OneLevel = admin.computeLineage(dataset5, 500, 20000, 1);
  Assert.assertEquals(
    ImmutableSet.of(p2ReadsD2, p2WritesD3, p2WritesD5, p3ReadsD5, p3WritesD6),
    d5OneLevel.getRelations());

  // One level around S1: only P1 and everything P1 touches (S1, D1, D2, D4).
  Lineage s1OneLevel = admin.computeLineage(stream1, 500, 20000, 1);
  Assert.assertEquals(
    ImmutableSet.of(p1ReadsS1, p1ReadsD1, p1WritesD2, p1WritesD4),
    s1OneLevel.getRelations());
}
use of co.cask.cdap.data2.metadata.lineage.LineageStore in project cdap by caskdata.
the class LineageAdminTest method testSimpleLoopLineage.
@Test
public void testSimpleLoopLineage() throws Exception {
  // Accesses recorded by this test (Pn is the program of run n):
  //
  //   P1: reads D1    writes D2
  //   P2: reads D2    writes D1, D3   (the write to D1 forms the loop)
  //   P3: reads D3    writes D4
  //
  LineageStore accessStore = new LineageStore(
    getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testSimpleLoopLineage"));
  Store appStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(accessStore, appStore, metadata, new NoOpEntityExistenceVerifier());

  // Add runs, then accesses
  addRuns(appStore, run1, run2, run3, run4, run5);

  // Using the current time is fine here: access time is ignored during assertions.
  accessStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  accessStore.addAccess(run1, dataset2, AccessType.WRITE, System.currentTimeMillis(), flowlet1);

  accessStore.addAccess(run2, dataset2, AccessType.READ, System.currentTimeMillis(), flowlet2);
  accessStore.addAccess(run2, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet2);
  accessStore.addAccess(run2, dataset3, AccessType.WRITE, System.currentTimeMillis(), flowlet2);

  accessStore.addAccess(run3, dataset3, AccessType.READ, System.currentTimeMillis());
  accessStore.addAccess(run3, dataset4, AccessType.WRITE, System.currentTimeMillis());

  // Expected relations; run3 recorded no component, hence the empty component set.
  Relation p1WritesD2 = new Relation(dataset2, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));
  Relation p1ReadsD1 = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Relation p2WritesD1 = new Relation(dataset1, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));
  Relation p2ReadsD2 = new Relation(dataset2, program2, AccessType.READ, twillRunId(run2), toSet(flowlet2));
  Relation p2WritesD3 = new Relation(dataset3, program2, AccessType.WRITE, twillRunId(run2), toSet(flowlet2));
  Relation p3WritesD4 = new Relation(dataset4, program3, AccessType.WRITE, twillRunId(run3), emptySet());
  Relation p3ReadsD3 = new Relation(dataset3, program3, AccessType.READ, twillRunId(run3), emptySet());

  Lineage expected = new Lineage(ImmutableSet.of(
    p1WritesD2, p1ReadsD1,
    p2WritesD1, p2ReadsD2, p2WritesD3,
    p3WritesD4, p3ReadsD3));

  // Lineage for D1
  Assert.assertEquals(expected, admin.computeLineage(dataset1, 500, 20000, 100));
  // Lineage for D2
  Assert.assertEquals(expected, admin.computeLineage(dataset2, 500, 20000, 100));

  // One level around D1 covers P1 and P2 with everything they touch (D1, D2, D3),
  // but stops before P3 and D4.
  Lineage d1OneLevel = admin.computeLineage(dataset1, 500, 20000, 1);
  Assert.assertEquals(
    ImmutableSet.of(p1WritesD2, p1ReadsD1, p2WritesD1, p2ReadsD2, p2WritesD3),
    d1OneLevel.getRelations());
}
use of co.cask.cdap.data2.metadata.lineage.LineageStore in project cdap by caskdata.
the class BasicLineageWriterTest method testWrites.
/**
 * Verifies that {@link BasicLineageWriter} records an access once per run: a repeated
 * access for the same run must not update the stored access time, while the same access
 * for a different run must be written with a fresh time.
 *
 * @throws Exception if any store operation fails or the sleep is interrupted
 */
@Test
public void testWrites() throws Exception {
  Injector injector = getInjector();
  MetadataStore metadataStore = injector.getInstance(MetadataStore.class);
  LineageStore lineageStore = injector.getInstance(LineageStore.class);
  LineageWriter lineageWriter = new BasicLineageWriter(lineageStore);
  // Define entities
  ProgramId program = new ProgramId(NamespaceId.DEFAULT.getNamespace(), "app", ProgramType.FLOW, "flow");
  StreamId stream = new StreamId(NamespaceId.DEFAULT.getNamespace(), "stream");
  ProgramRunId run1 = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(),
                                       program.getEntityName(), RunIds.generate(10000).getId());
  ProgramRunId run2 = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(),
                                       program.getEntityName(), RunIds.generate(20000).getId());
  // Tag stream
  metadataStore.addTags(MetadataScope.USER, stream, "stag1", "stag2");
  // Write access for run1
  lineageWriter.addAccess(run1, stream, AccessType.READ);
  Assert.assertEquals(ImmutableSet.of(program, stream), lineageStore.getEntitiesForRun(run1));
  // Wait for the next millisecond BEFORE taking the reference timestamp, since access time is
  // stored in milliseconds. Sleeping after taking it would allow the first access and the
  // reference timestamp to share a millisecond, making the strict '<' check below fail spuriously.
  TimeUnit.MILLISECONDS.sleep(1);
  // Record time to verify duplicate writes.
  long beforeSecondTag = System.currentTimeMillis();
  // Add another tag to stream
  metadataStore.addTags(MetadataScope.USER, stream, "stag3");
  // Write access for run1 again
  lineageWriter.addAccess(run1, stream, AccessType.READ);
  // The write should be no-op, and access time for run1 should not be updated
  Assert.assertTrue(lineageStore.getAccessTimesForRun(run1).get(0) < beforeSecondTag);
  // However, you can write access for another run
  lineageWriter.addAccess(run2, stream, AccessType.READ);
  // Assert new access time is written
  Assert.assertTrue(lineageStore.getAccessTimesForRun(run2).get(0) >= beforeSecondTag);
}
use of co.cask.cdap.data2.metadata.lineage.LineageStore in project cdap by caskdata.
the class LineageAdminTest method testDirectCycle.
@Test
public void testDirectCycle() throws Exception {
  // Scenario: within a single run, P1 both reads and writes D1 (D1 <-> P1).
  LineageStore accessStore = new LineageStore(
    getTxExecFactory(), getDatasetFramework(), NamespaceId.DEFAULT.dataset("testDirectCycle"));
  Store appStore = getInjector().getInstance(Store.class);
  MetadataStore metadata = getInjector().getInstance(MetadataStore.class);
  LineageAdmin admin = new LineageAdmin(accessStore, appStore, metadata, new NoOpEntityExistenceVerifier());

  // Add runs, then accesses
  addRuns(appStore, run1, run2, run3, run4, run5);

  // Using the current time is fine here: access time is ignored during assertions.
  accessStore.addAccess(run1, dataset1, AccessType.READ, System.currentTimeMillis(), flowlet1);
  accessStore.addAccess(run1, dataset1, AccessType.WRITE, System.currentTimeMillis(), flowlet1);

  // Same dataset, program, run and component; only the access type differs.
  Relation writeRelation = new Relation(dataset1, program1, AccessType.WRITE, twillRunId(run1), toSet(flowlet1));
  Relation readRelation = new Relation(dataset1, program1, AccessType.READ, twillRunId(run1), toSet(flowlet1));
  Lineage expected = new Lineage(ImmutableSet.of(writeRelation, readRelation));

  Assert.assertEquals(expected, admin.computeLineage(dataset1, 500, 20000, 100));
}
Aggregations