Search in sources :

Example 6 with ProgramRunOperations

use of io.cdap.cdap.proto.metadata.lineage.ProgramRunOperations in project cdap by caskdata.

The method processOperations in the class FieldLineageAdmin.

/**
 * Converts each {@link ProgramRunOperations} entry into a {@link ProgramFieldOperationInfo}.
 *
 * <p>For every entry, its program run ids are passed to {@code computeProgramInfo} and its
 * operations to {@code computeFieldOperationInfo}; the two results are paired in one
 * {@link ProgramFieldOperationInfo}.
 *
 * @param programRunOperations the entries to convert
 * @return a newly created list holding one element per input entry, in iteration order
 */
private List<ProgramFieldOperationInfo> processOperations(List<ProgramRunOperations> programRunOperations) {
    List<ProgramFieldOperationInfo> infos = new ArrayList<>();
    for (ProgramRunOperations runOperations : programRunOperations) {
        // Arguments are evaluated left to right: program info first, then field operation info.
        infos.add(new ProgramFieldOperationInfo(
            computeProgramInfo(runOperations.getProgramRunIds()),
            computeFieldOperationInfo(runOperations.getOperations())));
    }
    return infos;
}
Also used : ProgramRunOperations(io.cdap.cdap.proto.metadata.lineage.ProgramRunOperations) ProgramInfo(io.cdap.cdap.proto.metadata.lineage.ProgramInfo) ProgramFieldOperationInfo(io.cdap.cdap.proto.metadata.lineage.ProgramFieldOperationInfo) ArrayList(java.util.ArrayList) FieldOperationInfo(io.cdap.cdap.proto.metadata.lineage.FieldOperationInfo) ProgramFieldOperationInfo(io.cdap.cdap.proto.metadata.lineage.ProgramFieldOperationInfo)

Example 7 with ProgramRunOperations

use of io.cdap.cdap.proto.metadata.lineage.ProgramRunOperations in project cdap by caskdata.

The method testLineageLimiting in the class LineageLimitingTest.

/**
 * Writes dataset-level lineage for one service run and field-level lineage for two Spark runs,
 * then checks that the dataset-level lineage becomes readable while the field-level query
 * returns no incoming operations.
 *
 * <p>NOTE(review): the empty field-level result is presumably caused by a lineage size limit
 * configured in the test setup, which is not visible in this method; confirm against the fixture.
 */
@Test
public void testLineageLimiting() throws InterruptedException, ExecutionException, TimeoutException {
    LineageStoreReader storeReader = getInjector().getInstance(LineageStoreReader.class);
    ProgramRunId serviceRun = service1.run(RunIds.generate());

    // Record dataset-level accesses for the service run: one read and one write.
    LineageWriter datasetLineageWriter = getInjector().getInstance(MessagingLineageWriter.class);
    datasetLineageWriter.addAccess(serviceRun, dataset1, AccessType.READ);
    datasetLineageWriter.addAccess(serviceRun, dataset2, AccessType.WRITE);

    // Record field-level lineage: the same operations are written for two Spark runs.
    FieldLineageWriter fieldWriter = getInjector().getInstance(MessagingLineageWriter.class);
    ProgramRunId firstSparkRun = spark1.run(RunIds.generate(100));
    ReadOperation readOp =
        new ReadOperation("read", "some read", EndPoint.of("ns", "endpoint1"), "offset", "body");
    TransformOperation parseOp =
        new TransformOperation(
            "parse",
            "parse body",
            Collections.singletonList(InputField.of("read", "body")),
            "name",
            "address");
    WriteOperation writeOp =
        new WriteOperation(
            "write",
            "write data",
            EndPoint.of("ns", "endpoint2"),
            Arrays.asList(
                InputField.of("read", "offset"),
                InputField.of("parse", "name"),
                InputField.of("parse", "address")));
    List<Operation> fieldOperations = new ArrayList<>();
    fieldOperations.add(readOp);
    fieldOperations.add(writeOp);
    fieldOperations.add(parseOp);
    FieldLineageInfo fieldLineage = new FieldLineageInfo(fieldOperations);
    fieldWriter.write(firstSparkRun, fieldLineage);
    ProgramRunId secondSparkRun = spark1.run(RunIds.generate(200));
    fieldWriter.write(secondSparkRun, fieldLineage);

    // The dataset-level lineage is expected to be written because it is smaller than the
    // maximum specified size; poll every 100 ms for up to 10 s until it is readable.
    Set<NamespacedEntityId> expectedEntities =
        new HashSet<>(Arrays.asList(serviceRun.getParent(), dataset1, dataset2));
    Tasks.waitFor(
        true,
        () -> expectedEntities.equals(storeReader.getEntitiesForRun(serviceRun)),
        10,
        TimeUnit.SECONDS,
        100,
        TimeUnit.MILLISECONDS);

    // The field-level lineage is expected to be empty for the written endpoint field.
    FieldLineageReader fieldReader = getInjector().getInstance(FieldLineageReader.class);
    EndPointField destinationField = new EndPointField(EndPoint.of("ns", "endpoint2"), "offset");
    List<ProgramRunOperations> incoming =
        fieldReader.getIncomingOperations(destinationField, 1L, Long.MAX_VALUE - 1);
    Assert.assertTrue(incoming.isEmpty());
}
Also used : ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) ProgramRunOperations(io.cdap.cdap.proto.metadata.lineage.ProgramRunOperations) FieldLineageReader(io.cdap.cdap.data2.metadata.lineage.field.FieldLineageReader) EndPointField(io.cdap.cdap.data2.metadata.lineage.field.EndPointField) ArrayList(java.util.ArrayList) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) NamespacedEntityId(io.cdap.cdap.proto.id.NamespacedEntityId) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) FieldLineageWriter(io.cdap.cdap.data2.metadata.writer.FieldLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) MessagingLineageWriter(io.cdap.cdap.data2.metadata.writer.MessagingLineageWriter) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) FieldLineageInfo(io.cdap.cdap.data2.metadata.lineage.field.FieldLineageInfo) FieldLineageWriter(io.cdap.cdap.data2.metadata.writer.FieldLineageWriter) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

ProgramRunOperations (io.cdap.cdap.proto.metadata.lineage.ProgramRunOperations)7 Operation (io.cdap.cdap.api.lineage.field.Operation)5 HashSet (java.util.HashSet)5 ReadOperation (io.cdap.cdap.api.lineage.field.ReadOperation)4 WriteOperation (io.cdap.cdap.api.lineage.field.WriteOperation)4 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)4 ArrayList (java.util.ArrayList)4 TransformOperation (io.cdap.cdap.api.lineage.field.TransformOperation)3 Test (org.junit.Test)3 EndPoint (io.cdap.cdap.api.lineage.field.EndPoint)2 LineageStoreReader (io.cdap.cdap.data2.metadata.lineage.LineageStoreReader)2 EndPointField (io.cdap.cdap.data2.metadata.lineage.field.EndPointField)2 FieldLineageInfo (io.cdap.cdap.data2.metadata.lineage.field.FieldLineageInfo)2 FieldLineageReader (io.cdap.cdap.data2.metadata.lineage.field.FieldLineageReader)2 FieldLineageWriter (io.cdap.cdap.data2.metadata.writer.FieldLineageWriter)2 LineageWriter (io.cdap.cdap.data2.metadata.writer.LineageWriter)2 MessagingLineageWriter (io.cdap.cdap.data2.metadata.writer.MessagingLineageWriter)2 NamespacedEntityId (io.cdap.cdap.proto.id.NamespacedEntityId)2 ProgramId (io.cdap.cdap.proto.id.ProgramId)2 JsonSyntaxException (com.google.gson.JsonSyntaxException)1