Search in sources :

Example 6 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageCollapserTest method testCollapseMulti.

/**
 * Collapses a set of relations spanning two datasets and two flows on {@code CollapseType.ACCESS} and checks
 * the result: the READ and WRITE relations of data1/flow1/flowlet11 are expected to merge into a single
 * collapsed relation, while relations that differ in dataset, flow or flowlet stay separate.
 * The input lists some relations twice; the expected result contains one collapsed relation for each.
 */
@Test
public void testCollapseMulti() throws Exception {
    Set<Relation> relations = ImmutableSet.of(
        new Relation(data1, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data1, flow1, AccessType.WRITE, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data1, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet12)),
        new Relation(data1, flow2, AccessType.READ, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data1, flow2, AccessType.READ, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data2, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data2, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet11)));

    // Expected outcome of collapsing on access type only
    Set<CollapsedRelation> expected = toSet(
        new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1), toSet(flowlet11)),
        new CollapsedRelation(data1, flow1, toSet(AccessType.READ), toSet(runId1), toSet(flowlet12)),
        new CollapsedRelation(data1, flow2, toSet(AccessType.READ), toSet(runId1), toSet(flowlet11)),
        new CollapsedRelation(data2, flow1, toSet(AccessType.READ), toSet(runId1), toSet(flowlet11)));

    Assert.assertEquals(expected, LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS)));
}
Also used : CollapsedRelation(co.cask.cdap.data2.metadata.lineage.CollapsedRelation) Relation(co.cask.cdap.data2.metadata.lineage.Relation) Test(org.junit.Test)

Example 7 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageCollapserTest method testCollapseAccess.

/**
 * Collapses three relations of the same dataset, flow and run on {@code CollapseType.ACCESS} and checks the
 * result: the READ and WRITE relations that share flowlet11 are expected to merge into one collapsed relation
 * carrying both access types, and the READ relation for flowlet12 is expected to stay on its own.
 */
@Test
public void testCollapseAccess() throws Exception {
    Set<Relation> relations = ImmutableSet.of(
        new Relation(data1, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data1, flow1, AccessType.WRITE, runId1, ImmutableSet.of(flowlet11)),
        new Relation(data1, flow1, AccessType.READ, runId1, ImmutableSet.of(flowlet12)));

    // Expected outcome of collapsing on access type only
    Set<CollapsedRelation> expected = toSet(
        new CollapsedRelation(data1, flow1, toSet(AccessType.READ, AccessType.WRITE), toSet(runId1), toSet(flowlet11)),
        new CollapsedRelation(data1, flow1, toSet(AccessType.READ), toSet(runId1), toSet(flowlet12)));

    Assert.assertEquals(expected, LineageCollapser.collapseRelations(relations, ImmutableSet.of(CollapseType.ACCESS)));
}
Also used : CollapsedRelation(co.cask.cdap.data2.metadata.lineage.CollapsedRelation) Relation(co.cask.cdap.data2.metadata.lineage.Relation) Test(org.junit.Test)

Example 8 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdmin method getRollupRelations.

/**
 * Builds a new multimap from {@code relations} in which each relation that resolves to a workflow run is
 * replaced by a relation against that workflow program and run. Keys are carried over unchanged.
 *
 * @param relations the relations to roll up, grouped by key
 * @param runRecordMap passed through to {@code getWorkflowProgramRunid}
 * @param workflowIdMap passed through to {@code getWorkflowProgramRunid}
 * @return a fresh {@link HashMultimap}; the input multimap is not modified
 * @throws NotFoundException declared by this method; presumably propagated from
 *         {@code getWorkflowProgramRunid} (not visible here - confirm)
 */
private Multimap<RelationKey, Relation> getRollupRelations(Multimap<RelationKey, Relation> relations, Map<ProgramRunId, RunRecordMeta> runRecordMap, Map<String, ProgramRunId> workflowIdMap) throws NotFoundException {
    Multimap<RelationKey, Relation> rolledUp = HashMultimap.create();
    for (Map.Entry<RelationKey, Relation> entry : relations.entries()) {
        RelationKey key = entry.getKey();
        Relation relation = entry.getValue();

        ProgramRunId workflowRun = getWorkflowProgramRunid(relation, runRecordMap, workflowIdMap);
        if (workflowRun == null) {
            // No workflow run resolved for this relation: keep it as is
            rolledUp.put(key, relation);
            continue;
        }

        ProgramId workflowProgram = new ProgramId(workflowRun.getNamespace(), workflowRun.getApplication(), workflowRun.getType(), workflowRun.getProgram());
        NamespacedEntityId data = relation.getData();
        // Relation is constructed from either a DatasetId or a StreamId, hence the explicit casts
        Relation workflowRelation = data instanceof DatasetId
            ? new Relation((DatasetId) data, workflowProgram, relation.getAccess(), RunIds.fromString(workflowRun.getRun()))
            : new Relation((StreamId) data, workflowProgram, relation.getAccess(), RunIds.fromString(workflowRun.getRun()));
        rolledUp.put(key, workflowRelation);
    }
    return rolledUp;
}
Also used : Relation(co.cask.cdap.data2.metadata.lineage.Relation) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) StreamId(co.cask.cdap.proto.id.StreamId) Collection(java.util.Collection) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramId(co.cask.cdap.proto.id.ProgramId) HashMap(java.util.HashMap) Map(java.util.Map) DatasetId(co.cask.cdap.proto.id.DatasetId)

Example 9 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdmin method getWorkflowIds.

/**
 * Collects the values of the {@code "workflowrunid"} run-record property for all given relations.
 *
 * For every relation, the run record is looked up in {@code runRecordMap} by a {@link ProgramRunId} built from
 * the relation's program and run. Relations with no run record, or whose run record has no
 * {@code "workflowrunid"} property, contribute nothing.
 *
 * @param relations the relations whose runs are inspected
 * @param runRecordMap run records keyed by program run id
 * @return the distinct property values found; empty if none
 */
private Set<String> getWorkflowIds(Multimap<RelationKey, Relation> relations, Map<ProgramRunId, RunRecordMeta> runRecordMap) throws NotFoundException {
    final String workflowRunIdKey = "workflowrunid";
    final Set<String> collectedIds = new HashSet<>();
    for (Relation relation : relations.values()) {
        ProgramRunId programRunId = new ProgramRunId(relation.getProgram().getNamespace(), relation.getProgram().getApplication(), relation.getProgram().getType(), relation.getProgram().getProgram(), relation.getRun().getId());
        RunRecordMeta runRecord = runRecordMap.get(programRunId);
        if (runRecord == null || !runRecord.getProperties().containsKey(workflowRunIdKey)) {
            continue;
        }
        collectedIds.add(runRecord.getProperties().get(workflowRunIdKey));
    }
    return collectedIds;
}
Also used : Relation(co.cask.cdap.data2.metadata.lineage.Relation) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) HashSet(java.util.HashSet)

Example 10 with Relation

use of io.cdap.cdap.data2.metadata.lineage.Relation in project cdap by caskdata.

the class LineageAdmin method filterAndAddRelations.

/**
 * Adds the given relations to {@code relations}, each keyed by a {@link RelationKey} built from the relation
 * that is actually stored.
 *
 * When {@code rollUpWorkflow} is true and a relation's program run has an entry in {@code programWorkflowMap},
 * the relation is replaced by one that points at the mapped workflow run (its parent program and its run id).
 * A dataset whose name ends with that workflow run id is treated as a local dataset of the workflow: its
 * relation is skipped and the dataset is collected in the returned set instead.
 *
 * @param rollUpWorkflow whether inner programs should be replaced by their workflow
 * @param relations the multimap that receives the resulting relations
 * @param programWorkflowMap maps a program run to the workflow run it is rolled up into
 * @param relationss the relations to filter and add
 * @return the local datasets whose relations were skipped; empty if none
 */
private Set<DatasetId> filterAndAddRelations(boolean rollUpWorkflow, Multimap<RelationKey, Relation> relations, Map<ProgramRunId, ProgramRunId> programWorkflowMap, Set<Relation> relationss) {
    Set<DatasetId> skippedLocalDatasets = new HashSet<>();
    for (Relation candidate : relationss) {
        Relation toAdd = candidate;
        if (rollUpWorkflow && programWorkflowMap.containsKey(candidate.getProgramRunId())) {
            ProgramRunId workflowRun = programWorkflowMap.get(candidate.getProgramRunId());
            DatasetId dataset = (DatasetId) candidate.getData();
            // Local datasets always end with the workflow run id; record them and do not add their relation
            boolean isLocalDataset = dataset.getDataset().endsWith(workflowRun.getRun());
            if (isLocalDataset) {
                skippedLocalDatasets.add(dataset);
                continue;
            }
            toAdd = new Relation(dataset, workflowRun.getParent(), candidate.getAccess(), RunIds.fromString(workflowRun.getRun()));
        }
        relations.put(new RelationKey(toAdd), toAdd);
    }
    return skippedLocalDatasets;
}
Also used : Relation(io.cdap.cdap.data2.metadata.lineage.Relation) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) DatasetId(io.cdap.cdap.proto.id.DatasetId) HashSet(java.util.HashSet)

Aggregations

Test (org.junit.Test)45 Relation (io.cdap.cdap.data2.metadata.lineage.Relation)38 Lineage (io.cdap.cdap.data2.metadata.lineage.Lineage)26 Relation (co.cask.cdap.data2.metadata.lineage.Relation)20 Store (io.cdap.cdap.app.store.Store)20 DefaultLineageStoreReader (io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader)20 LineageStoreReader (io.cdap.cdap.data2.metadata.lineage.LineageStoreReader)20 BasicLineageWriter (io.cdap.cdap.data2.metadata.writer.BasicLineageWriter)16 LineageWriter (io.cdap.cdap.data2.metadata.writer.LineageWriter)16 TransactionRunner (io.cdap.cdap.spi.data.transaction.TransactionRunner)16 RunId (org.apache.twill.api.RunId)15 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)14 DatasetId (io.cdap.cdap.proto.id.DatasetId)14 ProgramId (io.cdap.cdap.proto.id.ProgramId)14 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)14 HashSet (java.util.HashSet)12 Lineage (co.cask.cdap.data2.metadata.lineage.Lineage)10 CollapsedRelation (io.cdap.cdap.data2.metadata.lineage.CollapsedRelation)10 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)9 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)8