
Example 1 with UsageRegistry

Use of co.cask.cdap.data2.registry.UsageRegistry in project cdap by caskdata.

In the class UsageRegistryTest, method testUsageRegistry:

@Test
public void testUsageRegistry() {
    // instantiate a usage registry
    UsageRegistry registry = new DefaultUsageRegistry(new TransactionExecutorFactory() {

        @Override
        public TransactionExecutor createExecutor(Iterable<TransactionAware> iterable) {
            return dsFrameworkUtil.newInMemoryTransactionExecutor(iterable);
        }
    }, new ForwardingDatasetFramework(dsFrameworkUtil.getFramework()) {

        @Nullable
        @Override
        public <T extends Dataset> T getDataset(DatasetId datasetInstanceId, Map<String, String> arguments, @Nullable ClassLoader classLoader) throws DatasetManagementException, IOException {
            T t = super.getDataset(datasetInstanceId, arguments, classLoader);
            if (t instanceof UsageDataset) {
                @SuppressWarnings("unchecked") T t1 = (T) new WrappedUsageDataset((UsageDataset) t);
                return t1;
            }
            return t;
        }
    });
    // register usage for a stream and a dataset, each for single and multiple program "owners"
    registry.register(flow11, datasetInstance1);
    registry.register(flow12, stream1);
    registry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
    registry.registerAll(ImmutableList.of(flow21, flow22), stream1);
    int count = WrappedUsageDataset.registerCount;
    // validate usage
    Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
    // register datasets again
    registry.register(flow11, datasetInstance1);
    registry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
    // validate that this does re-register previous usages (DefaultUsageRegistry no longer avoids re-registration)
    count += 3;
    Assert.assertEquals(count, WrappedUsageDataset.registerCount);
    // validate usage
    Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
    // unregister app
    registry.unregister(flow11.getParent());
    // validate usage for that app is gone
    Assert.assertEquals(ImmutableSet.of(), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(stream1));
    // register application 1 again
    registry.register(flow11, datasetInstance1);
    registry.register(flow12, stream1);
    // validate it was re-registered
    Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
    Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
    Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
    Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
    Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
    Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
    // validate that this actually re-registered previous usages (through code in wrapped usage dataset)
    Assert.assertEquals(count + 2, WrappedUsageDataset.registerCount);
}
Also used : TransactionExecutor(org.apache.tephra.TransactionExecutor) IOException(java.io.IOException) TransactionExecutorFactory(org.apache.tephra.TransactionExecutorFactory) DatasetId(co.cask.cdap.proto.id.DatasetId) DatasetManagementException(co.cask.cdap.api.dataset.DatasetManagementException) TransactionAware(org.apache.tephra.TransactionAware) ForwardingDatasetFramework(co.cask.cdap.data2.dataset2.ForwardingDatasetFramework) Nullable(javax.annotation.Nullable) Test(org.junit.Test)
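The program, dataset, and stream ids used in this test (flow11, flow12, flow21, flow22, datasetInstance1, datasetInstance2, stream1) are inherited from the test's base class and are not shown in this snippet. A minimal sketch of how such ids could be built with the co.cask.cdap.proto.id builders, assuming a single test namespace; the names below are illustrative assumptions, not the test's actual values:

import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.proto.id.ApplicationId;
import co.cask.cdap.proto.id.DatasetId;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.proto.id.ProgramId;
import co.cask.cdap.proto.id.StreamId;

// Hypothetical id definitions (fields on the test class); the real test defines its own.
private static final NamespaceId NS = new NamespaceId("ns1");
private static final ApplicationId APP1 = NS.app("app1");
private static final ApplicationId APP2 = NS.app("app2");
private static final ProgramId flow11 = APP1.program(ProgramType.FLOW, "flow11");
private static final ProgramId flow12 = APP1.program(ProgramType.FLOW, "flow12");
private static final ProgramId flow21 = APP2.program(ProgramType.FLOW, "flow21");
private static final ProgramId flow22 = APP2.program(ProgramType.FLOW, "flow22");
private static final DatasetId datasetInstance1 = NS.dataset("ds1");
private static final DatasetId datasetInstance2 = NS.dataset("ds2");
private static final StreamId stream1 = NS.stream("stream1");

With ids shaped this way, registry.unregister(flow11.getParent()) removes everything recorded for APP1, which is why the assertions for flow11 and flow12 turn empty after the unregister step while the APP2 flows keep their usage.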

Example 2 with UsageRegistry

Use of io.cdap.cdap.data2.registry.UsageRegistry in project cdap by caskdata.

In the class MetadataSubscriberServiceTest, method testSubscriber:

@Test
public void testSubscriber() throws InterruptedException, ExecutionException, TimeoutException {
    LineageStoreReader lineageReader = getInjector().getInstance(LineageStoreReader.class);
    ProgramRunId run1 = service1.run(RunIds.generate());
    // Try to read lineage, which should be empty since we haven't started the MetadataSubscriberService yet.
    Set<NamespacedEntityId> entities = lineageReader.getEntitiesForRun(run1);
    Assert.assertTrue(entities.isEmpty());
    // Write out some lineage information
    LineageWriter lineageWriter = getInjector().getInstance(MessagingLineageWriter.class);
    lineageWriter.addAccess(run1, dataset1, AccessType.READ);
    lineageWriter.addAccess(run1, dataset2, AccessType.WRITE);
    // Write the field level lineage
    FieldLineageWriter fieldLineageWriter = getInjector().getInstance(MessagingLineageWriter.class);
    ProgramRunId spark1Run1 = spark1.run(RunIds.generate(100));
    ReadOperation read = new ReadOperation("read", "some read", EndPoint.of("ns", "endpoint1"), "offset", "body");
    TransformOperation parse = new TransformOperation("parse", "parse body", Collections.singletonList(InputField.of("read", "body")), "name", "address");
    WriteOperation write = new WriteOperation("write", "write data", EndPoint.of("ns", "endpoint2"), Arrays.asList(InputField.of("read", "offset"), InputField.of("parse", "name"), InputField.of("parse", "address")));
    List<Operation> operations = new ArrayList<>();
    operations.add(read);
    operations.add(write);
    operations.add(parse);
    FieldLineageInfo info1 = new FieldLineageInfo(operations);
    fieldLineageWriter.write(spark1Run1, info1);
    ProgramRunId spark1Run2 = spark1.run(RunIds.generate(200));
    fieldLineageWriter.write(spark1Run2, info1);
    List<Operation> operations2 = new ArrayList<>();
    operations2.add(read);
    operations2.add(parse);
    TransformOperation normalize = new TransformOperation("normalize", "normalize address", Collections.singletonList(InputField.of("parse", "address")), "address");
    operations2.add(normalize);
    WriteOperation anotherWrite = new WriteOperation("anotherwrite", "write data", EndPoint.of("ns", "endpoint2"), Arrays.asList(InputField.of("read", "offset"), InputField.of("parse", "name"), InputField.of("normalize", "address")));
    operations2.add(anotherWrite);
    FieldLineageInfo info2 = new FieldLineageInfo(operations2);
    ProgramRunId spark1Run3 = spark1.run(RunIds.generate(300));
    fieldLineageWriter.write(spark1Run3, info2);
    // Emit some usages
    UsageWriter usageWriter = getInjector().getInstance(MessagingUsageWriter.class);
    usageWriter.register(spark1, dataset1);
    usageWriter.registerAll(Collections.singleton(spark1), dataset3);
    // Verifies lineage has been written
    Set<NamespacedEntityId> expectedLineage = new HashSet<>(Arrays.asList(run1.getParent(), dataset1, dataset2));
    Tasks.waitFor(true, () -> expectedLineage.equals(lineageReader.getEntitiesForRun(run1)), 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
    // There shouldn't be any lineage for the "spark1" program, as only usage has been emitted.
    Assert.assertTrue(lineageReader.getRelations(spark1, 0L, Long.MAX_VALUE, x -> true).isEmpty());
    FieldLineageReader fieldLineageReader = getInjector().getInstance(FieldLineageReader.class);
    Set<Operation> expectedOperations = new HashSet<>();
    expectedOperations.add(read);
    expectedOperations.add(anotherWrite);
    List<ProgramRunOperations> expected = new ArrayList<>();
    // Descending order of program execution
    expected.add(new ProgramRunOperations(Collections.singleton(spark1Run3), expectedOperations));
    expectedOperations = new HashSet<>();
    expectedOperations.add(read);
    expectedOperations.add(write);
    expected.add(new ProgramRunOperations(new HashSet<>(Arrays.asList(spark1Run1, spark1Run2)), expectedOperations));
    EndPointField endPointField = new EndPointField(EndPoint.of("ns", "endpoint2"), "offset");
    Tasks.waitFor(expected, () -> fieldLineageReader.getIncomingOperations(endPointField, 1L, Long.MAX_VALUE - 1), 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
    // Verifies usage has been written
    Set<EntityId> expectedUsage = new HashSet<>(Arrays.asList(dataset1, dataset3));
    UsageRegistry usageRegistry = getInjector().getInstance(UsageRegistry.class);
    Tasks.waitFor(true, () -> expectedUsage.equals(usageRegistry.getDatasets(spark1)), 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
Also used : ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) ProgramRunOperations(io.cdap.cdap.proto.metadata.lineage.ProgramRunOperations) UsageWriter(io.cdap.cdap.data2.registry.UsageWriter) MessagingUsageWriter(io.cdap.cdap.data2.registry.MessagingUsageWriter) FieldLineageReader(io.cdap.cdap.data2.metadata.lineage.field.FieldLineageReader) EndPointField(io.cdap.cdap.data2.metadata.lineage.field.EndPointField) UsageRegistry(io.cdap.cdap.data2.registry.UsageRegistry) ArrayList(java.util.ArrayList) ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) MetadataOperation(io.cdap.cdap.data2.metadata.writer.MetadataOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) NamespacedEntityId(io.cdap.cdap.proto.id.NamespacedEntityId) EntityId(io.cdap.cdap.proto.id.EntityId) NamespacedEntityId(io.cdap.cdap.proto.id.NamespacedEntityId) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) MessagingLineageWriter(io.cdap.cdap.data2.metadata.writer.MessagingLineageWriter) FieldLineageWriter(io.cdap.cdap.data2.metadata.writer.FieldLineageWriter) LineageWriter(io.cdap.cdap.data2.metadata.writer.LineageWriter) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) FieldLineageInfo(io.cdap.cdap.data2.metadata.lineage.field.FieldLineageInfo) FieldLineageWriter(io.cdap.cdap.data2.metadata.writer.FieldLineageWriter) HashSet(java.util.HashSet) Test(org.junit.Test)
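The Tasks.waitFor assertion on endPointField checks field-level lineage for the offset field of endpoint2, which only the read and write/anotherWrite operations touch. A hedged follow-on check, reusing the variables and the FieldLineageReader API already shown above, could query the address field of the same endpoint instead; the expected sets below are inferred from the operations built in the test (read -> parse -> normalize -> anotherWrite for spark1Run3, read -> parse -> write for spark1Run1 and spark1Run2) and are an illustrative assumption, not part of the original test:

// Hypothetical follow-on assertion: incoming operations for the "address" field of endpoint2.
// For spark1Run3 the address flows through read -> parse -> normalize -> anotherWrite;
// for spark1Run1/spark1Run2 it flows through read -> parse -> write.
EndPointField addressField = new EndPointField(EndPoint.of("ns", "endpoint2"), "address");
List<ProgramRunOperations> expectedForAddress = new ArrayList<>();
expectedForAddress.add(new ProgramRunOperations(
    Collections.singleton(spark1Run3),
    new HashSet<>(Arrays.asList(read, parse, normalize, anotherWrite))));
expectedForAddress.add(new ProgramRunOperations(
    new HashSet<>(Arrays.asList(spark1Run1, spark1Run2)),
    new HashSet<>(Arrays.asList(read, parse, write))));
Tasks.waitFor(expectedForAddress,
    () -> fieldLineageReader.getIncomingOperations(addressField, 1L, Long.MAX_VALUE - 1),
    10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);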

Example 3 with UsageRegistry

Use of co.cask.cdap.data2.registry.UsageRegistry in project cdap by caskdata.

In the class RemoteRuntimeUsageRegistryTest, method setup:

@BeforeClass
public static void setup() throws Exception {
    Injector injector = getInjector();
    usageRegistry = injector.getInstance(UsageRegistry.class);
    runtimeUsageRegistry = injector.getInstance(RemoteRuntimeUsageRegistry.class);
}
Also used : Injector(com.google.inject.Injector) UsageRegistry(co.cask.cdap.data2.registry.UsageRegistry) BeforeClass(org.junit.BeforeClass)
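Only the @BeforeClass wiring is shown here; the static registry fields and the tests that exercise them are defined elsewhere in RemoteRuntimeUsageRegistryTest. A hedged sketch of what the surrounding class shape and one such test could look like, using only calls already demonstrated in Example 1; the ids, field declarations, and the assumption that RemoteRuntimeUsageRegistry exposes a register(ProgramId, DatasetId) method mirroring UsageRegistry.register are illustrative, not taken from the project:

// Hypothetical companions to the setup() method above; the real fields and tests differ.
private static UsageRegistry usageRegistry;
private static RemoteRuntimeUsageRegistry runtimeUsageRegistry;

@Test
public void testRuntimeRegistrationIsVisibleThroughUsageRegistry() {
    // Illustrative ids only -- the real test defines its own program and dataset ids.
    ProgramId worker = new NamespaceId("ns1").app("app1").program(ProgramType.WORKER, "worker1");
    DatasetId dataset = new NamespaceId("ns1").dataset("ds1");
    // Assumed to have the same shape as UsageRegistry.register(...) from Example 1.
    runtimeUsageRegistry.register(worker, dataset);
    // Registrations made through the runtime registry should be readable via UsageRegistry.
    Assert.assertEquals(ImmutableSet.of(dataset), usageRegistry.getDatasets(worker));
}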
