use of co.cask.cdap.data2.registry.UsageRegistry in project cdap by caskdata.
the class UsageRegistryTest method testUsageRegistry.
@Test
public void testUsageRegistry() {
// instantiate a usage registry
UsageRegistry registry = new DefaultUsageRegistry(new TransactionExecutorFactory() {
@Override
public TransactionExecutor createExecutor(Iterable<TransactionAware> iterable) {
return dsFrameworkUtil.newInMemoryTransactionExecutor(iterable);
}
}, new ForwardingDatasetFramework(dsFrameworkUtil.getFramework()) {
@Nullable
@Override
public <T extends Dataset> T getDataset(DatasetId datasetInstanceId, Map<String, String> arguments, @Nullable ClassLoader classLoader) throws DatasetManagementException, IOException {
T t = super.getDataset(datasetInstanceId, arguments, classLoader);
if (t instanceof UsageDataset) {
@SuppressWarnings("unchecked") T t1 = (T) new WrappedUsageDataset((UsageDataset) t);
return t1;
}
return t;
}
});
// register usage for a stream and a dataset for single and multiple "owners", including a non-program
registry.register(flow11, datasetInstance1);
registry.register(flow12, stream1);
registry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
registry.registerAll(ImmutableList.of(flow21, flow22), stream1);
int count = WrappedUsageDataset.registerCount;
// validate usage
Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
// register datasets again
registry.register(flow11, datasetInstance1);
registry.registerAll(ImmutableList.of(flow21, flow22), datasetInstance2);
// validate that this does re-register previous usages (DefaultUsageRegistry no longer avoids re-registration)
count += 3;
Assert.assertEquals(count, WrappedUsageDataset.registerCount);
// validate usage
Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
// unregister app
registry.unregister(flow11.getParent());
// validate usage for that app is gone
Assert.assertEquals(ImmutableSet.of(), registry.getDatasets(flow11));
Assert.assertEquals(ImmutableSet.of(), registry.getStreams(flow12));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
Assert.assertEquals(ImmutableSet.of(), registry.getPrograms(datasetInstance1));
Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(stream1));
// register application 1 again
registry.register(flow11, datasetInstance1);
registry.register(flow12, stream1);
// validate it was re-registered
Assert.assertEquals(ImmutableSet.of(datasetInstance1), registry.getDatasets(flow11));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow12));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow21));
Assert.assertEquals(ImmutableSet.of(datasetInstance2), registry.getDatasets(flow22));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow21));
Assert.assertEquals(ImmutableSet.of(stream1), registry.getStreams(flow22));
Assert.assertEquals(ImmutableSet.of(flow11), registry.getPrograms(datasetInstance1));
Assert.assertEquals(ImmutableSet.of(flow21, flow22), registry.getPrograms(datasetInstance2));
Assert.assertEquals(ImmutableSet.of(flow12, flow21, flow22), registry.getPrograms(stream1));
// validate that this actually re-registered previous usages (through code in wrapped usage dataset)
Assert.assertEquals(count + 2, WrappedUsageDataset.registerCount);
}
use of co.cask.cdap.data2.registry.UsageRegistry in project cdap by caskdata.
the class MetadataSubscriberServiceTest method testSubscriber.
@Test
public void testSubscriber() throws InterruptedException, ExecutionException, TimeoutException {
LineageStoreReader lineageReader = getInjector().getInstance(LineageStoreReader.class);
ProgramRunId run1 = service1.run(RunIds.generate());
// Try to read lineage, which should be empty since we haven't start the MetadataSubscriberService yet.
Set<NamespacedEntityId> entities = lineageReader.getEntitiesForRun(run1);
Assert.assertTrue(entities.isEmpty());
// Write out some lineage information
LineageWriter lineageWriter = getInjector().getInstance(MessagingLineageWriter.class);
lineageWriter.addAccess(run1, dataset1, AccessType.READ);
lineageWriter.addAccess(run1, dataset2, AccessType.WRITE);
// Write the field level lineage
FieldLineageWriter fieldLineageWriter = getInjector().getInstance(MessagingLineageWriter.class);
ProgramRunId spark1Run1 = spark1.run(RunIds.generate(100));
ReadOperation read = new ReadOperation("read", "some read", EndPoint.of("ns", "endpoint1"), "offset", "body");
TransformOperation parse = new TransformOperation("parse", "parse body", Collections.singletonList(InputField.of("read", "body")), "name", "address");
WriteOperation write = new WriteOperation("write", "write data", EndPoint.of("ns", "endpoint2"), Arrays.asList(InputField.of("read", "offset"), InputField.of("parse", "name"), InputField.of("parse", "address")));
List<Operation> operations = new ArrayList<>();
operations.add(read);
operations.add(write);
operations.add(parse);
FieldLineageInfo info1 = new FieldLineageInfo(operations);
fieldLineageWriter.write(spark1Run1, info1);
ProgramRunId spark1Run2 = spark1.run(RunIds.generate(200));
fieldLineageWriter.write(spark1Run2, info1);
List<Operation> operations2 = new ArrayList<>();
operations2.add(read);
operations2.add(parse);
TransformOperation normalize = new TransformOperation("normalize", "normalize address", Collections.singletonList(InputField.of("parse", "address")), "address");
operations2.add(normalize);
WriteOperation anotherWrite = new WriteOperation("anotherwrite", "write data", EndPoint.of("ns", "endpoint2"), Arrays.asList(InputField.of("read", "offset"), InputField.of("parse", "name"), InputField.of("normalize", "address")));
operations2.add(anotherWrite);
FieldLineageInfo info2 = new FieldLineageInfo(operations2);
ProgramRunId spark1Run3 = spark1.run(RunIds.generate(300));
fieldLineageWriter.write(spark1Run3, info2);
// Emit some usages
UsageWriter usageWriter = getInjector().getInstance(MessagingUsageWriter.class);
usageWriter.register(spark1, dataset1);
usageWriter.registerAll(Collections.singleton(spark1), dataset3);
// Verifies lineage has been written
Set<NamespacedEntityId> expectedLineage = new HashSet<>(Arrays.asList(run1.getParent(), dataset1, dataset2));
Tasks.waitFor(true, () -> expectedLineage.equals(lineageReader.getEntitiesForRun(run1)), 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
// There shouldn't be any lineage for the "spark1" program, as only usage has been emitted.
Assert.assertTrue(lineageReader.getRelations(spark1, 0L, Long.MAX_VALUE, x -> true).isEmpty());
FieldLineageReader fieldLineageReader = getInjector().getInstance(FieldLineageReader.class);
Set<Operation> expectedOperations = new HashSet<>();
expectedOperations.add(read);
expectedOperations.add(anotherWrite);
List<ProgramRunOperations> expected = new ArrayList<>();
// Descending order of program execution
expected.add(new ProgramRunOperations(Collections.singleton(spark1Run3), expectedOperations));
expectedOperations = new HashSet<>();
expectedOperations.add(read);
expectedOperations.add(write);
expected.add(new ProgramRunOperations(new HashSet<>(Arrays.asList(spark1Run1, spark1Run2)), expectedOperations));
EndPointField endPointField = new EndPointField(EndPoint.of("ns", "endpoint2"), "offset");
Tasks.waitFor(expected, () -> fieldLineageReader.getIncomingOperations(endPointField, 1L, Long.MAX_VALUE - 1), 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
// Verifies usage has been written
Set<EntityId> expectedUsage = new HashSet<>(Arrays.asList(dataset1, dataset3));
UsageRegistry usageRegistry = getInjector().getInstance(UsageRegistry.class);
Tasks.waitFor(true, () -> expectedUsage.equals(usageRegistry.getDatasets(spark1)), 10, TimeUnit.SECONDS, 100, TimeUnit.MILLISECONDS);
}
use of co.cask.cdap.data2.registry.UsageRegistry in project cdap by caskdata.
the class RemoteRuntimeUsageRegistryTest method setup.
@BeforeClass
public static void setup() throws Exception {
Injector injector = getInjector();
usageRegistry = injector.getInstance(UsageRegistry.class);
runtimeUsageRegistry = injector.getInstance(RemoteRuntimeUsageRegistry.class);
}
Aggregations