Search in sources :

Example 86 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class LineageDatasetTest method testMultipleRelations.

@Test
public void testMultipleRelations() throws Exception {
    final LineageDataset lineageDataset = getLineageDataset("testMultipleRelations");
    Assert.assertNotNull(lineageDataset);
    TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) lineageDataset);
    final RunId runId1 = RunIds.generate(10000);
    final RunId runId2 = RunIds.generate(20000);
    final RunId runId3 = RunIds.generate(30000);
    final RunId runId4 = RunIds.generate(40000);
    final DatasetId datasetInstance1 = NamespaceId.DEFAULT.dataset("dataset1");
    final DatasetId datasetInstance2 = NamespaceId.DEFAULT.dataset("dataset2");
    final StreamId stream1 = NamespaceId.DEFAULT.stream("stream1");
    final StreamId stream2 = NamespaceId.DEFAULT.stream("stream2");
    final ProgramId program1 = NamespaceId.DEFAULT.app("app1").flow("flow1");
    final FlowletId flowlet1 = program1.flowlet("flowlet1");
    final ProgramId program2 = NamespaceId.DEFAULT.app("app2").worker("worker2");
    final ProgramId program3 = NamespaceId.DEFAULT.app("app3").service("service3");
    final ProgramRunId run11 = program1.run(runId1.getId());
    final ProgramRunId run22 = program2.run(runId2.getId());
    final ProgramRunId run23 = program2.run(runId3.getId());
    final ProgramRunId run34 = program3.run(runId4.getId());
    final long now = System.currentTimeMillis();
    final long run11Data1AccessTime = now;
    final long run22Data2AccessTime = now + 1;
    final long run22Stream1AccessTime = now + 2;
    final long run23Stream2AccessTime = now + 1;
    final long run23Data2AccessTime = now + 3;
    //noinspection UnnecessaryLocalVariable
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            lineageDataset.addAccess(run11, datasetInstance1, AccessType.READ, run11Data1AccessTime, flowlet1);
            lineageDataset.addAccess(run22, datasetInstance2, AccessType.WRITE, run22Data2AccessTime);
            lineageDataset.addAccess(run22, stream1, AccessType.READ, run22Stream1AccessTime);
            lineageDataset.addAccess(run23, stream2, AccessType.READ, run23Stream2AccessTime);
            lineageDataset.addAccess(run23, datasetInstance2, AccessType.WRITE, run23Data2AccessTime);
            lineageDataset.addAccess(run34, datasetInstance2, AccessType.READ_WRITE, System.currentTimeMillis());
            lineageDataset.addAccess(run34, stream2, AccessType.UNKNOWN, System.currentTimeMillis());
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance1, program1, AccessType.READ, runId1, ImmutableSet.of(flowlet1))), lineageDataset.getRelations(datasetInstance1, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3), new Relation(datasetInstance2, program3, AccessType.READ_WRITE, runId4)), lineageDataset.getRelations(datasetInstance2, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(stream1, program2, AccessType.READ, runId2)), lineageDataset.getRelations(stream1, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(stream2, program2, AccessType.READ, runId3), new Relation(stream2, program3, AccessType.UNKNOWN, runId4)), lineageDataset.getRelations(stream2, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(stream1, program2, AccessType.READ, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3), new Relation(stream2, program2, AccessType.READ, runId3)), lineageDataset.getRelations(program2, 0, 100000, Predicates.<Relation>alwaysTrue()));
            // Reduced time range
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3)), lineageDataset.getRelations(datasetInstance2, 0, 35000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(toSet(program1, datasetInstance1), lineageDataset.getEntitiesForRun(run11));
            Assert.assertEquals(ImmutableList.of(run11Data1AccessTime), lineageDataset.getAccessTimesForRun(run11));
            Assert.assertEquals(toSet(program2, datasetInstance2, stream1), lineageDataset.getEntitiesForRun(run22));
            Assert.assertEquals(ImmutableList.of(run22Data2AccessTime, run22Stream1AccessTime), lineageDataset.getAccessTimesForRun(run22));
            Assert.assertEquals(toSet(program2, datasetInstance2, stream2), lineageDataset.getEntitiesForRun(run23));
            Assert.assertEquals(ImmutableList.of(run23Data2AccessTime, run23Stream2AccessTime), lineageDataset.getAccessTimesForRun(run23));
            Assert.assertEquals(toSet(program3, datasetInstance2, stream2), lineageDataset.getEntitiesForRun(run34));
        }
    });
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) TransactionExecutor(org.apache.tephra.TransactionExecutor) ProgramId(co.cask.cdap.proto.id.ProgramId) DatasetId(co.cask.cdap.proto.id.DatasetId) FlowletId(co.cask.cdap.proto.id.FlowletId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Test(org.junit.Test)

Example 87 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamHandlerTest method testNamespacedStreamEvents.

@Test
public void testNamespacedStreamEvents() throws Exception {
    // Create two streams with the same name, in different namespaces.
    String streamName = "testNamespacedEvents";
    StreamId streamId1 = new StreamId(TEST_NAMESPACE1, streamName);
    StreamId streamId2 = new StreamId(TEST_NAMESPACE2, streamName);
    createStream(streamId1);
    createStream(streamId2);
    List<String> eventsSentToStream1 = Lists.newArrayList();
    // Enqueue 10 entries to the stream in the first namespace
    for (int i = 0; i < 10; ++i) {
        String body = streamId1.getNamespace() + i;
        sendEvent(streamId1, body);
        eventsSentToStream1.add(body);
    }
    List<String> eventsSentToStream2 = Lists.newArrayList();
    // Enqueue only 5 entries to the stream in the second namespace, decrementing the value each time
    for (int i = 0; i > -5; --i) {
        String body = streamId1.getNamespace() + i;
        sendEvent(streamId2, body);
        eventsSentToStream2.add(body);
    }
    // Test that even though the stream names are the same, the events ingested into the individual streams
    // are exactly what are fetched from the individual streams.
    List<String> eventsFetchedFromStream1 = fetchEvents(streamId1);
    Assert.assertEquals(eventsSentToStream1, eventsFetchedFromStream1);
    List<String> eventsFetchedFromStream2 = fetchEvents(streamId2);
    Assert.assertEquals(eventsSentToStream2, eventsFetchedFromStream2);
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Test(org.junit.Test)

Example 88 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamHandlerTest method testNamespacedMetrics.

@Test
public void testNamespacedMetrics() throws Exception {
    // Create two streams with the same name, in different namespaces.
    String streamName = "testNamespacedStreamMetrics";
    StreamId streamId1 = new StreamId(TEST_NAMESPACE1, streamName);
    StreamId streamId2 = new StreamId(TEST_NAMESPACE2, streamName);
    createStream(streamId1);
    createStream(streamId2);
    // Enqueue 10 entries to the stream in the first namespace
    for (int i = 0; i < 10; ++i) {
        sendEvent(streamId1, Integer.toString(i));
    }
    // Enqueue only 2 entries to the stream in the second namespace
    for (int i = 0; i < 2; ++i) {
        sendEvent(streamId2, Integer.toString(i));
    }
    // Check metrics to verify that the metric for events processed is specific to each stream
    checkEventsProcessed(streamId1, 10L, 10);
    checkEventsProcessed(streamId2, 2L, 10);
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Test(org.junit.Test)

Example 89 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class StreamHandlerTest method testStreamCreateInNonexistentNamespace.

@Test
public void testStreamCreateInNonexistentNamespace() throws Exception {
    NamespaceId originallyNonExistentNamespace = new NamespaceId("originallyNonExistentNamespace");
    StreamId streamId = originallyNonExistentNamespace.stream("streamName");
    HttpResponse response = createStream(streamId, 404);
    Assert.assertEquals(HttpResponseStatus.NOT_FOUND.getCode(), response.getResponseCode());
    // once the namespace exists, the same stream create works.
    namespaceAdmin.create(new NamespaceMeta.Builder().setName(originallyNonExistentNamespace).build());
    createStream(streamId);
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) HttpResponse(co.cask.common.http.HttpResponse) NamespaceId(co.cask.cdap.proto.id.NamespaceId) Test(org.junit.Test)

Example 90 with StreamId

use of co.cask.cdap.proto.id.StreamId in project cdap by caskdata.

the class LineageTestRun method testAllProgramsLineage.

@Test
public void testAllProgramsLineage() throws Exception {
    NamespaceId namespace = new NamespaceId("testAllProgramsLineage");
    ApplicationId app = namespace.app(AllProgramsApp.NAME);
    ProgramId flow = app.flow(AllProgramsApp.NoOpFlow.NAME);
    ProgramId mapreduce = app.mr(AllProgramsApp.NoOpMR.NAME);
    ProgramId mapreduce2 = app.mr(AllProgramsApp.NoOpMR2.NAME);
    ProgramId spark = app.spark(AllProgramsApp.NoOpSpark.NAME);
    ProgramId service = app.service(AllProgramsApp.NoOpService.NAME);
    ProgramId worker = app.worker(AllProgramsApp.NoOpWorker.NAME);
    ProgramId workflow = app.workflow(AllProgramsApp.NoOpWorkflow.NAME);
    DatasetId dataset = namespace.dataset(AllProgramsApp.DATASET_NAME);
    DatasetId dataset2 = namespace.dataset(AllProgramsApp.DATASET_NAME2);
    DatasetId dataset3 = namespace.dataset(AllProgramsApp.DATASET_NAME3);
    StreamId stream = namespace.stream(AllProgramsApp.STREAM_NAME);
    namespaceClient.create(new NamespaceMeta.Builder().setName(namespace.getNamespace()).build());
    try {
        appClient.deploy(namespace, createAppJarFile(AllProgramsApp.class));
        // Add metadata
        ImmutableSet<String> sparkTags = ImmutableSet.of("spark-tag1", "spark-tag2");
        addTags(spark, sparkTags);
        Assert.assertEquals(sparkTags, getTags(spark, MetadataScope.USER));
        ImmutableSet<String> workerTags = ImmutableSet.of("worker-tag1");
        addTags(worker, workerTags);
        Assert.assertEquals(workerTags, getTags(worker, MetadataScope.USER));
        ImmutableMap<String, String> datasetProperties = ImmutableMap.of("data-key1", "data-value1");
        addProperties(dataset, datasetProperties);
        Assert.assertEquals(datasetProperties, getProperties(dataset, MetadataScope.USER));
        // Start all programs
        RunId flowRunId = runAndWait(flow);
        RunId mrRunId = runAndWait(mapreduce);
        RunId mrRunId2 = runAndWait(mapreduce2);
        RunId sparkRunId = runAndWait(spark);
        runAndWait(workflow);
        RunId workflowMrRunId = getRunId(mapreduce, mrRunId);
        RunId serviceRunId = runAndWait(service);
        // Worker makes a call to service to make it access datasets,
        // hence need to make sure service starts before worker, and stops after it.
        RunId workerRunId = runAndWait(worker);
        // Wait for programs to finish
        waitForStop(flow, true);
        waitForStop(mapreduce, false);
        waitForStop(mapreduce2, false);
        waitForStop(spark, false);
        waitForStop(workflow, false);
        waitForStop(worker, false);
        waitForStop(service, true);
        long now = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
        long oneHour = TimeUnit.HOURS.toSeconds(1);
        // Fetch dataset lineage
        LineageRecord lineage = fetchLineage(dataset, now - oneHour, now + oneHour, toSet(CollapseType.ACCESS), 10);
        // dataset is accessed by all programs
        LineageRecord expected = LineageSerializer.toLineageRecord(now - oneHour, now + oneHour, new Lineage(ImmutableSet.of(// Dataset access
        new Relation(dataset, flow, AccessType.UNKNOWN, flowRunId, toSet(flow.flowlet(AllProgramsApp.A.NAME))), new Relation(dataset, mapreduce, AccessType.WRITE, mrRunId), new Relation(dataset, mapreduce2, AccessType.WRITE, mrRunId2), new Relation(dataset2, mapreduce2, AccessType.READ, mrRunId2), new Relation(dataset, spark, AccessType.READ, sparkRunId), new Relation(dataset2, spark, AccessType.WRITE, sparkRunId), new Relation(dataset3, spark, AccessType.READ, sparkRunId), new Relation(dataset3, spark, AccessType.WRITE, sparkRunId), new Relation(dataset, mapreduce, AccessType.WRITE, workflowMrRunId), new Relation(dataset, service, AccessType.WRITE, serviceRunId), new Relation(dataset, worker, AccessType.WRITE, workerRunId), // Stream access
        new Relation(stream, flow, AccessType.READ, flowRunId, ImmutableSet.of(flow.flowlet(AllProgramsApp.A.NAME))), new Relation(stream, mapreduce, AccessType.READ, mrRunId), new Relation(stream, spark, AccessType.READ, sparkRunId), new Relation(stream, mapreduce, AccessType.READ, workflowMrRunId), new Relation(stream, worker, AccessType.WRITE, workerRunId))), toSet(CollapseType.ACCESS));
        Assert.assertEquals(expected, lineage);
        // Fetch stream lineage
        lineage = fetchLineage(stream, now - oneHour, now + oneHour, toSet(CollapseType.ACCESS), 10);
        // stream too is accessed by all programs
        Assert.assertEquals(expected, lineage);
        // Assert metadata
        // Id.Flow needs conversion to Id.Program JIRA - CDAP-3658
        Assert.assertEquals(toSet(new MetadataRecord(app, MetadataScope.USER, emptyMap(), emptySet()), new MetadataRecord(flow, MetadataScope.USER, emptyMap(), emptySet()), new MetadataRecord(dataset, MetadataScope.USER, datasetProperties, emptySet()), new MetadataRecord(stream, MetadataScope.USER, emptyMap(), emptySet())), fetchRunMetadata(flow.run(flowRunId.getId())));
        // Id.Worker needs conversion to Id.Program JIRA - CDAP-3658
        ProgramId programForWorker = new ProgramId(worker.getNamespace(), worker.getApplication(), worker.getType(), worker.getEntityName());
        Assert.assertEquals(toSet(new MetadataRecord(app, MetadataScope.USER, emptyMap(), emptySet()), new MetadataRecord(programForWorker, MetadataScope.USER, emptyMap(), workerTags), new MetadataRecord(dataset, MetadataScope.USER, datasetProperties, emptySet()), new MetadataRecord(stream, MetadataScope.USER, emptyMap(), emptySet())), fetchRunMetadata(worker.run(workerRunId.getId())));
        // Id.Spark needs conversion to Id.Program JIRA - CDAP-3658
        ProgramId programForSpark = new ProgramId(spark.getNamespace(), spark.getApplication(), spark.getType(), spark.getEntityName());
        Assert.assertEquals(toSet(new MetadataRecord(app, MetadataScope.USER, emptyMap(), emptySet()), new MetadataRecord(programForSpark, MetadataScope.USER, emptyMap(), sparkTags), new MetadataRecord(dataset, MetadataScope.USER, datasetProperties, emptySet()), new MetadataRecord(dataset2, MetadataScope.USER, emptyMap(), emptySet()), new MetadataRecord(dataset3, MetadataScope.USER, emptyMap(), emptySet()), new MetadataRecord(stream, MetadataScope.USER, emptyMap(), emptySet())), fetchRunMetadata(spark.run(sparkRunId.getId())));
    } finally {
        namespaceClient.delete(namespace);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) AllProgramsApp(co.cask.cdap.client.app.AllProgramsApp) ProgramId(co.cask.cdap.proto.id.ProgramId) DatasetId(co.cask.cdap.proto.id.DatasetId) Relation(co.cask.cdap.data2.metadata.lineage.Relation) LineageRecord(co.cask.cdap.proto.metadata.lineage.LineageRecord) NamespaceMeta(co.cask.cdap.proto.NamespaceMeta) NamespaceId(co.cask.cdap.proto.id.NamespaceId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) RunId(org.apache.twill.api.RunId) MetadataRecord(co.cask.cdap.proto.metadata.MetadataRecord) Test(org.junit.Test)

Aggregations

StreamId (co.cask.cdap.proto.id.StreamId)166 Test (org.junit.Test)88 DatasetId (co.cask.cdap.proto.id.DatasetId)33 ProgramId (co.cask.cdap.proto.id.ProgramId)30 NamespaceId (co.cask.cdap.proto.id.NamespaceId)27 Path (javax.ws.rs.Path)27 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)24 ApplicationId (co.cask.cdap.proto.id.ApplicationId)22 IOException (java.io.IOException)20 StreamProperties (co.cask.cdap.proto.StreamProperties)17 FormatSpecification (co.cask.cdap.api.data.format.FormatSpecification)16 StreamViewId (co.cask.cdap.proto.id.StreamViewId)16 Location (org.apache.twill.filesystem.Location)15 StreamConfig (co.cask.cdap.data2.transaction.stream.StreamConfig)12 NamespaceMeta (co.cask.cdap.proto.NamespaceMeta)12 StreamAdmin (co.cask.cdap.data2.transaction.stream.StreamAdmin)11 ViewSpecification (co.cask.cdap.proto.ViewSpecification)10 MetadataSearchResultRecord (co.cask.cdap.proto.metadata.MetadataSearchResultRecord)10 Action (co.cask.cdap.proto.security.Action)10 GET (javax.ws.rs.GET)10