
Example 11 with DataStreamsConfig

Use of co.cask.cdap.etl.proto.v2.DataStreamsConfig in project cdap by caskdata.

From the class DataStreamsTest, method testWindower:

@Test
public void testWindower() throws Exception {
    /*
     * source --> window(width=30,interval=1) --> aggregator --> filter --> sink
     */
    Schema schema = Schema.recordOf("data", Schema.Field.of("x", Schema.of(Schema.Type.STRING)));
    List<StructuredRecord> input = ImmutableList.of(
        StructuredRecord.builder(schema).set("x", "abc").build(),
        StructuredRecord.builder(schema).set("x", "abc").build(),
        StructuredRecord.builder(schema).set("x", "abc").build());
    String sinkName = "windowOut";
    // source sleeps 1 second between outputs
    DataStreamsConfig etlConfig = DataStreamsConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(schema, input, 1000L)))
        .addStage(new ETLStage("window", Window.getPlugin(30, 1)))
        .addStage(new ETLStage("agg", FieldCountAggregator.getPlugin("x", "string")))
        .addStage(new ETLStage("filter", StringValueFilterTransform.getPlugin("x", "all")))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addConnection("source", "window")
        .addConnection("window", "agg")
        .addConnection("agg", "filter")
        .addConnection("filter", "sink")
        .setBatchInterval("1s")
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("WindowerApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForStatus(true, 10, 1);
    // the sink should contain at least one record with count of 3, and no records with more than 3.
    // less than 3 if the window doesn't contain all 3 records yet, but there should eventually be a window
    // that contains all 3.
    final DataSetManager<Table> outputManager = getDataset(sinkName);
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            outputManager.flush();
            boolean sawThree = false;
            for (StructuredRecord record : MockSink.readOutput(outputManager)) {
                long count = record.get("ct");
                if (count == 3L) {
                    sawThree = true;
                }
                Assert.assertTrue(count <= 3L);
            }
            return sawThree;
        }
    }, 2, TimeUnit.MINUTES);
    sparkManager.stop();
}
Also used:
ApplicationManager (co.cask.cdap.test.ApplicationManager)
SparkManager (co.cask.cdap.test.SparkManager)
Table (co.cask.cdap.api.dataset.table.Table)
Schema (co.cask.cdap.api.data.schema.Schema)
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)
TimeoutException (java.util.concurrent.TimeoutException)
TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException)
DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig)
AppRequest (co.cask.cdap.proto.artifact.AppRequest)
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)
ApplicationId (co.cask.cdap.proto.id.ApplicationId)
Test (org.junit.Test)
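
Since Tasks.waitFor takes a Callable<Boolean>, the anonymous class in the test above can be collapsed into a lambda on Java 8+. A minimal sketch of the same polling check, assuming the surrounding test fields are in scope:

// Lambda form of the wait loop above: poll the mock sink until some window
// has aggregated all three records; fail fast if any count ever exceeds 3.
Tasks.waitFor(true, () -> {
    outputManager.flush();
    boolean sawThree = false;
    for (StructuredRecord record : MockSink.readOutput(outputManager)) {
        long count = record.get("ct");
        sawThree = sawThree || count == 3L;
        Assert.assertTrue(count <= 3L);
    }
    return sawThree;
}, 2, TimeUnit.MINUTES);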

Example 12 with DataStreamsConfig

Use of co.cask.cdap.etl.proto.v2.DataStreamsConfig in project cdap by caskdata.

From the class PreviewDataStreamsTest, method testDataStreamsPreviewRun:

@Test
public void testDataStreamsPreviewRun() throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    List<StructuredRecord> records = new ArrayList<>();
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    StructuredRecord recordTest = StructuredRecord.builder(schema).set("name", "test").build();
    records.add(recordSamuel);
    records.add(recordBob);
    records.add(recordTest);
    /*
     * source --> transform --> sink
     */
    DataStreamsConfig etlConfig = DataStreamsConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(schema, records)))
        .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .setNumOfRecordsPreview(100)
        .setBatchInterval("1s")
        .build();
    // Construct the preview config with the program name and program type.
    PreviewConfig previewConfig = new PreviewConfig(DataStreamsSparkLauncher.NAME, ProgramType.SPARK, Collections.<String, String>emptyMap(), 1);
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    final PreviewRunner previewRunner = previewManager.getRunner(previewId);
    // Wait for the preview to be running and wait until the records are processed in the sink.
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            Map<String, List<JsonElement>> data = previewRunner.getData("sink");
            return data != null && data.get(DATA_TRACER_PROPERTY) != null && data.get(DATA_TRACER_PROPERTY).size() == 3;
        }
    }, 1, TimeUnit.MINUTES);
    // check data in source and transform
    checkPreviewStore(previewRunner, "source", 3);
    checkPreviewStore(previewRunner, "transform", 3);
    // Wait for the pipeline to be shutdown by timer.
    TimeUnit.MINUTES.sleep(1);
    Tasks.waitFor(PreviewStatus.Status.KILLED_BY_TIMER, new Callable<PreviewStatus.Status>() {

        @Override
        public PreviewStatus.Status call() throws Exception {
            return previewRunner.getStatus().getStatus();
        }
    }, 1, TimeUnit.MINUTES);
    // Validate the metrics for preview
    validateMetric(3, previewId, "source.records.out", previewRunner);
    validateMetric(3, previewId, "transform.records.in", previewRunner);
    validateMetric(3, previewId, "transform.records.out", previewRunner);
    validateMetric(3, previewId, "sink.records.in", previewRunner);
    validateMetric(3, previewId, "sink.records.out", previewRunner);
    // Check that the sink table was not created outside the preview space.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
}
Also used:
PreviewStatus (co.cask.cdap.app.preview.PreviewStatus)
Table (co.cask.cdap.api.dataset.table.Table)
Schema (co.cask.cdap.api.data.schema.Schema)
ArrayList (java.util.ArrayList)
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)
TimeoutException (java.util.concurrent.TimeoutException)
DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig)
AppRequest (co.cask.cdap.proto.artifact.AppRequest)
PreviewManager (co.cask.cdap.app.preview.PreviewManager)
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)
JsonElement (com.google.gson.JsonElement)
PreviewRunner (co.cask.cdap.app.preview.PreviewRunner)
ApplicationId (co.cask.cdap.proto.id.ApplicationId)
Map (java.util.Map)
ImmutableMap (com.google.common.collect.ImmutableMap)
PreviewConfig (co.cask.cdap.proto.artifact.preview.PreviewConfig)
Test (org.junit.Test)
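
PreviewRunner.getData returns the records captured by the data tracer as raw JSON, keyed by the tracer property name. A hedged sketch of pulling the "name" values back out of the sink's captured records, assuming each JsonElement is an object mirroring the input schema (this check is illustrative, not part of the original test):

// Illustrative only: decode the tracer output with Gson accessors and verify
// that all three input names were captured by the preview run. Assumes each
// element is a JSON object with the same fields as the input schema.
List<JsonElement> sinkRecords = previewRunner.getData("sink").get(DATA_TRACER_PROPERTY);
Set<String> names = new HashSet<>();
for (JsonElement element : sinkRecords) {
    names.add(element.getAsJsonObject().get("name").getAsString());
}
Assert.assertEquals(ImmutableSet.of("samuel", "bob", "test"), names);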

Aggregations

Usage counts across the examples in this set:
DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig): 12
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 11
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 10
Schema (co.cask.cdap.api.data.schema.Schema): 10
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 10
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 10
Test (org.junit.Test): 10
ApplicationManager (co.cask.cdap.test.ApplicationManager): 9
Table (co.cask.cdap.api.dataset.table.Table): 8
TimeoutException (java.util.concurrent.TimeoutException): 8
TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException): 7
SparkManager (co.cask.cdap.test.SparkManager): 7
HashSet (java.util.HashSet): 7
ImmutableSet (com.google.common.collect.ImmutableSet): 6
Set (java.util.Set): 6
ArrayList (java.util.ArrayList): 3
HashMap (java.util.HashMap): 2
CloseableIterator (co.cask.cdap.api.dataset.lib.CloseableIterator): 1
Message (co.cask.cdap.api.messaging.Message): 1
MessageFetcher (co.cask.cdap.api.messaging.MessageFetcher): 1
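
Across these examples the configuration follows one recurring shape: register each stage, wire the stage-to-stage connections, then set the streaming options. A minimal skeleton of that pattern, assuming the same mock plugins used throughout these tests (the table name "outputTable" is a placeholder):

// Skeleton of the DataStreamsConfig pattern shared by the examples above:
// stages first, then connections, then the batch interval.
DataStreamsConfig config = DataStreamsConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin(schema, records)))
    .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
    .addConnection("source", "sink")
    .setBatchInterval("1s")
    .build();
// Deployed the same way as in the tests above:
AppRequest<DataStreamsConfig> request = new AppRequest<>(APP_ARTIFACT, config);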