Example 16 with DataStreamsConfig

Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project cdap by cdapio.

From the class DataStreamsTest, method testWindower.

@Test
public void testWindower() throws Exception {
    /*
     * source --> window(width=30,interval=1) --> aggregator --> filter --> sink
     */
    Schema schema = Schema.recordOf("data", Schema.Field.of("x", Schema.of(Schema.Type.STRING)));
    List<StructuredRecord> input = ImmutableList.of(StructuredRecord.builder(schema).set("x", "abc").build(), StructuredRecord.builder(schema).set("x", "abc").build(), StructuredRecord.builder(schema).set("x", "abc").build());
    String sinkName = "windowOut";
    // source sleeps 1 second between outputs
    DataStreamsConfig etlConfig = DataStreamsConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(schema, input, 1000L)))
        .addStage(new ETLStage("window", Window.getPlugin(30, 1)))
        .addStage(new ETLStage("agg", FieldCountAggregator.getPlugin("x", "string")))
        .addStage(new ETLStage("filter", StringValueFilterTransform.getPlugin("x", "all")))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addConnection("source", "window")
        .addConnection("window", "agg")
        .addConnection("agg", "filter")
        .addConnection("filter", "sink")
        .setBatchInterval("1s")
        .setCheckpointDir(checkpointDir)
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("WindowerApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    // the sink should contain at least one record with count of 3, and no records with more than 3.
    // less than 3 if the window doesn't contain all 3 records yet, but there should eventually be a window
    // that contains all 3.
    final DataSetManager<Table> outputManager = getDataset(sinkName);
    Tasks.waitFor(true, new Callable<Boolean>() {

        @Override
        public Boolean call() throws Exception {
            outputManager.flush();
            boolean sawThree = false;
            for (StructuredRecord record : MockSink.readOutput(outputManager)) {
                long count = record.get("ct");
                if (count == 3L) {
                    sawThree = true;
                }
                Assert.assertTrue(count <= 3L);
            }
            return sawThree;
        }
    }, 2, TimeUnit.MINUTES);
    sparkManager.stop();
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) TopicNotFoundException(io.cdap.cdap.api.messaging.TopicNotFoundException) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)
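The assertion loop above tolerates partial windows: a sliding window that has not yet seen all three records reports a smaller count, but no window should ever count more than three, and at least one window eventually covers all three. Below is a minimal plain-Java sketch of that reasoning, assuming Window.getPlugin(30, 1) describes a 30-second window sliding every second and that the mock source emits one record per second; the class and variable names are illustrative only and are not CDAP APIs.

import java.util.stream.LongStream;

public class WindowCountSketch {
    public static void main(String[] args) {
        // seconds at which the three "abc" records are assumed to be emitted
        long[] emitTimes = {0, 1, 2};
        // assumed window width in seconds
        long windowWidth = 30;
        for (long windowEnd = 1; windowEnd <= 35; windowEnd++) {
            long start = windowEnd - windowWidth;
            long end = windowEnd;
            long count = LongStream.of(emitTimes).filter(t -> t >= start && t < end).count();
            // mirrors the test: no window exceeds 3, and windows ending at 3s..30s see all three records
            System.out.printf("window [%d, %d): count=%d%n", start, end, count);
        }
    }
}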

Example 17 with DataStreamsConfig

Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project cdap by cdapio.

From the class DataStreamsTest, method testAutoJoin.

@Test
public void testAutoJoin() throws Exception {
    /*
     * customers ----------|
     *                     |
     *                     |---> join ---> sink
     *                     |
     * transactions -------|
     */
    Schema inputSchema1 = Schema.recordOf("customer",
                                          Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
                                          Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("transaction",
                                          Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)),
                                          Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)),
                                          Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)));
    Schema outSchema = Schema.recordOf("customers.transactions",
                                       Schema.Field.of("customers_customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
                                       Schema.Field.of("customers_customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
                                       Schema.Field.of("transactions_t_id", Schema.of(Schema.Type.STRING)),
                                       Schema.Field.of("transactions_customer_id", Schema.of(Schema.Type.STRING)),
                                       Schema.Field.of("transactions_item_id", Schema.of(Schema.Type.STRING)));
    StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema1).set("customer_id", "1").set("customer_name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(inputSchema1).set("customer_id", "2").set("customer_name", "bob").build();
    StructuredRecord recordJane = StructuredRecord.builder(inputSchema1).set("customer_id", "3").set("customer_name", "jane").build();
    StructuredRecord tx1 = StructuredRecord.builder(inputSchema2).set("t_id", "1").set("customer_id", "1").set("item_id", "11").build();
    StructuredRecord tx2 = StructuredRecord.builder(inputSchema2).set("t_id", "2").set("customer_id", "3").set("item_id", "22").build();
    StructuredRecord tx3 = StructuredRecord.builder(inputSchema2).set("t_id", "3").set("customer_id", "4").set("item_id", "33").build();
    List<StructuredRecord> input1 = ImmutableList.of(recordSamuel, recordBob, recordJane);
    List<StructuredRecord> input2 = ImmutableList.of(tx1, tx2, tx3);
    String outputName = UUID.randomUUID().toString();
    DataStreamsConfig etlConfig = DataStreamsConfig.builder()
        .addStage(new ETLStage("customers", MockSource.getPlugin(inputSchema1, input1)))
        .addStage(new ETLStage("transactions", MockSource.getPlugin(inputSchema2, input2)))
        .addStage(new ETLStage("join", MockAutoJoiner.getPlugin(Arrays.asList("customers", "transactions"),
                                                                Collections.singletonList("customer_id"),
                                                                Collections.singletonList("transactions"),
                                                                Collections.emptyList(), Collections.emptyList(), true)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(outputName)))
        .addConnection("customers", "join")
        .addConnection("transactions", "join")
        .addConnection("join", "sink")
        .setBatchInterval("5s")
        .setCheckpointDir(checkpointDir)
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("AutoJoinerApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 10, TimeUnit.SECONDS);
    StructuredRecord join1 = StructuredRecord.builder(outSchema).set("customers_customer_id", "1").set("customers_customer_name", "samuel").set("transactions_t_id", "1").set("transactions_customer_id", "1").set("transactions_item_id", "11").build();
    StructuredRecord join2 = StructuredRecord.builder(outSchema).set("customers_customer_id", "3").set("customers_customer_name", "jane").set("transactions_t_id", "2").set("transactions_customer_id", "3").set("transactions_item_id", "22").build();
    StructuredRecord join3 = StructuredRecord.builder(outSchema).set("transactions_t_id", "3").set("transactions_customer_id", "4").set("transactions_item_id", "33").build();
    Set<StructuredRecord> expected = ImmutableSet.of(join1, join2, join3);
    DataSetManager<Table> outputManager = getDataset(outputName);
    Tasks.waitFor(true, () -> {
        outputManager.flush();
        Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));
        return expected.equals(outputRecords);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(10, TimeUnit.SECONDS);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)
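For readers tracing why exactly those three joined records are expected: "transactions" appears to be passed to MockAutoJoiner as the required stage, and the expected output is consistent with that reading. Every transaction is kept, customer fields are left null when no customer matches (t_id 3, customer 4), and customers with no transactions (bob) never appear. Below is a plain-Java sketch of that join semantics under those assumptions; it is illustrative only and uses no CDAP classes.

import java.util.HashMap;
import java.util.Map;

public class AutoJoinSketch {
    public static void main(String[] args) {
        // customer_id -> customer_name (the optional side of the join)
        Map<String, String> customers = new HashMap<>();
        customers.put("1", "samuel");
        customers.put("2", "bob");
        customers.put("3", "jane");
        // t_id, customer_id, item_id (the required side of the join)
        String[][] transactions = {{"1", "1", "11"}, {"2", "3", "22"}, {"3", "4", "33"}};
        for (String[] tx : transactions) {
            String name = customers.get(tx[1]);
            // every transaction is kept; customer fields are null when there is no match (t_id=3);
            // customers without any transaction (bob) never appear in the output
            System.out.printf("customers_customer_id=%s, customers_customer_name=%s, "
                    + "transactions_t_id=%s, transactions_customer_id=%s, transactions_item_id=%s%n",
                    name == null ? null : tx[1], name, tx[0], tx[1], tx[2]);
        }
    }
}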

Example 18 with DataStreamsConfig

Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project cdap by cdapio.

From the class DataStreamsTest, method testStageConsolidation.

@Test
public void testStageConsolidation() throws Exception {
    /*
                                   |non-null port --> sink1
        items1 --> null splitter --|
                                   |null port--> sink2
                                                   ^
                 |--> filter out id == 0  ---------|
        items2 --|    |
                 |    |--> error collector --> sink3
                 |
                 |                 |--> filter out id == 1 --> sink4
                 |--> aggregator --|
                                   |--> filter out id == 2 --> sink5
     */
    Schema schema = Schema.recordOf("item", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.INT))));
    StructuredRecord itemNull = StructuredRecord.builder(schema).build();
    StructuredRecord item0 = StructuredRecord.builder(schema).set("id", 0).build();
    StructuredRecord item1 = StructuredRecord.builder(schema).set("id", 1).build();
    StructuredRecord item2 = StructuredRecord.builder(schema).set("id", 2).build();
    StructuredRecord item3 = StructuredRecord.builder(schema).set("id", 3).build();
    List<StructuredRecord> input1Records = Arrays.asList(itemNull, item3);
    List<StructuredRecord> input2Records = Arrays.asList(item0, item1, item2);
    File outputDir = TMP_FOLDER.newFolder();
    String output1 = new File(outputDir, "output1").getAbsolutePath();
    String output2 = new File(outputDir, "output2").getAbsolutePath();
    String output3 = new File(outputDir, "output3").getAbsolutePath();
    String output4 = new File(outputDir, "output4").getAbsolutePath();
    String output5 = new File(outputDir, "output5").getAbsolutePath();
    DataStreamsConfig config = DataStreamsConfig.builder()
        .addStage(new ETLStage("items1", MockSource.getPlugin(schema, input1Records)))
        .addStage(new ETLStage("items2", MockSource.getPlugin(schema, input2Records)))
        .addStage(new ETLStage("nullSplitter", NullFieldSplitterTransform.getPlugin("id")))
        .addStage(new ETLStage("filter0", IntValueFilterTransform.getPlugin("id", 0)))
        .addStage(new ETLStage("collector", FlattenErrorTransform.getPlugin()))
        .addStage(new ETLStage("filter1", IntValueFilterTransform.getPlugin("id", 1)))
        .addStage(new ETLStage("filter2", IntValueFilterTransform.getPlugin("id", 2)))
        .addStage(new ETLStage("identityAggregator", IdentityAggregator.getPlugin()))
        .addStage(new ETLStage("sink1", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s1", output1)))
        .addStage(new ETLStage("sink2", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s2", output2)))
        .addStage(new ETLStage("sink3", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s3", output3)))
        .addStage(new ETLStage("sink4", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s4", output4)))
        .addStage(new ETLStage("sink5", MockExternalSink.getPlugin(UUID.randomUUID().toString(), "s5", output5)))
        .addConnection("items1", "nullSplitter")
        .addConnection("nullSplitter", "sink1", "non-null")
        .addConnection("nullSplitter", "sink2", "null")
        .addConnection("items2", "filter0")
        .addConnection("items2", "identityAggregator")
        .addConnection("filter0", "sink2")
        .addConnection("filter0", "collector")
        .addConnection("identityAggregator", "filter1")
        .addConnection("identityAggregator", "filter2")
        .addConnection("collector", "sink3")
        .addConnection("filter1", "sink4")
        .addConnection("filter2", "sink5")
        .setProperties(Collections.singletonMap(io.cdap.cdap.etl.common.Constants.SPARK_PIPELINE_AUTOCACHE_ENABLE_FLAG, "false"))
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("StageConsolidationTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // run pipeline
    Map<String, String> args = Collections.singletonMap(io.cdap.cdap.etl.common.Constants.CONSOLIDATE_STAGES, "true");
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.startAndWaitForGoodRun(args, ProgramRunStatus.RUNNING, 5, TimeUnit.MINUTES);
    Schema errorSchema = Schema.recordOf("erroritem", Schema.Field.of("id", Schema.nullableOf(Schema.of(Schema.Type.INT))), Schema.Field.of("errMsg", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("errCode", Schema.nullableOf(Schema.of(Schema.Type.INT))), Schema.Field.of("errStage", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    StructuredRecord expectedError = StructuredRecord.builder(errorSchema).set("id", 0).set("errMsg", IntValueFilterTransform.ERROR_MESSAGE).set("errCode", IntValueFilterTransform.ERROR_CODE).set("errStage", "filter0").build();
    Set<StructuredRecord> sink1Expected = Collections.singleton(item3);
    Set<StructuredRecord> sink2Expected = new HashSet<>(Arrays.asList(itemNull, item1, item2));
    Set<StructuredRecord> sink3Expected = Collections.singleton(expectedError);
    Set<StructuredRecord> sink4Expected = new HashSet<>(Arrays.asList(item0, item2));
    Set<StructuredRecord> sink5Expected = new HashSet<>(Arrays.asList(item0, item1));
    Tasks.waitFor(true,
                  () -> sink1Expected.equals(new HashSet<>(MockExternalSink.readOutput(output1, schema)))
                      && sink2Expected.equals(new HashSet<>(MockExternalSink.readOutput(output2, schema)))
                      && sink3Expected.equals(new HashSet<>(MockExternalSink.readOutput(output3, errorSchema)))
                      && sink4Expected.equals(new HashSet<>(MockExternalSink.readOutput(output4, schema)))
                      && sink5Expected.equals(new HashSet<>(MockExternalSink.readOutput(output5, schema))),
                  3, TimeUnit.MINUTES);
    sparkManager.stop();
    sparkManager.waitForStopped(1, TimeUnit.MINUTES);
    // check output
    validateMetric(appId, "nullSplitter.records.in", 2);
    validateMetric(appId, "nullSplitter.records.out.null", 1);
    validateMetric(appId, "nullSplitter.records.out.non-null", 1);
    validateMetric(appId, "filter0.records.out", 2);
    validateMetric(appId, "filter0.records.error", 1);
    validateMetric(appId, "identityAggregator.records.in", 3);
    validateMetric(appId, "identityAggregator.records.out", 3);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)
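The metric assertions at the end follow directly from the input data, independent of how the planner consolidates stages. The short sketch below traces that bookkeeping in plain Java; it assumes IntValueFilterTransform drops the matching value from its output and routes it to the error collector, which is what the expected sink contents and the expectedError record imply. Class and variable names are illustrative only, not CDAP APIs.

public class ConsolidationFlowSketch {
    public static void main(String[] args) {
        // items1 feeds nullSplitter; items2 feeds filter0 and identityAggregator
        Integer[] items1 = {null, 3};
        Integer[] items2 = {0, 1, 2};

        int nullOut = 0;
        int nonNullOut = 0;
        for (Integer id : items1) {
            if (id == null) {
                nullOut++;
            } else {
                nonNullOut++;
            }
        }
        System.out.println("nullSplitter.records.in=" + items1.length);          // 2
        System.out.println("nullSplitter.records.out.null=" + nullOut);          // 1
        System.out.println("nullSplitter.records.out.non-null=" + nonNullOut);   // 1

        int filterOut = 0;
        int filterErr = 0;
        for (Integer id : items2) {
            if (id != null && id == 0) {
                filterErr++;   // filter0 routes id == 0 to the error collector
            } else {
                filterOut++;
            }
        }
        System.out.println("filter0.records.out=" + filterOut);                  // 2
        System.out.println("filter0.records.error=" + filterErr);                // 1
        System.out.println("identityAggregator.records.in=" + items2.length);    // 3, and out == in
    }
}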

Example 19 with DataStreamsConfig

Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project hydrator-plugins by cdapio.

From the class SparkPluginTest, method testHttpStreamingSource.

@Test
public void testHttpStreamingSource() throws Exception {
    Assert.assertEquals(200, resetFeeds());
    final String content = "samuel jackson\ndwayne johnson\nchristopher walken";
    Assert.assertEquals(200, writeFeed("people", content));
    Map<String, String> properties = ImmutableMap.of("referenceName", "peopleFeed", "url", httpBase + "/feeds/people", "interval", "1");
    DataStreamsConfig pipelineConfig = DataStreamsConfig.builder()
        .addStage(new ETLStage("source", new ETLPlugin("HTTPPoller", StreamingSource.PLUGIN_TYPE, properties, null)))
        .addStage(new ETLStage("sink", MockSink.getPlugin("httpOutput")))
        .addConnection("source", "sink")
        .setBatchInterval("1s")
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(DATASTREAMS_ARTIFACT, pipelineConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("HTTPSourceApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForStatus(true, 10, 1);
    final DataSetManager<Table> outputManager = getDataset("httpOutput");
    Tasks.waitFor(true, () -> {
        outputManager.flush();
        Set<String> contents = new HashSet<>();
        for (StructuredRecord record : MockSink.readOutput(outputManager)) {
            contents.add(record.get("body"));
        }
        return contents.size() == 1 && contents.contains(content);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)
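The final check collects the "body" field of every record the sink has seen and expects exactly one distinct value, the full feed content. A tiny runnable sketch of that set-based check follows; it assumes, as the test implies, that each poll of the HTTPPoller emits the whole response body as a single record, so repeated polls never add new distinct values. The class name is illustrative only.

import java.util.HashSet;
import java.util.Set;

public class HttpBodyCheckSketch {
    public static void main(String[] args) {
        String content = "samuel jackson\ndwayne johnson\nchristopher walken";
        Set<String> contents = new HashSet<>();
        // three simulated polls, each assumed to yield the full body as one record
        for (int poll = 0; poll < 3; poll++) {
            contents.add(content);
        }
        System.out.println(contents.size() == 1 && contents.contains(content));  // prints: true
    }
}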

Example 20 with DataStreamsConfig

Use of io.cdap.cdap.etl.proto.v2.DataStreamsConfig in project hydrator-plugins by cdapio.

From the class SparkPluginTest, method testFileSource.

@Test
public void testFileSource() throws Exception {
    Schema schema = Schema.recordOf("user", Schema.Field.of("id", Schema.of(Schema.Type.LONG)), Schema.Field.of("first", Schema.of(Schema.Type.STRING)), Schema.Field.of("last", Schema.of(Schema.Type.STRING)));
    File folder = tmpFolder.newFolder("fileSourceTest");
    File input1 = new File(folder, "input1.txt");
    File input2 = new File(folder, "input2.csv");
    File ignore1 = new File(folder, "input1.txt.done");
    File ignore2 = new File(folder, "input1");
    CharStreams.write("1,samuel,jackson\n2,dwayne,johnson", Files.newWriterSupplier(input1, Charsets.UTF_8));
    CharStreams.write("3,christopher,walken", Files.newWriterSupplier(input2, Charsets.UTF_8));
    CharStreams.write("0,nicolas,cage", Files.newWriterSupplier(ignore1, Charsets.UTF_8));
    CharStreams.write("0,orlando,bloom", Files.newWriterSupplier(ignore2, Charsets.UTF_8));
    Map<String, String> properties = ImmutableMap.<String, String>builder()
        .put("path", folder.getAbsolutePath())
        .put("format", "csv")
        .put("schema", schema.toString())
        .put("referenceName", "fileSourceTestInput")
        .put("ignoreThreshold", "300")
        .put("extensions", "txt,csv")
        .build();
    DataStreamsConfig pipelineCfg = DataStreamsConfig.builder()
        .addStage(new ETLStage("source", new ETLPlugin("File", StreamingSource.PLUGIN_TYPE, properties, null)))
        .addStage(new ETLStage("sink", MockSink.getPlugin("fileOutput")))
        .addConnection("source", "sink")
        .setBatchInterval("1s")
        .build();
    AppRequest<DataStreamsConfig> appRequest = new AppRequest<>(DATASTREAMS_ARTIFACT, pipelineCfg);
    ApplicationId appId = NamespaceId.DEFAULT.app("FileSourceApp");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    SparkManager sparkManager = appManager.getSparkManager(DataStreamsSparkLauncher.NAME);
    sparkManager.start();
    sparkManager.waitForRun(ProgramRunStatus.RUNNING, 1, TimeUnit.MINUTES);
    Map<Long, String> expected = ImmutableMap.of(1L, "samuel jackson", 2L, "dwayne johnson", 3L, "christopher walken");
    final DataSetManager<Table> outputManager = getDataset("fileOutput");
    Tasks.waitFor(true, () -> {
        outputManager.flush();
        Map<Long, String> actual = new HashMap<>();
        for (StructuredRecord outputRecord : MockSink.readOutput(outputManager)) {
            actual.put(outputRecord.get("id"), outputRecord.get("first") + " " + outputRecord.get("last"));
        }
        return expected.equals(actual);
    }, 4, TimeUnit.MINUTES);
    // now write a new file to make sure new files are picked up.
    File input3 = new File(folder, "input3.txt");
    CharStreams.write("4,terry,crews\n5,rocky,balboa", Files.newWriterSupplier(input3, Charsets.UTF_8));
    Map<Long, String> expected2 = ImmutableMap.of(4L, "terry crews", 5L, "rocky balboa");
    Table outputTable = outputManager.get();
    Scanner scanner = outputTable.scan(null, null);
    Row row;
    while ((row = scanner.next()) != null) {
        outputTable.delete(row.getRow());
    }
    outputManager.flush();
    Tasks.waitFor(true, () -> {
        outputManager.flush();
        Map<Long, String> actual = new HashMap<>();
        for (StructuredRecord outputRecord : MockSink.readOutput(outputManager)) {
            actual.put(outputRecord.get("id"), outputRecord.get("first") + " " + outputRecord.get("last"));
        }
        return expected2.equals(actual);
    }, 4, TimeUnit.MINUTES);
    sparkManager.stop();
}
Also used : Scanner(io.cdap.cdap.api.dataset.table.Scanner) ApplicationManager(io.cdap.cdap.test.ApplicationManager) SparkManager(io.cdap.cdap.test.SparkManager) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) DataStreamsConfig(io.cdap.cdap.etl.proto.v2.DataStreamsConfig) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Row(io.cdap.cdap.api.dataset.table.Row) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) File(java.io.File) Test(org.junit.Test)
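One detail worth calling out in this example is which files get read at all: input1.txt and input2.csv match the configured "extensions" value of txt,csv, while input1.txt.done and the extension-less input1 are ignored, which is why nicolas cage and orlando bloom never reach the sink. The sketch below restates that selection rule in plain Java; it is an assumption drawn from the property values in the test, not the File plugin's actual implementation, and its names are illustrative only.

import java.util.Arrays;
import java.util.List;

public class ExtensionFilterSketch {
    public static void main(String[] args) {
        List<String> allowed = Arrays.asList("txt", "csv");
        String[] files = {"input1.txt", "input2.csv", "input1.txt.done", "input1"};
        for (String name : files) {
            int dot = name.lastIndexOf('.');
            String ext = dot < 0 ? "" : name.substring(dot + 1);
            // only files whose final extension is in the allowed list are assumed to be read
            System.out.println(name + " -> " + (allowed.contains(ext) ? "read" : "ignored"));
        }
    }
}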

Aggregations

DataStreamsConfig (io.cdap.cdap.etl.proto.v2.DataStreamsConfig): 36
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 34
Test (org.junit.Test): 34
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 30
AppRequest (io.cdap.cdap.proto.artifact.AppRequest): 30
ApplicationId (io.cdap.cdap.proto.id.ApplicationId): 30
HashSet (java.util.HashSet): 30
Schema (io.cdap.cdap.api.data.schema.Schema): 29
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 28
SparkManager (io.cdap.cdap.test.SparkManager): 22
Table (io.cdap.cdap.api.dataset.table.Table): 18
DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig): 12
TimeoutException (java.util.concurrent.TimeoutException): 12
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 11
StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 10
Schema (co.cask.cdap.api.data.schema.Schema): 10
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 10
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 10
ApplicationManager (co.cask.cdap.test.ApplicationManager): 9
ArrayList (java.util.ArrayList): 9