Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata,
from the class DataPipelineTest, method testSinglePhaseWithSparkSink:
private void testSinglePhaseWithSparkSink() throws Exception {
/*
* source1 ---|
* |--> sparksink
* source2 ---|
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source1", MockSource.getPlugin("messages1", SpamMessage.SCHEMA)))
  .addStage(new ETLStage("source2", MockSource.getPlugin("messages2", SpamMessage.SCHEMA)))
  .addStage(new ETLStage("customsink", new ETLPlugin(NaiveBayesTrainer.PLUGIN_NAME, SparkSink.PLUGIN_TYPE,
    ImmutableMap.of("fileSetName", "modelFileSet",
                    "path", "output",
                    "fieldToClassify", SpamMessage.TEXT_FIELD,
                    "predictionField", SpamMessage.SPAM_PREDICTION_FIELD),
    null)))
  .addConnection("source1", "customsink")
  .addConnection("source2", "customsink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("SparkSinkApp");
ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
// set up five spam messages and five non-spam messages to be used to train the classifier
List<StructuredRecord> messagesToWrite = new ArrayList<>();
messagesToWrite.add(new SpamMessage("buy our clothes", 1.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("sell your used books to us", 1.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("earn money for free", 1.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("this is definitely not spam", 1.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("you won the lottery", 1.0).toStructuredRecord());
// write records to source1
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("messages1"));
MockSource.writeInput(inputManager, messagesToWrite);
messagesToWrite.clear();
messagesToWrite.add(new SpamMessage("how was your day", 0.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("what are you up to", 0.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("this is a genuine message", 0.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("this is an even more genuine message", 0.0).toStructuredRecord());
messagesToWrite.add(new SpamMessage("could you send me the report", 0.0).toStructuredRecord());
// write records to source2
inputManager = getDataset(NamespaceId.DEFAULT.dataset("messages2"));
MockSource.writeInput(inputManager, messagesToWrite);
// ingest some messages to be classified
StreamManager textsToClassify = getStreamManager(NaiveBayesTrainer.TEXTS_TO_CLASSIFY);
textsToClassify.send("how are you doing today");
textsToClassify.send("free money money");
textsToClassify.send("what are you doing today");
textsToClassify.send("genuine report");
// manually trigger the pipeline
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
DataSetManager<KeyValueTable> classifiedTexts = getDataset(NaiveBayesTrainer.CLASSIFIED_TEXTS);
Assert.assertEquals(0.0d, Bytes.toDouble(classifiedTexts.get().read("how are you doing today")), 0.01d);
// only 'free money money' should be predicted as spam
Assert.assertEquals(1.0d, Bytes.toDouble(classifiedTexts.get().read("free money money")), 0.01d);
Assert.assertEquals(0.0d, Bytes.toDouble(classifiedTexts.get().read("what are you doing today")), 0.01d);
Assert.assertEquals(0.0d, Bytes.toDouble(classifiedTexts.get().read("genuine report")), 0.01d);
validateMetric(5, appId, "source1.records.out");
validateMetric(5, appId, "source2.records.out");
validateMetric(10, appId, "customsink.records.in");
}
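SpamMessage is a test helper from the CDAP repository that wraps a message text and an optional spam label and converts to and from StructuredRecord using SpamMessage.SCHEMA. Its source is not reproduced on this page; the sketch below is a hypothetical minimal version (the field names, the schema, and the class body are assumptions inferred from the calls above) that illustrates the round trip these tests rely on. The real helper also implements equals and hashCode, which is what makes the Set comparison in the SparkCompute test below work.

import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;

// Hypothetical re-creation of the SpamMessage test helper, for illustration only.
public class SpamMessageSketch {
  // Assumed field names, standing in for SpamMessage.TEXT_FIELD and SpamMessage.SPAM_PREDICTION_FIELD.
  static final String TEXT_FIELD = "text";
  static final String SPAM_PREDICTION_FIELD = "isSpam";
  // The prediction is nullable because the classification test creates messages without a label.
  static final Schema SCHEMA = Schema.recordOf(
    "spamMessage",
    Schema.Field.of(TEXT_FIELD, Schema.of(Schema.Type.STRING)),
    Schema.Field.of(SPAM_PREDICTION_FIELD, Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))));

  private final String text;
  private final Double spamPrediction;

  SpamMessageSketch(String text, Double spamPrediction) {
    this.text = text;
    this.spamPrediction = spamPrediction;
  }

  // Build a StructuredRecord matching SCHEMA, leaving the label unset when it is unknown.
  StructuredRecord toStructuredRecord() {
    StructuredRecord.Builder builder = StructuredRecord.builder(SCHEMA).set(TEXT_FIELD, text);
    if (spamPrediction != null) {
      builder.set(SPAM_PREDICTION_FIELD, spamPrediction);
    }
    return builder.build();
  }

  // Read the two fields back out of a record produced by the pipeline.
  static SpamMessageSketch fromStructuredRecord(StructuredRecord record) {
    String text = record.get(TEXT_FIELD);
    Double prediction = record.get(SPAM_PREDICTION_FIELD);
    return new SpamMessageSketch(text, prediction);
  }
}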
Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata,
from the class DataPipelineTest, method testSinglePhaseWithSparkCompute:
private void testSinglePhaseWithSparkCompute() throws Exception {
/*
* source --> sparkcompute --> sink
*/
String classifiedTextsTable = "classifiedTextTable";
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockSource.getPlugin(NaiveBayesTrainer.TEXTS_TO_CLASSIFY, SpamMessage.SCHEMA)))
  .addStage(new ETLStage("sparkcompute", new ETLPlugin(NaiveBayesClassifier.PLUGIN_NAME, SparkCompute.PLUGIN_TYPE,
    ImmutableMap.of("fileSetName", "modelFileSet",
                    "path", "output",
                    "fieldToClassify", SpamMessage.TEXT_FIELD,
                    "fieldToSet", SpamMessage.SPAM_PREDICTION_FIELD),
    null)))
  .addStage(new ETLStage("sink", MockSink.getPlugin(classifiedTextsTable)))
  .addConnection("source", "sparkcompute")
  .addConnection("sparkcompute", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("SparkComputeApp");
ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
// write some messages to be classified
List<StructuredRecord> messagesToWrite = new ArrayList<>();
messagesToWrite.add(new SpamMessage("how are you doing today").toStructuredRecord());
messagesToWrite.add(new SpamMessage("free money money").toStructuredRecord());
messagesToWrite.add(new SpamMessage("what are you doing today").toStructuredRecord());
messagesToWrite.add(new SpamMessage("genuine report").toStructuredRecord());
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(NaiveBayesTrainer.TEXTS_TO_CLASSIFY));
MockSource.writeInput(inputManager, messagesToWrite);
// manually trigger the pipeline
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
DataSetManager<Table> classifiedTexts = getDataset(classifiedTextsTable);
List<StructuredRecord> structuredRecords = MockSink.readOutput(classifiedTexts);
Set<SpamMessage> results = new HashSet<>();
for (StructuredRecord structuredRecord : structuredRecords) {
results.add(SpamMessage.fromStructuredRecord(structuredRecord));
}
Set<SpamMessage> expected = new HashSet<>();
expected.add(new SpamMessage("how are you doing today", 0.0));
// only 'free money money' should be predicted as spam
expected.add(new SpamMessage("free money money", 1.0));
expected.add(new SpamMessage("what are you doing today", 0.0));
expected.add(new SpamMessage("genuine report", 0.0));
Assert.assertEquals(expected, results);
validateMetric(4, appId, "source.records.out");
validateMetric(4, appId, "sparkcompute.records.in");
validateMetric(4, appId, "sink.records.in");
}
Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata,
from the class DataPipelineTest, method testKVTableLookup:
@Test
public void testKVTableLookup() throws Exception {
addDatasetInstance(KeyValueTable.class.getName(), "ageTable");
DataSetManager<KeyValueTable> lookupTable = getDataset("ageTable");
lookupTable.get().write("samuel".getBytes(Charsets.UTF_8), "12".getBytes(Charsets.UTF_8));
lookupTable.get().write("bob".getBytes(Charsets.UTF_8), "36".getBytes(Charsets.UTF_8));
lookupTable.get().write("jane".getBytes(Charsets.UTF_8), "25".getBytes(Charsets.UTF_8));
lookupTable.flush();
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockSource.getPlugin("inputTable")))
  .addStage(new ETLStage("transform", LookupTransform.getPlugin("person", "age", "ageTable")))
  .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
  .addConnection("source", "transform")
  .addConnection("transform", "sink")
  .build();
ApplicationId appId = NamespaceId.DEFAULT.app("testKVTableLookup");
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationManager appManager = deployApplication(appId, appRequest);
// set up input data
Schema inputSchema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema).set("person", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema).set("person", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema).set("person", "jane").build();
DataSetManager<Table> inputTable = getDataset("inputTable");
MockSource.writeInput(inputTable, ImmutableList.of(recordSamuel, recordBob, recordJane));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME).start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
Schema schema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)), Schema.Field.of("age", Schema.of(Schema.Type.STRING)));
Set<StructuredRecord> expected = new HashSet<>();
expected.add(StructuredRecord.builder(schema).set("person", "samuel").set("age", "12").build());
expected.add(StructuredRecord.builder(schema).set("person", "bob").set("age", "36").build());
expected.add(StructuredRecord.builder(schema).set("person", "jane").set("age", "25").build());
DataSetManager<Table> outputTable = getDataset("outputTable");
Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(outputTable));
Assert.assertEquals(expected, actual);
validateMetric(3, appId, "source.records.out");
validateMetric(3, appId, "sink.records.in");
deleteDatasetInstance(NamespaceId.DEFAULT.dataset("inputTable"));
deleteDatasetInstance(NamespaceId.DEFAULT.dataset("outputTable"));
}
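LookupTransform is likewise a test plugin from the CDAP repo and its source is not shown on this page. Conceptually it looks the value of the configured field ("person") up in the named KeyValueTable ("ageTable") and emits the record extended with the looked-up value ("age"). The sketch below is a hypothetical illustration of that per-record enrichment, using a plain Map in place of the KeyValueTable and the same output schema the test asserts against.

import java.util.Map;

import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;

// Illustrative sketch only; the real LookupTransform plugin lives in the CDAP test sources.
public class LookupTransformSketch {
  // Output schema matching what the test expects: the input "person" field plus the looked-up "age".
  static final Schema OUTPUT_SCHEMA = Schema.recordOf(
    "person",
    Schema.Field.of("person", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("age", Schema.of(Schema.Type.STRING)));

  // Enrich one input record by looking its "person" value up in the map
  // (standing in for the "ageTable" KeyValueTable) and adding the result as "age".
  static StructuredRecord enrich(StructuredRecord input, Map<String, String> ageLookup) {
    String person = input.get("person");
    return StructuredRecord.builder(OUTPUT_SCHEMA)
      .set("person", person)
      .set("age", ageLookup.get(person))
      .build();
  }
}

With a lookup map of {samuel=12, bob=36, jane=25}, applying enrich to recordSamuel produces the first record in the expected set above.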
Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata,
from the class PipelineSpecGenerator, method validateConfig:
/**
* Validate that this is a valid pipeline. A valid pipeline has the following properties:
*
* All stages in the pipeline have a unique name.
* Source stages have at least one output and no inputs.
* Sink stages have at least one input and no outputs.
* There are no cycles in the pipeline.
* All inputs into a stage have the same schema.
* ErrorTransforms only have BatchSource, Transform, or BatchAggregator as input stages
*
* Returns the stages in the order they should be configured, ensuring that a stage's input stages
* are configured before the stage itself.
*
* @param config the user provided configuration
* @return the order to configure the stages in
* @throws IllegalArgumentException if the pipeline is invalid
*/
private List<StageConnections> validateConfig(ETLConfig config) {
config.validate();
if (config.getStages().isEmpty()) {
throw new IllegalArgumentException("A pipeline must contain at least one stage.");
}
Set<String> actionStages = new HashSet<>();
Map<String, String> stageTypes = new HashMap<>();
// check stage name uniqueness
Set<String> stageNames = new HashSet<>();
for (ETLStage stage : config.getStages()) {
if (!stageNames.add(stage.getName())) {
throw new IllegalArgumentException(String.format("Invalid pipeline. Multiple stages are named %s. Please ensure all stage names are unique", stage.getName()));
}
// if stage is Action stage, add it to the Action stage set
if (isAction(stage.getPlugin().getType())) {
actionStages.add(stage.getName());
}
stageTypes.put(stage.getName(), stage.getPlugin().getType());
}
// check that the from and to are names of actual stages
for (Connection connection : config.getConnections()) {
if (!stageNames.contains(connection.getFrom())) {
throw new IllegalArgumentException(String.format("Invalid connection %s. %s is not a stage.", connection, connection.getFrom()));
}
if (!stageNames.contains(connection.getTo())) {
throw new IllegalArgumentException(String.format("Invalid connection %s. %s is not a stage.", connection, connection.getTo()));
}
}
List<StageConnections> traversalOrder = new ArrayList<>(stageNames.size());
// can only have empty connections if the pipeline consists of a single action.
if (config.getConnections().isEmpty()) {
if (actionStages.size() == 1 && stageNames.size() == 1) {
traversalOrder.add(new StageConnections(config.getStages().iterator().next(), Collections.<String>emptyList(), Collections.<String>emptyList()));
return traversalOrder;
} else {
throw new IllegalArgumentException("Invalid pipeline. There are no connections between stages. " + "This is only allowed if the pipeline consists of a single action plugin.");
}
}
Dag dag = new Dag(config.getConnections());
Map<String, StageConnections> stages = new HashMap<>();
for (ETLStage stage : config.getStages()) {
String stageName = stage.getName();
Set<String> stageInputs = dag.getNodeInputs(stageName);
Set<String> stageOutputs = dag.getNodeOutputs(stageName);
String stageType = stage.getPlugin().getType();
// check source plugins are sources in the dag
if (isSource(stageType)) {
if (!stageInputs.isEmpty() && !actionStages.containsAll(stageInputs)) {
throw new IllegalArgumentException(String.format("Source %s has incoming connections from %s. Sources cannot have any incoming connections.", stageName, Joiner.on(',').join(stageInputs)));
}
} else if (isSink(stageType)) {
if (!stageOutputs.isEmpty() && !actionStages.containsAll(stageOutputs)) {
throw new IllegalArgumentException(String.format("Sink %s has outgoing connections to %s. Sinks cannot have any outgoing connections.", stageName, Joiner.on(',').join(stageOutputs)));
}
} else {
// check that other non-action plugins are not sources or sinks in the dag
if (!isAction(stageType)) {
if (stageInputs.isEmpty()) {
throw new IllegalArgumentException(String.format("Stage %s is unreachable, it has no incoming connections.", stageName));
}
if (stageOutputs.isEmpty()) {
throw new IllegalArgumentException(String.format("Stage %s is a dead end, it has no outgoing connections.", stageName));
}
}
// check that error transforms only have stages that can emit errors as input
boolean isErrorTransform = ErrorTransform.PLUGIN_TYPE.equals(stageType);
if (isErrorTransform) {
for (String inputStage : stageInputs) {
String inputType = stageTypes.get(inputStage);
if (!VALID_ERROR_INPUTS.contains(inputType)) {
throw new IllegalArgumentException(String.format("ErrorTransform %s cannot have stage %s of type %s as input. Only %s stages can emit errors.", stageName, inputStage, inputType, Joiner.on(',').join(VALID_ERROR_INPUTS)));
}
}
}
}
stages.put(stageName, new StageConnections(stage, stageInputs, stageOutputs));
}
for (String stageName : dag.getTopologicalOrder()) {
traversalOrder.add(stages.get(stageName));
}
return traversalOrder;
}
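The Dag class used here is part of the CDAP ETL planner; getTopologicalOrder() returns the stages so that every stage comes after all of its inputs, and it is also what enforces the "no cycles" rule from the javadoc. The sketch below is not the Dag implementation, only a minimal stand-alone illustration of such an ordering using Kahn's algorithm over plain stage-name edges.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class TopologicalOrderSketch {
  // outgoing: stage name -> names of stages it connects to (the "from" -> "to" pairs of the config).
  // Returns every stage after all of its inputs, or throws if the connections form a cycle.
  static List<String> topologicalOrder(Map<String, Set<String>> outgoing) {
    Map<String, Integer> inDegree = new HashMap<>();
    for (String from : outgoing.keySet()) {
      inDegree.putIfAbsent(from, 0);
    }
    for (Set<String> targets : outgoing.values()) {
      for (String to : targets) {
        inDegree.merge(to, 1, Integer::sum);
      }
    }
    Deque<String> ready = new ArrayDeque<>();
    for (Map.Entry<String, Integer> entry : inDegree.entrySet()) {
      if (entry.getValue() == 0) {
        ready.add(entry.getKey());
      }
    }
    List<String> order = new ArrayList<>();
    while (!ready.isEmpty()) {
      String stage = ready.poll();
      order.add(stage);
      for (String to : outgoing.getOrDefault(stage, Collections.<String>emptySet())) {
        if (inDegree.merge(to, -1, Integer::sum) == 0) {
          ready.add(to);
        }
      }
    }
    if (order.size() != inDegree.size()) {
      throw new IllegalArgumentException("Invalid pipeline. The connections contain a cycle.");
    }
    return order;
  }
}

For the testKVTableLookup pipeline earlier on this page, the map would be {source=[transform], transform=[sink]}, giving the order [source, transform, sink], which is exactly the ordering configureStages needs so that each stage sees its input schemas before being configured.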
Use of co.cask.cdap.etl.proto.v2.ETLConfig in project cdap by caskdata,
from the class PipelineSpecGenerator, method configureStages:
/**
* Performs most of the validation and configuration needed by a pipeline.
* Handles stages, connections, resources, and stage logging settings.
*
* @param config user provided ETL config
* @param specBuilder builder for creating a pipeline spec.
*/
protected void configureStages(ETLConfig config, PipelineSpec.Builder specBuilder) {
// validate the config and determine the order we should configure the stages in.
List<StageConnections> traversalOrder = validateConfig(config);
Map<String, DefaultPipelineConfigurer> pluginConfigurers = new HashMap<>(traversalOrder.size());
Map<String, String> pluginTypes = new HashMap<>(traversalOrder.size());
for (StageConnections stageConnections : traversalOrder) {
String stageName = stageConnections.getStage().getName();
pluginTypes.put(stageName, stageConnections.getStage().getPlugin().getType());
pluginConfigurers.put(stageName, new DefaultPipelineConfigurer(configurer, stageName, engine));
}
// anything prefixed by 'system.[engine].' is a pipeline property.
Map<String, String> pipelineProperties = new HashMap<>();
String prefix = String.format("system.%s.", engine.name().toLowerCase());
int prefixLength = prefix.length();
for (Map.Entry<String, String> property : config.getProperties().entrySet()) {
if (property.getKey().startsWith(prefix)) {
String strippedKey = property.getKey().substring(prefixLength);
pipelineProperties.put(strippedKey, property.getValue());
}
}
// row = property name, column = property value, val = stage that set the property
// this is used so that we can error with a nice message about which stages are setting conflicting properties
Table<String, String, String> propertiesFromStages = HashBasedTable.create();
// configure the stages in order and build up the stage specs
for (StageConnections stageConnections : traversalOrder) {
ETLStage stage = stageConnections.getStage();
String stageName = stage.getName();
DefaultPipelineConfigurer pluginConfigurer = pluginConfigurers.get(stageName);
ConfiguredStage configuredStage = configureStage(stageConnections, pluginConfigurer);
Schema outputSchema = configuredStage.stageSpec.getOutputSchema();
Schema outputErrorSchema = configuredStage.stageSpec.getErrorSchema();
// for each output, set their input schema to our output schema
for (String outputStageName : stageConnections.getOutputs()) {
String outputStageType = pluginTypes.get(outputStageName);
// no need to set any input schemas for an Action plugin
if (Action.PLUGIN_TYPE.equals(outputStageType)) {
continue;
}
DefaultStageConfigurer outputStageConfigurer = pluginConfigurers.get(outputStageName).getStageConfigurer();
// Do not allow null input schema for Joiner
if (BatchJoiner.PLUGIN_TYPE.equals(outputStageType) && outputSchema == null) {
throw new IllegalArgumentException(String.format("Joiner cannot have any null input schemas, but stage %s " + "outputs a null schema.", stageName));
}
// if the output stage is an error transform, it takes the error schema of this stage as its input.
// all other plugin types take the output schema of this stage as their input.
Schema nextStageInputSchema = ErrorTransform.PLUGIN_TYPE.equals(outputStageType) ? outputErrorSchema : outputSchema;
// Do not allow more than one input schema for stages other than Joiner
if (!BatchJoiner.PLUGIN_TYPE.equals(outputStageType) && !hasSameSchema(outputStageConfigurer.getInputSchemas(), nextStageInputSchema)) {
throw new IllegalArgumentException("Two different input schema were set for the stage " + outputStageName);
}
outputStageConfigurer.addInputSchema(stageName, nextStageInputSchema);
}
specBuilder.addStage(configuredStage.stageSpec);
for (Map.Entry<String, String> propertyEntry : configuredStage.pipelineProperties.entrySet()) {
propertiesFromStages.put(propertyEntry.getKey(), propertyEntry.getValue(), stageName);
}
}
// check that multiple stages did not set conflicting properties
for (String propertyName : propertiesFromStages.rowKeySet()) {
// go through all values set for the property name. If there is more than one, we have a conflict.
Map<String, String> propertyValues = propertiesFromStages.row(propertyName);
if (propertyValues.size() > 1) {
StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName).append("' is being set to different values by stages.");
for (Map.Entry<String, String> valueEntry : propertyValues.entrySet()) {
String propertyValue = valueEntry.getKey();
String fromStage = valueEntry.getValue();
errMsg.append(" stage '").append(fromStage).append("' = '").append(propertyValue).append("',");
}
errMsg.deleteCharAt(errMsg.length() - 1);
throw new IllegalArgumentException(errMsg.toString());
}
pipelineProperties.put(propertyName, propertyValues.keySet().iterator().next());
}
specBuilder.addConnections(config.getConnections())
  .setResources(config.getResources())
  .setDriverResources(config.getDriverResources())
  .setClientResources(config.getClientResources())
  .setStageLoggingEnabled(config.isStageLoggingEnabled())
  .setNumOfRecordsPreview(config.getNumOfRecordsPreview())
  .setProperties(pipelineProperties)
  .build();
}
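The Guava Table above is keyed as (row = property name, column = property value, value = stage name), so a pipeline property set to more than one distinct value shows up as a row with more than one column, and the offending stages can be named in the error. The same bookkeeping can be written with nested maps; the following stand-alone sketch (not the CDAP code itself) shows that conflict check in isolation.

import java.util.HashMap;
import java.util.Map;

public class PipelinePropertyConflictSketch {
  // propertiesFromStages: property name -> (property value -> name of the stage that set it).
  // Returns one value per property, or throws when two stages set the same property differently.
  static Map<String, String> resolve(Map<String, Map<String, String>> propertiesFromStages) {
    Map<String, String> pipelineProperties = new HashMap<>();
    for (Map.Entry<String, Map<String, String>> entry : propertiesFromStages.entrySet()) {
      String propertyName = entry.getKey();
      Map<String, String> valueToStage = entry.getValue();
      if (valueToStage.size() > 1) {
        StringBuilder errMsg = new StringBuilder("Pipeline property '").append(propertyName)
          .append("' is being set to different values by stages.");
        for (Map.Entry<String, String> valueEntry : valueToStage.entrySet()) {
          errMsg.append(" stage '").append(valueEntry.getValue())
            .append("' = '").append(valueEntry.getKey()).append("',");
        }
        errMsg.deleteCharAt(errMsg.length() - 1);
        throw new IllegalArgumentException(errMsg.toString());
      }
      pipelineProperties.put(propertyName, valueToStage.keySet().iterator().next());
    }
    return pipelineProperties;
  }
}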