Example 1 with PreviewManager

Use of io.cdap.cdap.app.preview.PreviewManager in project cdap by caskdata.

From the class DataPipelineConnectionTest, method testUsingConnections.

private void testUsingConnections(Engine engine) throws Exception {
    String sourceConnName = "sourceConn " + engine;
    String sinkConnName = "sinkConn " + engine;
    String srcTableName = "src" + engine;
    String sinkTableName = "sink" + engine;
    // add a macro-valued property; at runtime the macro resolves to a JSON object
    addConnection(sourceConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, ImmutableMap.of("tableName", srcTableName, "key1", "${badval}"), new ArtifactSelectorConfig())));
    addConnection(sinkConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, ImmutableMap.of("tableName", sinkTableName, "key1", "${badval}"), new ArtifactSelectorConfig())));
    // add a JSON string to the runtime arguments to ensure the plugin can be instantiated under that condition
    Map<String, String> runtimeArguments = Collections.singletonMap("badval", "{\"a\" : 1}");
    // source -> sink
    ETLBatchConfig config = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPluginUsingConnection(sourceConnName))).addStage(new ETLStage("sink", MockSink.getPluginUsingConnection(sinkConnName))).addConnection("source", "sink").build();
    Schema schema = Schema.recordOf("x", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    StructuredRecord samuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord dwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();
    // the test adds the dataset itself; the source won't create it since the table name is macro-enabled
    addDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName), Table.class.getName());
    DataSetManager<Table> sourceTable = getDataset(srcTableName);
    MockSource.writeInput(sourceTable, ImmutableList.of(samuel, dwayne));
    // verify preview can run successfully using connections
    PreviewManager previewManager = getPreviewManager();
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, runtimeArguments, 10);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, new AppRequest<>(APP_ARTIFACT, config, previewConfig));
    // Wait for the preview status to become COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus status = previewManager.getStatus(previewId);
        return status == null ? null : status.getStatus();
    }, 5, TimeUnit.MINUTES);
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("testApp" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // start the actual pipeline run
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    DataSetManager<Table> sinkTable = getDataset(sinkTableName);
    List<StructuredRecord> outputRecords = MockSink.readOutput(sinkTable);
    Assert.assertEquals(ImmutableSet.of(dwayne, samuel), new HashSet<>(outputRecords));
    // modify the connection to use a new table name for source and sink
    String newSrcTableName = "new" + srcTableName;
    String newSinkTableName = "new" + sinkTableName;
    addConnection(sourceConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, Collections.singletonMap("tableName", newSrcTableName), new ArtifactSelectorConfig())));
    addConnection(sinkConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, Collections.singletonMap("tableName", newSinkTableName), new ArtifactSelectorConfig())));
    addDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName), Table.class.getName());
    StructuredRecord newRecord1 = StructuredRecord.builder(schema).set("name", "john").build();
    StructuredRecord newRecord2 = StructuredRecord.builder(schema).set("name", "tom").build();
    sourceTable = getDataset(newSrcTableName);
    MockSource.writeInput(sourceTable, ImmutableList.of(newRecord1, newRecord2));
    // run the program again; it should read from and write to the new tables
    manager.start(runtimeArguments);
    manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 3, TimeUnit.MINUTES);
    sinkTable = getDataset(newSinkTableName);
    outputRecords = MockSink.readOutput(sinkTable);
    Assert.assertEquals(ImmutableSet.of(newRecord1, newRecord2), new HashSet<>(outputRecords));
    deleteConnection(sourceConnName);
    deleteConnection(sinkConnName);
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sinkTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSinkTableName));
}
Also used: ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PreviewManager(io.cdap.cdap.app.preview.PreviewManager) ConnectionCreationRequest(io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest) PluginInfo(io.cdap.cdap.etl.proto.connection.PluginInfo) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) Table(io.cdap.cdap.api.dataset.table.Table) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) PreviewConfig(io.cdap.cdap.proto.artifact.preview.PreviewConfig)
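
The start-then-poll sequence in the middle of this test recurs in every example on this page. As a minimal sketch, it could be factored into a helper; the name runPreviewToCompletion is hypothetical and not part of the CDAP test base:

private ApplicationId runPreviewToCompletion(PreviewManager previewManager, AppRequest<?> appRequest) throws Exception {
    // Submit the preview run in the default namespace (hypothetical helper, not CDAP API).
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    // Poll until the preview reports COMPLETED, or fail after five minutes.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus status = previewManager.getStatus(previewId);
        return status == null ? null : status.getStatus();
    }, 5, TimeUnit.MINUTES);
    return previewId;
}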

Example 2 with PreviewManager

Use of io.cdap.cdap.app.preview.PreviewManager in project cdap by caskdata.

From the class PreviewDataPipelineTest, method testMultiplePhase.

private void testMultiplePhase(Engine engine) throws Exception {
    /*
     * source1 ----> t1 ------
     *                        | --> innerjoin ----> t4 ------
     * source2 ----> t2 ------                                 |
     *                                                         | ---> outerjoin --> sink1
     *                                                         |
     * source3 -------------------- t3 ------------------------
     */
    PreviewManager previewManager = getPreviewManager();
    Schema inputSchema1 = Schema.recordOf("customerRecord", Schema.Field.of("customer_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("customer_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema2 = Schema.recordOf("itemRecord", Schema.Field.of("item_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("item_price", Schema.of(Schema.Type.LONG)), Schema.Field.of("cust_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("cust_name", Schema.of(Schema.Type.STRING)));
    Schema inputSchema3 = Schema.recordOf("transactionRecord", Schema.Field.of("t_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("c_id", Schema.of(Schema.Type.STRING)), Schema.Field.of("i_id", Schema.of(Schema.Type.STRING)));
    Schema outSchema2 = Schema.recordOf("join.output", Schema.Field.of("t_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("c_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("i_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("customer_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("customer_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("item_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("item_price", Schema.nullableOf(Schema.of(Schema.Type.LONG))), Schema.Field.of("cust_id", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("cust_name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    String source1MulitJoinInput = "multiJoinSource1-" + engine;
    String source2MultiJoinInput = "multiJoinSource2-" + engine;
    String source3MultiJoinInput = "multiJoinSource3-" + engine;
    String outputName = "multiJoinOutput-" + engine;
    String sinkName = "multiJoinOutputSink-" + engine;
    String outerJoinName = "multiJoinOuter-" + engine;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source1", MockSource.getPlugin(source1MulitJoinInput, inputSchema1))).addStage(new ETLStage("source2", MockSource.getPlugin(source2MultiJoinInput, inputSchema2))).addStage(new ETLStage("source3", MockSource.getPlugin(source3MultiJoinInput, inputSchema3))).addStage(new ETLStage("t1", IdentityTransform.getPlugin())).addStage(new ETLStage("t2", IdentityTransform.getPlugin())).addStage(new ETLStage("t3", IdentityTransform.getPlugin())).addStage(new ETLStage("t4", IdentityTransform.getPlugin())).addStage(new ETLStage("innerjoin", MockJoiner.getPlugin("t1.customer_id=t2.cust_id", "t1,t2", ""))).addStage(new ETLStage(outerJoinName, MockJoiner.getPlugin("t4.item_id=t3.i_id", "", ""))).addStage(new ETLStage(sinkName, MockSink.getPlugin(outputName))).addConnection("source1", "t1").addConnection("source2", "t2").addConnection("source3", "t3").addConnection("t1", "innerjoin").addConnection("t2", "innerjoin").addConnection("innerjoin", "t4").addConnection("t3", outerJoinName).addConnection("t4", outerJoinName).addConnection(outerJoinName, sinkName).setEngine(engine).setNumOfRecordsPreview(100).build();
    // Construct the preview config with the program name and program type
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the tables for the mock sources
    addDatasetInstance(Table.class.getName(), source1MulitJoinInput, DatasetProperties.of(ImmutableMap.of("schema", inputSchema1.toString())));
    addDatasetInstance(Table.class.getName(), source2MultiJoinInput, DatasetProperties.of(ImmutableMap.of("schema", inputSchema2.toString())));
    addDatasetInstance(Table.class.getName(), source3MultiJoinInput, DatasetProperties.of(ImmutableMap.of("schema", inputSchema3.toString())));
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    ingestData(inputSchema1, inputSchema2, inputSchema3, source1MulitJoinInput, source2MultiJoinInput, source3MultiJoinInput);
    // Wait for the preview status to become COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus status = previewManager.getStatus(previewId);
        return status == null ? null : status.getStatus();
    }, 5, TimeUnit.MINUTES);
    checkPreviewStore(previewManager, previewId, sinkName, 3);
    validateMetric(3L, previewId, sinkName + ".records.in", previewManager);
}
Also used: PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) PreviewManager(io.cdap.cdap.app.preview.PreviewManager) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) PreviewConfig(io.cdap.cdap.proto.artifact.preview.PreviewConfig)
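
checkPreviewStore is a private helper of the test class, so its body is an assumption here; based on the previewManager.getData call shown in Example 4 below, a rough sketch of the kind of assertion it performs might look like this (DATA_TRACER_PROPERTY is a constant of the test class):

// Rough sketch of a checkPreviewStore-style assertion: read the tracer data
// for a stage and check the record count. Grounded in the getData(...) usage
// shown in Example 4; the helper's real body may differ.
List<JsonElement> sinkData = previewManager.getData(previewId, sinkName).get(DATA_TRACER_PROPERTY);
Assert.assertEquals(3, sinkData.size());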

Example 3 with PreviewManager

Use of io.cdap.cdap.app.preview.PreviewManager in project cdap by caskdata.

From the class DataPipelineConnectionTest, method testConnectionsWithPluginMacros.

private void testConnectionsWithPluginMacros(Engine engine) throws Exception {
    String sourceConnName = "sourceConnPluginMacros " + engine;
    String transformConnName = "transformConnPluginMacros " + engine;
    String sinkConnName = "sinkConnPluginMacros " + engine;
    String srcTableName = "srcPluginMacros" + engine;
    String sinkTableName = "sinkPluginMacros" + engine;
    addConnection(sourceConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, Collections.singletonMap("tableName", "${srcTable}"), new ArtifactSelectorConfig())));
    addConnection(transformConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, ImmutableMap.of("plugin1", "${plugin1}", "plugin1Type", "${plugin1Type}"), new ArtifactSelectorConfig())));
    addConnection(sinkConnName, new ConnectionCreationRequest("", new PluginInfo("test", "dummy", null, Collections.singletonMap("tableName", "${sinkTable}"), new ArtifactSelectorConfig())));
    // source -> pluginValidation transform -> sink
    ETLBatchConfig config = ETLBatchConfig.builder().setEngine(engine).addStage(new ETLStage("source", MockSource.getPluginUsingConnection(sourceConnName))).addStage(new ETLStage("transform", PluginValidationTransform.getPluginUsingConnection(transformConnName, "${plugin2}", "${plugin2Type}"))).addStage(new ETLStage("sink", MockSink.getPluginUsingConnection(sinkConnName))).addConnection("source", "transform").addConnection("transform", "sink").build();
    // runtime arguments
    Map<String, String> runtimeArguments = ImmutableMap.<String, String>builder().put("srcTable", srcTableName).put("sinkTable", sinkTableName).put("plugin1", "Identity").put("plugin1Type", Transform.PLUGIN_TYPE).put("plugin2", "Double").put("plugin2Type", Transform.PLUGIN_TYPE).build();
    Schema schema = Schema.recordOf("x", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    StructuredRecord samuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord dwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();
    addDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName), Table.class.getName());
    DataSetManager<Table> sourceTable = getDataset(srcTableName);
    MockSource.writeInput(sourceTable, ImmutableList.of(samuel, dwayne));
    // verify preview can run successfully using connections
    PreviewManager previewManager = getPreviewManager();
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, runtimeArguments, 10);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, new AppRequest<>(APP_ARTIFACT, config, previewConfig));
    // Wait for the preview status to become COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus status = previewManager.getStatus(previewId);
        return status == null ? null : status.getStatus();
    }, 5, TimeUnit.MINUTES);
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("testConnectionsWithPluginMacros" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // start the actual pipeline run
    WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
    DataSetManager<Table> sinkTable = getDataset(sinkTableName);
    List<StructuredRecord> outputRecords = MockSink.readOutput(sinkTable);
    Assert.assertEquals(ImmutableSet.of(dwayne, samuel), new HashSet<>(outputRecords));
    deleteConnection(sourceConnName);
    deleteConnection(sinkConnName);
    deleteConnection(transformConnName);
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sinkTableName));
}
Also used: ApplicationManager(io.cdap.cdap.test.ApplicationManager) ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PreviewManager(io.cdap.cdap.app.preview.PreviewManager) ConnectionCreationRequest(io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest) PluginInfo(io.cdap.cdap.etl.proto.connection.PluginInfo) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) Table(io.cdap.cdap.api.dataset.table.Table) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) PreviewConfig(io.cdap.cdap.proto.artifact.preview.PreviewConfig)
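
The macros in this test live in two places: inside the connection properties (${srcTable}, ${sinkTable}, ${plugin1}, ${plugin1Type}) and as plugin arguments passed to the transform (${plugin2}, ${plugin2Type}). All of them resolve from one runtime-argument map; here is the same map restated with annotations for clarity (values exactly as in the test above):

// The runtime-argument map from the test, annotated with the macro each entry
// resolves. Purely illustrative; no behavior differs from the original.
Map<String, String> runtimeArguments = ImmutableMap.<String, String>builder()
    .put("srcTable", srcTableName)              // ${srcTable} in the source connection properties
    .put("sinkTable", sinkTableName)            // ${sinkTable} in the sink connection properties
    .put("plugin1", "Identity")                 // ${plugin1} in the transform connection properties
    .put("plugin1Type", Transform.PLUGIN_TYPE)  // ${plugin1Type} in the transform connection properties
    .put("plugin2", "Double")                   // ${plugin2} passed directly as a plugin argument
    .put("plugin2Type", Transform.PLUGIN_TYPE)  // ${plugin2Type} passed directly as a plugin argument
    .build();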

Example 4 with PreviewManager

Use of io.cdap.cdap.app.preview.PreviewManager in project cdap by caskdata.

From the class PreviewDataPipelineTest, method testLogicalTypePreviewRun.

private void testLogicalTypePreviewRun(Engine engine) throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sourceTableName = "singleInput";
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("date", Schema.of(Schema.LogicalType.DATE)), Schema.Field.of("ts", Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS)));
    /*
     * source --> transform -> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema))).addStage(new ETLStage("transform", IdentityTransform.getPlugin())).addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName))).addConnection("source", "transform").addConnection("transform", "sink").setEngine(engine).setNumOfRecordsPreview(100).build();
    // Construct the preview config with the program name and program type
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    ZonedDateTime expectedMillis = ZonedDateTime.of(2018, 11, 11, 11, 11, 11, 123 * 1000 * 1000, ZoneId.ofOffset("UTC", ZoneOffset.UTC));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").setDate("date", LocalDate.of(2002, 11, 18)).setTimestamp("ts", expectedMillis).build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").setDate("date", LocalDate.of(2003, 11, 18)).setTimestamp("ts", expectedMillis).build();
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    // Wait for the preview status to become COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus status = previewManager.getStatus(previewId);
        return status == null ? null : status.getStatus();
    }, 5, TimeUnit.MINUTES);
    // Get the data for the "source" stage from the PreviewStore; it should contain two records.
    checkPreviewStore(previewManager, previewId, "source", 2);
    // Make sure the correct logical type values are returned by the tracers
    List<JsonElement> data = previewManager.getData(previewId, "source").get(DATA_TRACER_PROPERTY);
    StructuredRecord actualRecordOne = GSON.fromJson(data.get(0), StructuredRecord.class);
    StructuredRecord actualRecordTwo = GSON.fromJson(data.get(1), StructuredRecord.class);
    Assert.assertEquals(ImmutableSet.of("2002-11-18", "2003-11-18"), ImmutableSet.of(actualRecordOne.get("date"), actualRecordTwo.get("date")));
    Assert.assertEquals(ImmutableSet.of("2018-11-11T11:11:11.123Z[UTC]", "2018-11-11T11:11:11.123Z[UTC]"), ImmutableSet.of(actualRecordOne.get("ts"), actualRecordTwo.get("ts")));
    // Get the data for the "transform" stage from the PreviewStore; it should contain two records.
    checkPreviewStore(previewManager, previewId, "transform", 2);
    // Get the data for the "sink" stage from the PreviewStore; it should contain two records.
    checkPreviewStore(previewManager, previewId, "sink", 2);
    // Validate the metrics for preview
    validateMetric(2, previewId, "source.records.in", previewManager);
    validateMetric(2, previewId, "source.records.out", previewManager);
    validateMetric(2, previewId, "transform.records.in", previewManager);
    validateMetric(2, previewId, "transform.records.out", previewManager);
    validateMetric(2, previewId, "sink.records.out", previewManager);
    validateMetric(2, previewId, "sink.records.in", previewManager);
    // Check that the sink table is not created in the real namespace.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
    Assert.assertNotNull(previewManager.getRunId(previewId));
}
Also used: PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) PreviewManager(io.cdap.cdap.app.preview.PreviewManager) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ZonedDateTime(java.time.ZonedDateTime) JsonElement(com.google.gson.JsonElement) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) PreviewConfig(io.cdap.cdap.proto.artifact.preview.PreviewConfig)
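
The preview assertions above read raw JSON-decoded values, which is why dates and timestamps come back as strings. On a StructuredRecord itself, the typed accessors convert back to java.time values; a brief sketch, assuming the getDate/getTimestamp counterparts of the setDate/setTimestamp builders used above:

// Round-tripping logical types on a StructuredRecord: setDate stores days
// since the epoch, setTimestamp stores epoch millis; the typed getters decode
// them back into java.time values.
StructuredRecord record = StructuredRecord.builder(schema)
    .set("name", "samuel")
    .setDate("date", LocalDate.of(2002, 11, 18))
    .setTimestamp("ts", expectedMillis)
    .build();
LocalDate date = record.getDate("date");       // 2002-11-18
ZonedDateTime ts = record.getTimestamp("ts");  // 2018-11-11T11:11:11.123Z[UTC]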

Example 5 with PreviewManager

Use of io.cdap.cdap.app.preview.PreviewManager in project cdap by caskdata.

From the class PreviewDataPipelineTest, method testDataPipelinePreviewRun.

private void testDataPipelinePreviewRun(Engine engine) throws Exception {
    PreviewManager previewManager = getPreviewManager();
    String sourceTableName = "singleInput";
    String sinkTableName = "singleOutput";
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     * source --> transform -> sink
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema))).addStage(new ETLStage("transform", IdentityTransform.getPlugin())).addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName))).addConnection("source", "transform").addConnection("transform", "sink").setEngine(engine).setNumOfRecordsPreview(100).build();
    // Construct the preview config with the program name and program type
    PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
    // Create the table for the mock source
    addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
    // Start the preview and get the corresponding PreviewRunner.
    ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
    // Wait for the preview status to become COMPLETED.
    Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
        PreviewStatus status = previewManager.getStatus(previewId);
        return status == null ? null : status.getStatus();
    }, 5, TimeUnit.MINUTES);
    // Get the data for the "source" stage from the PreviewStore; it should contain two records.
    checkPreviewStore(previewManager, previewId, "source", 2);
    // Get the data for the "transform" stage from the PreviewStore; it should contain two records.
    checkPreviewStore(previewManager, previewId, "transform", 2);
    // Get the data for the "sink" stage from the PreviewStore; it should contain two records.
    checkPreviewStore(previewManager, previewId, "sink", 2);
    // Validate the metrics for preview
    validateMetric(2, previewId, "source.records.in", previewManager);
    validateMetric(2, previewId, "source.records.out", previewManager);
    validateMetric(2, previewId, "transform.records.in", previewManager);
    validateMetric(2, previewId, "transform.records.out", previewManager);
    validateMetric(2, previewId, "sink.records.out", previewManager);
    validateMetric(2, previewId, "sink.records.in", previewManager);
    // Check that the sink table is not created in the real namespace.
    DataSetManager<Table> sinkManager = getDataset(sinkTableName);
    Assert.assertNull(sinkManager.get());
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
    Assert.assertNotNull(previewManager.getRunId(previewId));
}
Also used: PreviewStatus(io.cdap.cdap.app.preview.PreviewStatus) Table(io.cdap.cdap.api.dataset.table.Table) Schema(io.cdap.cdap.api.data.schema.Schema) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) TimeoutException(java.util.concurrent.TimeoutException) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) PreviewManager(io.cdap.cdap.app.preview.PreviewManager) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) PreviewConfig(io.cdap.cdap.proto.artifact.preview.PreviewConfig)
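
Note the final assertions: a preview run writes nothing to the real namespace, so the sink dataset stays null. Deploying and running the same config, as Example 1 does, would create it. A hedged sketch of that contrast, reusing only calls shown in Example 1; the app name "realRun" is illustrative:

// Deploying the same config for a real run, per Example 1. Unlike a preview,
// a real run materializes the sink table in the namespace.
AppRequest<ETLBatchConfig> realRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("realRun" + engine);   // hypothetical app name
ApplicationManager appManager = deployApplication(appId, realRequest);
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.startAndWaitForGoodRun(Collections.emptyMap(), ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
DataSetManager<Table> realSinkManager = getDataset(sinkTableName);
Assert.assertNotNull(realSinkManager.get());   // the sink table now exists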

Aggregations

Schema (io.cdap.cdap.api.data.schema.Schema): 8 uses
Table (io.cdap.cdap.api.dataset.table.Table): 8 uses
PreviewManager (io.cdap.cdap.app.preview.PreviewManager): 8 uses
PreviewStatus (io.cdap.cdap.app.preview.PreviewStatus): 8 uses
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 8 uses
AppRequest (io.cdap.cdap.proto.artifact.AppRequest): 8 uses
PreviewConfig (io.cdap.cdap.proto.artifact.preview.PreviewConfig): 8 uses
ApplicationId (io.cdap.cdap.proto.id.ApplicationId): 8 uses
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 7 uses
ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig): 7 uses
TimeoutException (java.util.concurrent.TimeoutException): 7 uses
JsonElement (com.google.gson.JsonElement): 2 uses
ArtifactSelectorConfig (io.cdap.cdap.etl.proto.ArtifactSelectorConfig): 2 uses
ConnectionCreationRequest (io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest): 2 uses
PluginInfo (io.cdap.cdap.etl.proto.connection.PluginInfo): 2 uses
ProgramRunStatus (io.cdap.cdap.proto.ProgramRunStatus): 2 uses
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 2 uses
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 2 uses
IOException (java.io.IOException): 2 uses
ExecutionException (java.util.concurrent.ExecutionException): 2 uses