Use of co.cask.cdap.proto.artifact.AppRequest in project cdap by caskdata.
The class DataPipelineTest, method testExternalSparkProgramPipelines.
@Test
public void testExternalSparkProgramPipelines() throws Exception {
  File testDir = TMP_FOLDER.newFolder("sparkProgramTest");
  File input = new File(testDir, "poem.txt");
  try (PrintWriter writer = new PrintWriter(input.getAbsolutePath())) {
    writer.println("this");
    writer.println("is");
    writer.println("a");
    writer.println("poem");
    writer.println("it");
    writer.println("is");
    writer.println("a");
    writer.println("bad");
    writer.println("poem");
  }
  File wordCountOutput = new File(testDir, "poem_counts");
  File filterOutput = new File(testDir, "poem_filtered");
  String args = String.format("%s %s", input.getAbsolutePath(), wordCountOutput.getAbsolutePath());
  Map<String, String> wordCountProperties = ImmutableMap.of("program.args", args);
  Map<String, String> filterProperties = ImmutableMap.of(
    "inputPath", input.getAbsolutePath(),
    "outputPath", filterOutput.getAbsolutePath(),
    "filterStr", "bad");
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("wordcount", new ETLPlugin(WORDCOUNT_PLUGIN, SPARK_TYPE, wordCountProperties, null)))
    .addStage(new ETLStage("filter", new ETLPlugin(FILTER_PLUGIN, SPARK_TYPE, filterProperties, null)))
    .addConnection("wordcount", "filter")
    .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("sparkProgramTest");
  ApplicationManager appManager = deployApplication(appId, appRequest);
  WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  manager.start();
  manager.waitForRun(ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
  // check wordcount output for the input text:
  //   this is a poem
  //   it is a bad poem
  Map<String, Integer> expected = new HashMap<>();
  expected.put("this", 1);
  expected.put("is", 2);
  expected.put("a", 2);
  expected.put("poem", 2);
  expected.put("it", 1);
  expected.put("bad", 1);
  Map<String, Integer> counts = new HashMap<>();
  File[] files = wordCountOutput.listFiles();
  Assert.assertNotNull("No output files for wordcount found.", files);
  for (File file : files) {
    String fileName = file.getName();
    // skip hidden files and the Hadoop _SUCCESS marker
    if (fileName.startsWith(".") || fileName.equals("_SUCCESS")) {
      continue;
    }
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
      String line;
      while ((line = reader.readLine()) != null) {
        // each output line is "<word> <count>"
        String[] fields = line.split(" ");
        counts.put(fields[0], Integer.parseInt(fields[1]));
      }
    }
  }
  Assert.assertEquals(expected, counts);
  // check filter output: every line except "bad" passes through
  files = filterOutput.listFiles();
  Assert.assertNotNull("No output files for filter program found.", files);
  List<String> expectedLines = ImmutableList.of("this", "is", "a", "poem", "it", "is", "a", "poem");
  List<String> actualLines = new ArrayList<>();
  for (File file : files) {
    String fileName = file.getName();
    if (fileName.startsWith(".") || fileName.equals("_SUCCESS")) {
      continue;
    }
    try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
      String line;
      while ((line = reader.readLine()) != null) {
        actualLines.add(line);
      }
    }
  }
  Assert.assertEquals(expectedLines, actualLines);
}
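All of the snippets in this section share one deploy-and-run shape. As a minimal sketch, assuming the same test-framework helpers (APP_ARTIFACT, deployApplication, SmartWorkflow) are in scope; "myApp" and etlConfig are placeholders:

// Minimal deploy-and-run sketch distilled from the tests in this section.
// APP_ARTIFACT, deployApplication and SmartWorkflow come from the surrounding
// test framework; "myApp" and etlConfig are placeholders.
AppRequest<ETLBatchConfig> request = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("myApp");
ApplicationManager appManager = deployApplication(appId, request);
WorkflowManager workflow = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflow.start();
workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);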
Use of co.cask.cdap.proto.artifact.AppRequest in project cdap by caskdata.
The class DataPipelineTest, method testNoMacroMapReduce.
/**
 * Tests that when no macro is provided for the dataset name property, the datasets are created at configure time.
 */
@Test
public void testNoMacroMapReduce() throws Exception {
  /*
   * Trivial MapReduce pipeline from batch source to batch sink.
   *
   * source --------- sink
   */
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("mrinput", "configTimeMockSourceDataset")))
    .addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("mroutput", "configTimeMockSinkDataset")))
    .addConnection("source", "sink")
    .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("MRApp");
  ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
  // set runtime arguments that would be used for macro substitution
  Map<String, String> runtimeArguments = ImmutableMap.of(
    "runtime", "mockRuntime",
    "sink", "SinkDataset",
    "source", "Source",
    "runtimeSource", "mockRuntimeSourceDataset");
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  // make sure the datasets were created at configure time, before any run
  Assert.assertNotNull(getDataset("configTimeMockSourceDataset").get());
  Assert.assertNotNull(getDataset("configTimeMockSinkDataset").get());
  workflowManager.setRuntimeArgs(runtimeArguments);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
}
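For contrast, a hypothetical macro-based variant (an assumption, not part of the test): putting a macro in the dataset name property would defer dataset creation to runtime, which is exactly what the configure-time assertions above rule out.

// Hypothetical variant (assumption): a macro in the dataset name property
// defers dataset creation from configure time to runtime, so the
// getDataset(...) checks above would fail until the pipeline actually runs.
ETLStage macroSource = new ETLStage("source",
  MockRuntimeDatasetSource.getPlugin("mrinput", "${runtimeSource}"));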
Use of co.cask.cdap.proto.artifact.AppRequest in project cdap by caskdata.
The class DataPipelineTest, method testSequentialAggregators.
private void testSequentialAggregators(Engine engine) throws Exception {
  String sourceName = "linearAggInput-" + engine.name();
  String sinkName = "linearAggOutput-" + engine.name();
  /*
   * source --> filter1 --> aggregator1 --> aggregator2 --> filter2 --> sink
   */
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .setEngine(engine)
    .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
    .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
    .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", "bob")))
    .addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", "jane")))
    .addStage(new ETLStage("aggregator1", IdentityAggregator.getPlugin()))
    .addStage(new ETLStage("aggregator2", IdentityAggregator.getPlugin()))
    .addConnection("source", "filter1")
    .addConnection("filter1", "aggregator1")
    .addConnection("aggregator1", "aggregator2")
    .addConnection("aggregator2", "filter2")
    .addConnection("filter2", "sink")
    .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("LinearAggApp-" + engine);
  ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
  StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
  // write three records to the source
  DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceName));
  MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane));
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start();
  workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  // check output: filter1 drops "bob" and filter2 drops "jane", so only "samuel" survives
  DataSetManager<Table> sinkManager = getDataset(sinkName);
  Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel);
  Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
  Assert.assertEquals(expected, actual);
  // trace the record counts stage by stage: 3 -> 2 (filter1 drops bob),
  // the identity aggregators pass 2 through, then 2 -> 1 (filter2 drops jane)
  validateMetric(3, appId, "source.records.out");
  validateMetric(3, appId, "filter1.records.in");
  validateMetric(2, appId, "filter1.records.out");
  validateMetric(2, appId, "aggregator1.records.in");
  validateMetric(2, appId, "aggregator1.records.out");
  validateMetric(2, appId, "aggregator2.records.in");
  validateMetric(2, appId, "aggregator2.records.out");
  validateMetric(2, appId, "filter2.records.in");
  validateMetric(1, appId, "filter2.records.out");
  validateMetric(1, appId, "sink.records.out");
}
Use of co.cask.cdap.proto.artifact.AppRequest in project cdap by caskdata.
The class ServiceClientTestRun, method deployApp.
/**
 * Deploys the app of the given version.
 */
private void deployApp(ApplicationId app) throws Exception {
  AppRequest createRequest = new AppRequest<>(new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion()));
  appClient.deploy(app, createRequest);
}
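Taken together, the snippets exercise both AppRequest constructor shapes: artifact-only (as in deployApp above) and artifact plus a typed application config (as in the versioned test below). A sketch using only names that appear in these snippets:

// The two AppRequest shapes seen in this section: artifact-only, and
// artifact plus a typed application config (ConfigTestApp.ConfigClass here).
ArtifactSummary summary = new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion());
AppRequest<?> artifactOnly = new AppRequest<>(summary);
AppRequest<ConfigTestApp.ConfigClass> withConfig =
  new AppRequest<>(summary, new ConfigTestApp.ConfigClass("tS1", "tD1", "tV1"));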
Use of co.cask.cdap.proto.artifact.AppRequest in project cdap by caskdata.
The class TestFrameworkTestRun, method testAppVersionsCreation.
@Test
public void testAppVersionsCreation() throws Exception {
  ArtifactId artifactId = new ArtifactId(NamespaceId.DEFAULT.getNamespace(), "cfg-app", "1.0.0-SNAPSHOT");
  addAppArtifact(artifactId, ConfigTestApp.class);
  // deploy version1 of the app with its own config
  ApplicationId appId = new ApplicationId(NamespaceId.DEFAULT.getNamespace(), "AppV1", "version1");
  AppRequest<ConfigTestApp.ConfigClass> createRequest = new AppRequest<>(
    new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion()),
    new ConfigTestApp.ConfigClass("tS1", "tD1", "tV1"));
  ApplicationManager appManager = deployApplication(appId, createRequest);
  ServiceManager serviceManager = appManager.getServiceManager(ConfigTestApp.SERVICE_NAME);
  serviceManager.start();
  URL serviceURL = serviceManager.getServiceURL();
  Gson gson = new Gson();
  Assert.assertEquals("tV1", gson.fromJson(callServiceGet(serviceURL, "ping"), String.class));
  serviceManager.stop();
  // deploy version2 of the same app name with a different config
  appId = new ApplicationId(NamespaceId.DEFAULT.getNamespace(), "AppV1", "version2");
  createRequest = new AppRequest<>(
    new ArtifactSummary(artifactId.getArtifact(), artifactId.getVersion()),
    new ConfigTestApp.ConfigClass("tS2", "tD2", "tV2"));
  appManager = deployApplication(appId, createRequest);
  serviceManager = appManager.getServiceManager(ConfigTestApp.SERVICE_NAME);
  serviceManager.start();
  serviceURL = serviceManager.getServiceURL();
  Assert.assertEquals("tV2", gson.fromJson(callServiceGet(serviceURL, "ping"), String.class));
  serviceManager.stop();
}
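One detail worth noting: the test decodes the ping response with Gson, treating the body as a JSON string. A tiny illustration; the raw body shown is an assumption about what callServiceGet returns, not taken from the test:

// The ping response is evidently parsed as a JSON string, so the raw body
// would be the quoted form of the configured value (raw value here is an
// assumption for illustration).
Gson gson = new Gson();
String raw = "\"tV1\"";
Assert.assertEquals("tV1", gson.fromJson(raw, String.class));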