Use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
Class ETLWorkerTest, method testOneSourceOneSink.
/**
 * Runs a realtime pipeline made of one mock source connected to one mock sink and checks that
 * the records handed to the source are the ones read back from the sink directory.
 * The worker is always stopped through {@code stopWorker}; afterwards {@code validateMetric}
 * is called for both stages with an expected value of 2.
 */
@Test
@Category(SlowTests.class)
public void testOneSourceOneSink() throws Exception {
  // Both input rows share a record schema with two string fields.
  Schema recordSchema = Schema.recordOf(
    "test",
    Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  StructuredRecord samuel = StructuredRecord.builder(recordSchema).set("id", "123").set("name", "samuel").build();
  StructuredRecord jackson = StructuredRecord.builder(recordSchema).set("id", "456").set("name", "jackson").build();
  List<StructuredRecord> input = new ArrayList<>();
  input.add(samuel);
  input.add(jackson);

  // The mock sink is pointed at a fresh temporary folder.
  File sinkDir = TMP_FOLDER.newFolder();
  ETLStage sourceStage = new ETLStage("source", MockSource.getPlugin(input));
  ETLStage sinkStage = new ETLStage("sink", MockSink.getPlugin(sinkDir));
  ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
    .addStage(sourceStage)
    .addStage(sinkStage)
    .addConnection("source", "sink")
    .build();

  ApplicationId appId = NamespaceId.DEFAULT.app("simpleApp");
  AppRequest<ETLRealtimeConfig> request = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId, request);

  WorkerManager worker = appManager.getWorkerManager(ETLWorker.NAME);
  worker.start();
  worker.waitForStatus(true, 10, 1);
  try {
    Assert.assertEquals(input, MockSink.getRecords(sinkDir, 0, 10, TimeUnit.SECONDS));
  } finally {
    stopWorker(worker);
  }

  validateMetric(2, appId, "source.records.out");
  validateMetric(2, appId, "sink.records.in");
}
Use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
Class ETLWorkerTest, method testLookup.
/**
 * Writes a single "Bob" -> "123" entry into the {@code lookupTable} key-value dataset, runs a
 * realtime pipeline whose source is configured with the fields "Bob" and "Bill" and that table,
 * and checks that the sink receives exactly one record with only "Bob" set to "123".
 * The worker is always stopped through {@code stopWorker}; afterwards {@code validateMetric}
 * is called for both stages with an expected value of 1.
 */
@Test
public void testLookup() throws Exception {
  // Seed the lookup table with the only entry the test relies on.
  addDatasetInstance(KeyValueTable.class.getName(), "lookupTable");
  DataSetManager<KeyValueTable> tableManager = getDataset("lookupTable");
  KeyValueTable table = tableManager.get();
  table.write("Bob".getBytes(Charsets.UTF_8), "123".getBytes(Charsets.UTF_8));
  tableManager.flush();

  File outDir = TMP_FOLDER.newFolder();
  ImmutableSet<String> lookupKeys = ImmutableSet.of("Bob", "Bill");
  ETLStage sourceStage = new ETLStage("source", LookupSource.getPlugin(lookupKeys, "lookupTable"));
  ETLStage sinkStage = new ETLStage("sink", MockSink.getPlugin(outDir));
  ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
    .addStage(sourceStage)
    .addStage(sinkStage)
    .addConnection("source", "sink")
    .build();

  // Expected output: both fields are nullable strings and only "Bob" carries a value.
  Schema expectedSchema = Schema.recordOf(
    "bobbill",
    Schema.Field.of("Bob", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
    Schema.Field.of("Bill", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
  List<StructuredRecord> expected = new ArrayList<>();
  expected.add(StructuredRecord.builder(expectedSchema).set("Bob", "123").build());

  ApplicationId appId = NamespaceId.DEFAULT.app("lookupTestApp");
  AppRequest<ETLRealtimeConfig> request = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId, request);

  WorkerManager worker = appManager.getWorkerManager(ETLWorker.NAME);
  worker.start();
  worker.waitForStatus(true, 10, 1);
  try {
    Assert.assertEquals(expected, MockSink.getRecords(outDir, 0, 10, TimeUnit.SECONDS));
  } finally {
    stopWorker(worker);
  }

  validateMetric(1, appId, "source.records.out");
  validateMetric(1, appId, "sink.records.in");
}
Use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
Class ETLWorkerTest, method testEmptyProperties.
/**
 * Deploys a realtime pipeline whose source and sink plugins are both created from {@code null}
 * arguments and which requests two instances, then checks that the deployment yields a non-null
 * application manager and that the started worker reports two instances.
 * The worker is always stopped through {@code stopWorker}.
 */
@Test
public void testEmptyProperties() throws Exception {
  // Null plugin arguments are deliberate: the template is expected to cope with them.
  ETLStage sourceStage = new ETLStage("source", MockSource.getPlugin(null));
  ETLStage sinkStage = new ETLStage("sink", MockSink.getPlugin(null));
  ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
    .addStage(sourceStage)
    .addStage(sinkStage)
    .addConnection("source", "sink")
    .setInstances(2)
    .build();

  ApplicationId appId = NamespaceId.DEFAULT.app("emptyTest");
  AppRequest<ETLRealtimeConfig> request = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId, request);
  Assert.assertNotNull(appManager);

  WorkerManager worker = appManager.getWorkerManager(ETLWorker.NAME);
  worker.start();
  worker.waitForStatus(true, 10, 1);
  try {
    Assert.assertEquals(2, worker.getInstances());
  } finally {
    stopWorker(worker);
  }
}
Use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
Class ETLRealtimeConfigTest, method testUpgrade.
/**
 * Builds an old-style realtime config (one source, three transforms, two sinks) together with
 * the v2 config assembled from the {@code from(...)} conversions of the same stages, and asserts
 * that {@code upgrade} on the old config produces a config equal to the v2 one.
 * The upgrade context used here returns {@code null} for every plugin artifact lookup.
 */
@Test
public void testUpgrade() throws Exception {
  final ArtifactSelectorConfig selector = new ArtifactSelectorConfig("SYSTEM", "universal", "1.0.0");

  // Each old-style stage is paired with its v2 counterpart.
  ETLStage oldSource = new ETLStage(
    "source", new Plugin("DataGenerator", ImmutableMap.of("p1", "v1"), selector), "errorDS");
  co.cask.cdap.etl.proto.v2.ETLStage newSource = from(oldSource, RealtimeSource.PLUGIN_TYPE);

  ETLStage oldTransform1 = new ETLStage(
    "transform1", new Plugin("Script", ImmutableMap.of("script", "something"), null));
  co.cask.cdap.etl.proto.v2.ETLStage newTransform1 = from(oldTransform1, Transform.PLUGIN_TYPE);

  ETLStage oldTransform2 = new ETLStage("transform2", new Plugin("Script", null, null));
  co.cask.cdap.etl.proto.v2.ETLStage newTransform2 = from(oldTransform2, Transform.PLUGIN_TYPE);

  ETLStage oldTransform3 = new ETLStage(
    "transform3", new Plugin("Validator", ImmutableMap.of("p1", "v1", "p2", "v2")), "errorDS");
  co.cask.cdap.etl.proto.v2.ETLStage newTransform3 = from(oldTransform3, Transform.PLUGIN_TYPE);

  ETLStage oldSink1 = new ETLStage(
    "sink1", new Plugin("Table", ImmutableMap.of("rowkey", "xyz"), selector), null);
  co.cask.cdap.etl.proto.v2.ETLStage newSink1 = from(oldSink1, RealtimeSink.PLUGIN_TYPE);

  ETLStage oldSink2 = new ETLStage(
    "sink2", new Plugin("HDFS", ImmutableMap.of("name", "abc"), selector), null);
  co.cask.cdap.etl.proto.v2.ETLStage newSink2 = from(oldSink2, RealtimeSink.PLUGIN_TYPE);

  // source -> transform1 -> transform2 -> transform3 -> {sink1, sink2}
  Set<Connection> connections = new HashSet<>();
  connections.add(new Connection(newSource.getName(), newTransform1.getName()));
  connections.add(new Connection(newTransform1.getName(), newTransform2.getName()));
  connections.add(new Connection(newTransform2.getName(), newTransform3.getName()));
  connections.add(new Connection(newTransform3.getName(), newSink1.getName()));
  connections.add(new Connection(newTransform3.getName(), newSink2.getName()));

  Resources resources = new Resources(1024, 1);

  ETLRealtimeConfig oldConfig = ETLRealtimeConfig.builder()
    .setInstances(1)
    .setSource(oldSource)
    .addSink(oldSink1)
    .addSink(oldSink2)
    .addTransform(oldTransform1)
    .addTransform(oldTransform2)
    .addTransform(oldTransform3)
    .addConnections(connections)
    .setResources(resources)
    .build();

  co.cask.cdap.etl.proto.v2.ETLRealtimeConfig expectedConfig = co.cask.cdap.etl.proto.v2.ETLRealtimeConfig.builder()
    .setInstances(1)
    .addStage(newSource)
    .addStage(newSink1)
    .addStage(newSink2)
    .addStage(newTransform1)
    .addStage(newTransform2)
    .addStage(newTransform3)
    .addConnections(connections)
    .setResources(resources)
    .build();

  // This context never supplies an artifact for a plugin.
  UpgradeContext noArtifactContext = new UpgradeContext() {
    @Nullable
    @Override
    public ArtifactSelectorConfig getPluginArtifact(String pluginType, String pluginName) {
      return null;
    }
  };

  Assert.assertEquals(expectedConfig, oldConfig.upgrade(noArtifactContext));
}
Use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
Class DataPipelineTest, method testExternalDatasetTracking.
/**
 * Runs a batch pipeline with one external mock source feeding two external mock sinks and checks
 * that the workflow completes exactly once and that both sink directories contain the input records.
 *
 * <p>When {@code backwardsCompatible} is {@code false}, the test additionally asserts that the
 * input and output datasets resolve to {@code null} before the run and to non-null afterwards.
 * When it is {@code true}, the sinks are given a {@code null} dataset name and those dataset
 * assertions are skipped.
 *
 * @param engine the engine set on the pipeline config; its name is also used in dataset, folder and app names
 * @param backwardsCompatible whether to run without a dataset name on the sinks
 * @throws Exception if any step of the deployment, run or verification fails
 */
private void testExternalDatasetTracking(Engine engine, boolean backwardsCompatible) throws Exception {
  String compatMarker = backwardsCompatible ? "-bc" : "";
  String suffix = engine.name() + compatMarker;
  boolean trackDatasets = !backwardsCompatible;

  // Names of the datasets the source and sinks are configured with.
  String expectedExternalDatasetInput = "fileInput-" + suffix;
  String expectedExternalDatasetOutput = "fileOutput-" + suffix;

  // Input folder/file, plus one output sub-directory per sink.
  File inputDir = TMP_FOLDER.newFolder("input-" + suffix);
  String inputFile = "input-file1.txt";
  File outputDir = TMP_FOLDER.newFolder("output-" + suffix);
  File outputSubDir1 = new File(outputDir, "subdir1");
  File outputSubDir2 = new File(outputDir, "subdir2");

  if (trackDatasets) {
    // Nothing should be registered under these names before the pipeline runs.
    Assert.assertNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetInput)).get());
    Assert.assertNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetOutput)).get());
  }

  // In backwards-compatible mode the sinks receive no dataset name.
  String sinkDatasetName = trackDatasets ? expectedExternalDatasetOutput : null;

  ETLBatchConfig.Builder builder = ETLBatchConfig.builder("* * * * *");
  ETLStage sourceStage = new ETLStage(
    "source", MockExternalSource.getPlugin(expectedExternalDatasetInput, inputDir.getAbsolutePath()));
  ETLStage firstSink = new ETLStage(
    "sink1", MockExternalSink.getPlugin(sinkDatasetName, "dir1", outputSubDir1.getAbsolutePath()));
  ETLStage secondSink = new ETLStage(
    "sink2", MockExternalSink.getPlugin(sinkDatasetName, "dir2", outputSubDir2.getAbsolutePath()));
  ETLBatchConfig etlConfig = builder.setEngine(engine)
    .addStage(sourceStage)
    .addStage(firstSink)
    .addStage(secondSink)
    .addConnection("source", "sink1")
    .addConnection("source", "sink2")
    .build();

  AppRequest<ETLBatchConfig> request = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("ExternalDatasetApp-" + suffix);
  ApplicationManager appManager = deployApplication(appId, request);

  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  ImmutableList<StructuredRecord> allInput = ImmutableList.of(
    StructuredRecord.builder(schema).set("name", "samuel").build(),
    StructuredRecord.builder(schema).set("name", "bob").build(),
    StructuredRecord.builder(schema).set("name", "jane").build());

  // Write the source's input file before starting the workflow.
  File inputPath = new File(inputDir, inputFile);
  MockExternalSource.writeInput(inputPath.getAbsolutePath(), allInput);

  WorkflowManager workflow = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflow.start();
  workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

  // Exactly one run is expected, and it must have completed.
  List<RunRecord> runs = workflow.getHistory();
  Assert.assertEquals(1, runs.size());
  Assert.assertEquals(ProgramRunStatus.COMPLETED, runs.get(0).getStatus());

  // Each sink directory must hold the full input.
  Assert.assertEquals(allInput, MockExternalSink.readOutput(outputSubDir1.getAbsolutePath()));
  Assert.assertEquals(allInput, MockExternalSink.readOutput(outputSubDir2.getAbsolutePath()));

  if (trackDatasets) {
    // After the run both datasets must resolve to something.
    Assert.assertNotNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetInput)).get());
    Assert.assertNotNull(getDataset(NamespaceId.DEFAULT.dataset(expectedExternalDatasetOutput)).get());
  }
}
Aggregations