Search in sources :

Example 1 with ETLRealtimeConfig

use of co.cask.cdap.etl.proto.v2.ETLRealtimeConfig in project cdap by caskdata.

the class ETLWorker method configure.

/**
 * Configures the worker that drives a realtime ETL pipeline.
 *
 * <p>Sets the worker name, description, instance count and (when present) resources from the
 * config, generates the pipeline spec, checks that it holds exactly one realtime source and plans
 * into exactly one phase, then stores the serialized spec, the serialized phase, a creation id
 * and the stage-logging flag as worker properties.
 *
 * @throws IllegalArgumentException if the configured instance count is below 1, if the pipeline
 *     does not contain exactly one realtime source, or if planning does not yield exactly one
 *     phase
 */
@Override
public void configure() {
    setName(NAME);
    setDescription("Worker Driver for Realtime ETL Pipelines");

    // Reject a non-positive instance count before applying it to the worker.
    int numInstances = config.getInstances();
    if (numInstances < 1) {
        throw new IllegalArgumentException("instances must be greater than 0.");
    }
    setInstances(numInstances);

    // Resources are optional in the config; only override when they were supplied.
    if (null != config.getResources()) {
        setResources(config.getResources());
    }

    PipelineSpecGenerator<ETLRealtimeConfig, PipelineSpec> generator =
        new RealtimePipelineSpecGenerator(
            getConfigurer(),
            ImmutableSet.of(RealtimeSource.PLUGIN_TYPE),
            ImmutableSet.of(RealtimeSink.PLUGIN_TYPE),
            Table.class,
            TableProperties.builder().setSchema(ERROR_SCHEMA).build());
    PipelineSpec pipelineSpec = generator.generateSpec(config);

    // Count the stages whose plugin type is the realtime source type.
    int realtimeSources = 0;
    for (StageSpec stage : pipelineSpec.getStages()) {
        if (!RealtimeSource.PLUGIN_TYPE.equals(stage.getPlugin().getType())) {
            continue;
        }
        realtimeSources++;
    }
    if (realtimeSources != 1) {
        throw new IllegalArgumentException("Invalid pipeline. There must only be one source.");
    }

    PipelinePlanner pipelinePlanner = new PipelinePlanner(
        SUPPORTED_PLUGIN_TYPES,
        ImmutableSet.<String>of(),
        ImmutableSet.<String>of(),
        ImmutableSet.<String>of());
    PipelinePlan pipelinePlan = pipelinePlanner.plan(pipelineSpec);
    if (pipelinePlan.getPhases().size() != 1) {
        // should never happen
        throw new IllegalArgumentException("There was an error planning the pipeline. There should only be one phase.");
    }
    PipelinePhase onlyPhase = pipelinePlan.getPhases().values().iterator().next();

    Map<String, String> workerProperties = new HashMap<>();
    workerProperties.put(Constants.PIPELINE_SPEC_KEY, GSON.toJson(pipelineSpec));
    workerProperties.put(Constants.PIPELINEID, GSON.toJson(onlyPhase));
    // The creation timestamp serves as the unique id for this app creation.
    workerProperties.put(UNIQUE_ID, Long.toString(System.currentTimeMillis()));
    workerProperties.put(Constants.STAGE_LOGGING_ENABLED, String.valueOf(config.isStageLoggingEnabled()));
    setProperties(workerProperties);
}
Also used : PipelinePlan(co.cask.cdap.etl.planner.PipelinePlan) PipelinePlanner(co.cask.cdap.etl.planner.PipelinePlanner) HashMap(java.util.HashMap) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) PipelineSpec(co.cask.cdap.etl.spec.PipelineSpec) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec)

Example 2 with ETLRealtimeConfig

use of co.cask.cdap.etl.proto.v2.ETLRealtimeConfig in project cdap by caskdata.

the class ETLWorkerTest method testOneSourceOneSink.

/**
 * Deploys a pipeline with a single mock source wired to a single mock sink, runs the worker, and
 * checks that the records read back from the sink equal the input and that both stage record
 * metrics are validated against 2.
 */
@Test
@Category(SlowTests.class)
public void testOneSourceOneSink() throws Exception {
    // Two string fields make up the record schema used by the mock source.
    Schema.Field idField = Schema.Field.of("id", Schema.of(Schema.Type.STRING));
    Schema.Field nameField = Schema.Field.of("name", Schema.of(Schema.Type.STRING));
    Schema recordSchema = Schema.recordOf("test", idField, nameField);

    List<StructuredRecord> sourceRecords = new ArrayList<>();
    sourceRecords.add(StructuredRecord.builder(recordSchema).set("id", "123").set("name", "samuel").build());
    sourceRecords.add(StructuredRecord.builder(recordSchema).set("id", "456").set("name", "jackson").build());

    File sinkDir = TMP_FOLDER.newFolder();
    ETLStage sourceStage = new ETLStage("source", MockSource.getPlugin(sourceRecords));
    ETLStage sinkStage = new ETLStage("sink", MockSink.getPlugin(sinkDir));
    ETLRealtimeConfig pipelineConfig = ETLRealtimeConfig.builder()
        .addStage(sourceStage)
        .addStage(sinkStage)
        .addConnection("source", "sink")
        .build();

    ApplicationId applicationId = NamespaceId.DEFAULT.app("simpleApp");
    AppRequest<ETLRealtimeConfig> request = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager applicationManager = deployApplication(applicationId, request);

    WorkerManager worker = applicationManager.getWorkerManager(ETLWorker.NAME);
    worker.start();
    worker.waitForStatus(true, 10, 1);
    try {
        // Read back what the sink wrote and compare it with what the source emitted.
        List<StructuredRecord> sinkRecords = MockSink.getRecords(sinkDir, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(sourceRecords, sinkRecords);
    } finally {
        stopWorker(worker);
    }

    validateMetric(2, applicationId, "source.records.out");
    validateMetric(2, applicationId, "sink.records.in");
}
Also used : WorkerManager(co.cask.cdap.test.WorkerManager) ApplicationManager(co.cask.cdap.test.ApplicationManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) ApplicationId(co.cask.cdap.proto.id.ApplicationId) File(java.io.File) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with ETLRealtimeConfig

use of co.cask.cdap.etl.proto.v2.ETLRealtimeConfig in project cdap by caskdata.

the class ETLWorkerTest method testLookup.

/**
 * Writes one entry ("Bob" -> "123") into a key-value lookup table, runs a pipeline whose source
 * is configured with the fields "Bob" and "Bill" and that table, and checks that the sink
 * receives a single record with only "Bob" set to "123".
 */
@Test
public void testLookup() throws Exception {
    // Seed the lookup dataset with the single key the test expects to resolve.
    addDatasetInstance(KeyValueTable.class.getName(), "lookupTable");
    DataSetManager<KeyValueTable> tableManager = getDataset("lookupTable");
    tableManager.get().write("Bob".getBytes(Charsets.UTF_8), "123".getBytes(Charsets.UTF_8));
    tableManager.flush();

    File sinkDir = TMP_FOLDER.newFolder();
    ETLStage sourceStage =
        new ETLStage("source", LookupSource.getPlugin(ImmutableSet.of("Bob", "Bill"), "lookupTable"));
    ETLStage sinkStage = new ETLStage("sink", MockSink.getPlugin(sinkDir));
    ETLRealtimeConfig pipelineConfig = ETLRealtimeConfig.builder()
        .addStage(sourceStage)
        .addStage(sinkStage)
        .addConnection("source", "sink")
        .build();

    // Expected output: both fields are nullable and only "Bob" is populated.
    Schema.Field bobField = Schema.Field.of("Bob", Schema.nullableOf(Schema.of(Schema.Type.STRING)));
    Schema.Field billField = Schema.Field.of("Bill", Schema.nullableOf(Schema.of(Schema.Type.STRING)));
    Schema expectedSchema = Schema.recordOf("bobbill", bobField, billField);
    List<StructuredRecord> expectedRecords = new ArrayList<>();
    expectedRecords.add(StructuredRecord.builder(expectedSchema).set("Bob", "123").build());

    ApplicationId applicationId = NamespaceId.DEFAULT.app("lookupTestApp");
    AppRequest<ETLRealtimeConfig> request = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager applicationManager = deployApplication(applicationId, request);

    WorkerManager worker = applicationManager.getWorkerManager(ETLWorker.NAME);
    worker.start();
    worker.waitForStatus(true, 10, 1);
    try {
        List<StructuredRecord> sinkRecords = MockSink.getRecords(sinkDir, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(expectedRecords, sinkRecords);
    } finally {
        stopWorker(worker);
    }

    validateMetric(1, applicationId, "source.records.out");
    validateMetric(1, applicationId, "sink.records.in");
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) WorkerManager(co.cask.cdap.test.WorkerManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) ApplicationId(co.cask.cdap.proto.id.ApplicationId) File(java.io.File) Test(org.junit.Test)

Example 4 with ETLRealtimeConfig

use of co.cask.cdap.etl.proto.v2.ETLRealtimeConfig in project cdap by caskdata.

the class ETLWorkerTest method testEmptyProperties.

/**
 * Deploys a pipeline whose source and sink plugins are both created from {@code null}, with two
 * worker instances requested, and checks that the application deploys and the running worker
 * reports two instances.
 */
@Test
public void testEmptyProperties() throws Exception {
    // Pass null to both plugin factories to test if ETLTemplate can handle it.
    ETLStage sourceStage = new ETLStage("source", MockSource.getPlugin(null));
    ETLStage sinkStage = new ETLStage("sink", MockSink.getPlugin(null));
    ETLRealtimeConfig pipelineConfig = ETLRealtimeConfig.builder()
        .addStage(sourceStage)
        .addStage(sinkStage)
        .addConnection("source", "sink")
        .setInstances(2)
        .build();

    ApplicationId applicationId = NamespaceId.DEFAULT.app("emptyTest");
    AppRequest<ETLRealtimeConfig> request = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager applicationManager = deployApplication(applicationId, request);
    Assert.assertNotNull(applicationManager);

    WorkerManager worker = applicationManager.getWorkerManager(ETLWorker.NAME);
    worker.start();
    worker.waitForStatus(true, 10, 1);
    try {
        Assert.assertEquals(2, worker.getInstances());
    } finally {
        stopWorker(worker);
    }
}
Also used : WorkerManager(co.cask.cdap.test.WorkerManager) ApplicationManager(co.cask.cdap.test.ApplicationManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) ApplicationId(co.cask.cdap.proto.id.ApplicationId) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 5 with ETLRealtimeConfig

use of co.cask.cdap.etl.proto.v2.ETLRealtimeConfig in project cdap by caskdata.

the class ETLRealtimeConfigTest method testUpgrade.

/**
 * Builds a config from the unqualified {@code ETLStage}/{@code ETLRealtimeConfig} types
 * (presumably an earlier proto version; confirm against the imports) together with the matching
 * v2 config, and checks that {@code upgrade} of the former equals the latter when the upgrade
 * context returns {@code null} for every plugin artifact.
 */
@Test
public void testUpgrade() throws Exception {
    ArtifactSelectorConfig selector = new ArtifactSelectorConfig("SYSTEM", "universal", "1.0.0");

    // Each stage is paired with its v2 counterpart produced by the from(...) helper.
    ETLStage source = new ETLStage("source", new Plugin("DataGenerator", ImmutableMap.of("p1", "v1"), selector), "errorDS");
    ETLStage transform1 = new ETLStage("transform1", new Plugin("Script", ImmutableMap.of("script", "something"), null));
    ETLStage transform2 = new ETLStage("transform2", new Plugin("Script", null, null));
    ETLStage transform3 = new ETLStage("transform3", new Plugin("Validator", ImmutableMap.of("p1", "v1", "p2", "v2")), "errorDS");
    ETLStage sink1 = new ETLStage("sink1", new Plugin("Table", ImmutableMap.of("rowkey", "xyz"), selector), null);
    ETLStage sink2 = new ETLStage("sink2", new Plugin("HDFS", ImmutableMap.of("name", "abc"), selector), null);

    co.cask.cdap.etl.proto.v2.ETLStage sourceV2 = from(source, RealtimeSource.PLUGIN_TYPE);
    co.cask.cdap.etl.proto.v2.ETLStage transform1V2 = from(transform1, Transform.PLUGIN_TYPE);
    co.cask.cdap.etl.proto.v2.ETLStage transform2V2 = from(transform2, Transform.PLUGIN_TYPE);
    co.cask.cdap.etl.proto.v2.ETLStage transform3V2 = from(transform3, Transform.PLUGIN_TYPE);
    co.cask.cdap.etl.proto.v2.ETLStage sink1V2 = from(sink1, RealtimeSink.PLUGIN_TYPE);
    co.cask.cdap.etl.proto.v2.ETLStage sink2V2 = from(sink2, RealtimeSink.PLUGIN_TYPE);

    // source -> transform1 -> transform2 -> transform3 -> {sink1, sink2}
    Set<Connection> edges = new HashSet<>();
    edges.add(new Connection(sourceV2.getName(), transform1V2.getName()));
    edges.add(new Connection(transform1V2.getName(), transform2V2.getName()));
    edges.add(new Connection(transform2V2.getName(), transform3V2.getName()));
    edges.add(new Connection(transform3V2.getName(), sink1V2.getName()));
    edges.add(new Connection(transform3V2.getName(), sink2V2.getName()));

    Resources workerResources = new Resources(1024, 1);

    ETLRealtimeConfig original = ETLRealtimeConfig.builder()
        .setInstances(1)
        .setSource(source)
        .addSink(sink1)
        .addSink(sink2)
        .addTransform(transform1)
        .addTransform(transform2)
        .addTransform(transform3)
        .addConnections(edges)
        .setResources(workerResources)
        .build();
    co.cask.cdap.etl.proto.v2.ETLRealtimeConfig expected = co.cask.cdap.etl.proto.v2.ETLRealtimeConfig.builder()
        .setInstances(1)
        .addStage(sourceV2)
        .addStage(sink1V2)
        .addStage(sink2V2)
        .addStage(transform1V2)
        .addStage(transform2V2)
        .addStage(transform3V2)
        .addConnections(edges)
        .setResources(workerResources)
        .build();

    // Upgrade context that never supplies an artifact for a plugin.
    UpgradeContext noArtifactContext = new UpgradeContext() {

        @Nullable
        @Override
        public ArtifactSelectorConfig getPluginArtifact(String pluginType, String pluginName) {
            return null;
        }
    };
    Assert.assertEquals(expected, original.upgrade(noArtifactContext));
}
Also used : ArtifactSelectorConfig(co.cask.cdap.etl.proto.ArtifactSelectorConfig) Connection(co.cask.cdap.etl.proto.Connection) UpgradeContext(co.cask.cdap.etl.proto.UpgradeContext) Resources(co.cask.cdap.api.Resources) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

ETLRealtimeConfig (co.cask.cdap.etl.proto.v2.ETLRealtimeConfig): 6, Test (org.junit.Test): 5, ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 4, AppRequest (co.cask.cdap.proto.artifact.AppRequest): 4, ApplicationId (co.cask.cdap.proto.id.ApplicationId): 4, ApplicationManager (co.cask.cdap.test.ApplicationManager): 4, WorkerManager (co.cask.cdap.test.WorkerManager): 4, StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 3, Schema (co.cask.cdap.api.data.schema.Schema): 3, File (java.io.File): 3, ArrayList (java.util.ArrayList): 2, Resources (co.cask.cdap.api.Resources): 1, KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 1, PipelinePhase (co.cask.cdap.etl.common.PipelinePhase): 1, PipelinePlan (co.cask.cdap.etl.planner.PipelinePlan): 1, PipelinePlanner (co.cask.cdap.etl.planner.PipelinePlanner): 1, ArtifactSelectorConfig (co.cask.cdap.etl.proto.ArtifactSelectorConfig): 1, Connection (co.cask.cdap.etl.proto.Connection): 1, UpgradeContext (co.cask.cdap.etl.proto.UpgradeContext): 1, ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin): 1