
Example 21 with Resources

Use of co.cask.cdap.api.Resources in project cdap by caskdata.

The class MapReduceSpecificationCodec, method deserialize.

@Override
public MapReduceSpecification deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException {
    JsonObject jsonObj = json.getAsJsonObject();
    String className = jsonObj.get("className").getAsString();
    String name = jsonObj.get("name").getAsString();
    String description = jsonObj.get("description").getAsString();
    Resources driverResources = deserializeResources(jsonObj, "driver", context);
    Resources mapperResources = deserializeResources(jsonObj, "mapper", context);
    Resources reducerResources = deserializeResources(jsonObj, "reducer", context);
    JsonElement inputDataSetElem = jsonObj.get("inputDataSet");
    String inputDataSet = inputDataSetElem == null ? null : inputDataSetElem.getAsString();
    JsonElement outputDataSetElem = jsonObj.get("outputDataSet");
    String outputDataSet = outputDataSetElem == null ? null : outputDataSetElem.getAsString();
    Set<String> dataSets = deserializeSet(jsonObj.get("datasets"), context, String.class);
    Map<String, String> properties = deserializeMap(jsonObj.get("properties"), context, String.class);
    return new MapReduceSpecification(className, name, description, inputDataSet, outputDataSet, dataSets, properties, driverResources, mapperResources, reducerResources);
}
Also used : JsonElement(com.google.gson.JsonElement) MapReduceSpecification(co.cask.cdap.api.mapreduce.MapReduceSpecification) JsonObject(com.google.gson.JsonObject) Resources(co.cask.cdap.api.Resources)
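
The deserializeResources, deserializeSet, and deserializeMap calls are helpers inherited from the codec's base class and are not shown in this snippet. A minimal sketch of what deserializeResources might look like, assuming a member naming convention such as "driverResources" (the member name and the null handling are assumptions, not taken from the source):

private Resources deserializeResources(JsonObject jsonObj, String prefix, JsonDeserializationContext context) {
    // Assumed member naming convention: "driverResources", "mapperResources", "reducerResources".
    String name = prefix.isEmpty() ? "resources" : prefix + "Resources";
    JsonElement element = jsonObj.get(name);
    // Tolerate a missing member so older specifications without explicit resources still deserialize.
    return element == null ? null : context.<Resources>deserialize(element, Resources.class);
}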

Example 22 with Resources

Use of co.cask.cdap.api.Resources in project cdap by caskdata.

The class DistributedWorkflowProgramRunner, method findDriverResources.

/**
   * Returns the {@link Resources} requirement for the workflow runnable, deduced from the
   * Spark or MapReduce driver resources requirements.
   */
private Resources findDriverResources(Map<String, SparkSpecification> sparkSpecs, Map<String, MapReduceSpecification> mrSpecs, WorkflowSpecification spec) {
    // Find the resource requirements for the workflow, with 768MB as the minimum.
    // Use the largest memory and cores from all Spark and MapReduce programs inside the workflow.
    Resources resources = new Resources(768);
    for (WorkflowNode node : spec.getNodeIdMap().values()) {
        if (WorkflowNodeType.ACTION == node.getType()) {
            ScheduleProgramInfo programInfo = ((WorkflowActionNode) node).getProgram();
            SchedulableProgramType programType = programInfo.getProgramType();
            if (programType == SchedulableProgramType.SPARK || programType == SchedulableProgramType.MAPREDUCE) {
                // The program spec shouldn't be null, otherwise the Workflow is not valid
                Resources driverResources;
                if (programType == SchedulableProgramType.SPARK) {
                    driverResources = sparkSpecs.get(programInfo.getProgramName()).getClientResources();
                } else {
                    driverResources = mrSpecs.get(programInfo.getProgramName()).getDriverResources();
                }
                if (driverResources != null) {
                    resources = max(resources, driverResources);
                }
            }
        }
    }
    return resources;
}
Also used : WorkflowActionNode(co.cask.cdap.api.workflow.WorkflowActionNode) SchedulableProgramType(co.cask.cdap.api.schedule.SchedulableProgramType) Resources(co.cask.cdap.api.Resources) ScheduleProgramInfo(co.cask.cdap.api.workflow.ScheduleProgramInfo) WorkflowNode(co.cask.cdap.api.workflow.WorkflowNode)
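
The max call above combines two resource requirements; the helper itself is not part of this snippet. A minimal sketch, assuming the standard Resources accessors getMemoryMB() and getVirtualCores():

private Resources max(Resources a, Resources b) {
    // Take the larger memory and the larger core count, so the driver can host the biggest program.
    int memoryMB = Math.max(a.getMemoryMB(), b.getMemoryMB());
    int virtualCores = Math.max(a.getVirtualCores(), b.getVirtualCores());
    return new Resources(memoryMB, virtualCores);
}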

Example 23 with Resources

Use of co.cask.cdap.api.Resources in project cdap by caskdata.

The class ETLBatchConfigTest, method testUpgrade.

@Test
public void testUpgrade() throws Exception {
    final ArtifactSelectorConfig artifact = new ArtifactSelectorConfig("SYSTEM", "universal", "1.0.0");
    ETLStage source = new ETLStage("DataGenerator", ImmutableMap.of("p1", "v1"), null);
    co.cask.cdap.etl.proto.v1.ETLStage sourceNew = new co.cask.cdap.etl.proto.v1.ETLStage("DataGenerator.1", new Plugin(source.getName(), source.getProperties(), artifact), source.getErrorDatasetName());
    ETLStage transform1 = new ETLStage("Script", ImmutableMap.of("script", "something"), null);
    co.cask.cdap.etl.proto.v1.ETLStage transform1New = new co.cask.cdap.etl.proto.v1.ETLStage("Script.2", new Plugin(transform1.getName(), transform1.getProperties(), artifact), transform1.getErrorDatasetName());
    ETLStage transform2 = new ETLStage("Script", null, null);
    co.cask.cdap.etl.proto.v1.ETLStage transform2New = new co.cask.cdap.etl.proto.v1.ETLStage("Script.3", new Plugin(transform2.getName(), transform2.getProperties(), artifact), transform2.getErrorDatasetName());
    ETLStage transform3 = new ETLStage("Validator", ImmutableMap.of("p1", "v1", "p2", "v2"), "errorDS");
    co.cask.cdap.etl.proto.v1.ETLStage transform3New = new co.cask.cdap.etl.proto.v1.ETLStage("Validator.4", new Plugin(transform3.getName(), transform3.getProperties(), artifact), transform3.getErrorDatasetName());
    ETLStage sink1 = new ETLStage("Table", ImmutableMap.of("rowkey", "xyz"), null);
    co.cask.cdap.etl.proto.v1.ETLStage sink1New = new co.cask.cdap.etl.proto.v1.ETLStage("Table.5", new Plugin(sink1.getName(), sink1.getProperties(), artifact), sink1.getErrorDatasetName());
    ETLStage sink2 = new ETLStage("HDFS", ImmutableMap.of("name", "abc"), null);
    co.cask.cdap.etl.proto.v1.ETLStage sink2New = new co.cask.cdap.etl.proto.v1.ETLStage("HDFS.6", new Plugin(sink2.getName(), sink2.getProperties(), artifact), sink2.getErrorDatasetName());
    ETLStage action = new ETLStage("Email", ImmutableMap.of("email", "slj@example.com"), null);
    co.cask.cdap.etl.proto.v1.ETLStage actionNew = new co.cask.cdap.etl.proto.v1.ETLStage("Email.1", new Plugin(action.getName(), action.getProperties(), artifact), action.getErrorDatasetName());
    List<Connection> connections = new ArrayList<>();
    connections.add(new Connection(sourceNew.getName(), transform1New.getName()));
    connections.add(new Connection(transform1New.getName(), transform2New.getName()));
    connections.add(new Connection(transform2New.getName(), transform3New.getName()));
    connections.add(new Connection(transform3New.getName(), sink1New.getName()));
    connections.add(new Connection(transform3New.getName(), sink2New.getName()));
    String schedule = "*/5 * * * *";
    Resources resources = new Resources(1024, 1);
    ETLBatchConfig config = new ETLBatchConfig(schedule, source, ImmutableList.of(sink1, sink2), ImmutableList.of(transform1, transform2, transform3), resources, ImmutableList.of(action));
    co.cask.cdap.etl.proto.v1.ETLBatchConfig configNew = co.cask.cdap.etl.proto.v1.ETLBatchConfig.builder(schedule)
        .setSource(sourceNew)
        .addSink(sink1New)
        .addSink(sink2New)
        .addTransform(transform1New)
        .addTransform(transform2New)
        .addTransform(transform3New)
        .addConnections(connections)
        .setResources(resources)
        .setDriverResources(resources)
        .addAction(actionNew)
        .build();
    Assert.assertEquals(configNew, config.upgrade(new UpgradeContext() {

        @Nullable
        @Override
        public ArtifactSelectorConfig getPluginArtifact(String pluginType, String pluginName) {
            return new ArtifactSelectorConfig(ArtifactScope.SYSTEM.name(), "universal", "1.0.0");
        }
    }));
}
Also used : ArtifactSelectorConfig(co.cask.cdap.etl.proto.ArtifactSelectorConfig) ArrayList(java.util.ArrayList) Connection(co.cask.cdap.etl.proto.Connection) UpgradeContext(co.cask.cdap.etl.proto.UpgradeContext) Resources(co.cask.cdap.api.Resources) Plugin(co.cask.cdap.etl.proto.v1.Plugin) Test(org.junit.Test)

Example 24 with Resources

Use of co.cask.cdap.api.Resources in project cdap by caskdata.

The class ETLRealtimeConfigTest, method testUpgrade.

@Test
public void testUpgrade() throws Exception {
    ETLStage source = new ETLStage("DataGenerator", ImmutableMap.of("p1", "v1"), null);
    co.cask.cdap.etl.proto.v1.ETLStage sourceNew = new co.cask.cdap.etl.proto.v1.ETLStage("DataGenerator.1", new Plugin(source.getName(), source.getProperties()), source.getErrorDatasetName());
    ETLStage transform1 = new ETLStage("Script", ImmutableMap.of("script", "something"), null);
    co.cask.cdap.etl.proto.v1.ETLStage transform1New = new co.cask.cdap.etl.proto.v1.ETLStage("Script.2", new Plugin(transform1.getName(), transform1.getProperties()), transform1.getErrorDatasetName());
    ETLStage transform2 = new ETLStage("Script", null, null);
    co.cask.cdap.etl.proto.v1.ETLStage transform2New = new co.cask.cdap.etl.proto.v1.ETLStage("Script.3", new Plugin(transform2.getName(), transform2.getProperties()), transform2.getErrorDatasetName());
    ETLStage transform3 = new ETLStage("Validator", ImmutableMap.of("p1", "v1", "p2", "v2"), "errorDS");
    co.cask.cdap.etl.proto.v1.ETLStage transform3New = new co.cask.cdap.etl.proto.v1.ETLStage("Validator.4", new Plugin(transform3.getName(), transform3.getProperties()), transform3.getErrorDatasetName());
    ETLStage sink1 = new ETLStage("Table", ImmutableMap.of("rowkey", "xyz"), null);
    co.cask.cdap.etl.proto.v1.ETLStage sink1New = new co.cask.cdap.etl.proto.v1.ETLStage("Table.5", new Plugin(sink1.getName(), sink1.getProperties()), sink1.getErrorDatasetName());
    ETLStage sink2 = new ETLStage("HDFS", ImmutableMap.of("name", "abc"), null);
    co.cask.cdap.etl.proto.v1.ETLStage sink2New = new co.cask.cdap.etl.proto.v1.ETLStage("HDFS.6", new Plugin(sink2.getName(), sink2.getProperties()), sink2.getErrorDatasetName());
    List<Connection> connections = new ArrayList<>();
    connections.add(new Connection(sourceNew.getName(), transform1New.getName()));
    connections.add(new Connection(transform1New.getName(), transform2New.getName()));
    connections.add(new Connection(transform2New.getName(), transform3New.getName()));
    connections.add(new Connection(transform3New.getName(), sink1New.getName()));
    connections.add(new Connection(transform3New.getName(), sink2New.getName()));
    Resources resources = new Resources(1024, 1);
    ETLRealtimeConfig config = new ETLRealtimeConfig(1, source, ImmutableList.of(sink1, sink2), ImmutableList.of(transform1, transform2, transform3), resources);
    co.cask.cdap.etl.proto.v1.ETLRealtimeConfig configNew = co.cask.cdap.etl.proto.v1.ETLRealtimeConfig.builder()
        .setInstances(1)
        .setSource(sourceNew)
        .addSink(sink1New)
        .addSink(sink2New)
        .addTransform(transform1New)
        .addTransform(transform2New)
        .addTransform(transform3New)
        .addConnections(connections)
        .setResources(resources)
        .build();
    Assert.assertEquals(configNew, config.upgrade(new UpgradeContext() {

        @Nullable
        @Override
        public ArtifactSelectorConfig getPluginArtifact(String pluginType, String pluginName) {
            return null;
        }
    }));
}
Also used : ArrayList(java.util.ArrayList) Connection(co.cask.cdap.etl.proto.Connection) UpgradeContext(co.cask.cdap.etl.proto.UpgradeContext) Resources(co.cask.cdap.api.Resources) Plugin(co.cask.cdap.etl.proto.v1.Plugin) Test(org.junit.Test)
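
Both upgrade tests build the same requirement with new Resources(1024, 1). A short illustration of the Resources constructors seen in these examples; the single- and two-argument forms appear in the snippets above (768 MB in Example 22, 1024 MB and 1 virtual core here), while the no-argument form and its defaults are an assumption:

Resources defaults = new Resources();        // assumed: framework default memory and cores
Resources memoryOnly = new Resources(768);   // 768 MB of memory, default virtual cores
Resources full = new Resources(1024, 1);     // 1024 MB of memory and 1 virtual core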

Example 25 with Resources

Use of co.cask.cdap.api.Resources in project cdap by caskdata.

The class PipelineSpecGeneratorTest, method testGenerateSpec.

@Test
public void testGenerateSpec() {
    /*
     *           ---- t1 ------------
     *           |            |      |
     * source ---             |      |--- t3 --- sink1
     *           |            |      |
     *           ------------ t2 --------------- sink2
     *           |                        |
     *           |                        |
     *           -------------------------
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("sink1", MOCK_SINK))
        .addStage(new ETLStage("sink2", MOCK_SINK))
        .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t2", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t3", MOCK_TRANSFORM_B))
        .addConnection("source", "t1")
        .addConnection("source", "t2")
        .addConnection("source", "sink2")
        .addConnection("t1", "t2")
        .addConnection("t1", "t3")
        .addConnection("t1", "sink2")
        .addConnection("t2", "sink2")
        .addConnection("t2", "t3")
        .addConnection("t3", "sink1")
        .build();
    // Test that the generated spec is correct, with the right input and output schemas and artifact information.
    BatchPipelineSpec actual = specGenerator.generateSpec(etlConfig);
    Map<String, String> emptyMap = ImmutableMap.of();
    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID))
            .setOutputSchema(SCHEMA_A).addOutputs("t1", "t2", "sink2").build())
        .addStage(StageSpec.builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
            .addInputSchema("t3", SCHEMA_B).addInputs("t3").setErrorSchema(SCHEMA_B).build())
        .addStage(StageSpec.builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
            .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A))
            .addInputs("t1", "t2", "source").setErrorSchema(SCHEMA_A).build())
        .addStage(StageSpec.builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
            .addInputSchema("source", SCHEMA_A).setOutputSchema(SCHEMA_A)
            .addInputs("source").addOutputs("t2", "t3", "sink2").setErrorSchema(SCHEMA_B).build())
        .addStage(StageSpec.builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
            .addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A)).setOutputSchema(SCHEMA_A)
            .addInputs("source", "t1").addOutputs("t3", "sink2").setErrorSchema(SCHEMA_B).build())
        .addStage(StageSpec.builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID))
            .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A)).setOutputSchema(SCHEMA_B)
            .addInputs("t1", "t2").addOutputs("sink1").setErrorSchema(SCHEMA_A).build())
        .addConnections(etlConfig.getConnections())
        .setResources(etlConfig.getResources())
        .setDriverResources(new Resources(1024, 1))
        .setClientResources(new Resources(1024, 1))
        .setStageLoggingEnabled(etlConfig.isStageLoggingEnabled())
        .build();
    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) Resources(co.cask.cdap.api.Resources) Test(org.junit.Test)

Aggregations

Resources (co.cask.cdap.api.Resources): 40 uses
JsonObject (com.google.gson.JsonObject): 9 uses
Test (org.junit.Test): 8 uses
ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification): 4 uses
Connection (co.cask.cdap.etl.proto.Connection): 4 uses
UpgradeContext (co.cask.cdap.etl.proto.UpgradeContext): 4 uses
ServiceSpecification (co.cask.cdap.api.service.ServiceSpecification): 3 uses
HttpServiceHandlerSpecification (co.cask.cdap.api.service.http.HttpServiceHandlerSpecification): 3 uses
ArtifactSelectorConfig (co.cask.cdap.etl.proto.ArtifactSelectorConfig): 3 uses
ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin): 3 uses
ArrayList (java.util.ArrayList): 3 uses
MapReduceSpecification (co.cask.cdap.api.mapreduce.MapReduceSpecification): 2 uses
SchedulableProgramType (co.cask.cdap.api.schedule.SchedulableProgramType): 2 uses
ServiceHttpEndpoint (co.cask.cdap.api.service.http.ServiceHttpEndpoint): 2 uses
SparkSpecification (co.cask.cdap.api.spark.SparkSpecification): 2 uses
WorkerSpecification (co.cask.cdap.api.worker.WorkerSpecification): 2 uses
ScheduleProgramInfo (co.cask.cdap.api.workflow.ScheduleProgramInfo): 2 uses
WorkflowActionNode (co.cask.cdap.api.workflow.WorkflowActionNode): 2 uses
BatchPipelineSpec (co.cask.cdap.etl.batch.BatchPipelineSpec): 2 uses
Plugin (co.cask.cdap.etl.proto.v1.Plugin): 2 uses