Use of co.cask.cdap.api.Resources in project cdap by caskdata.
From class MapReduceSpecificationCodec, method deserialize:
@Override
public MapReduceSpecification deserialize(JsonElement json, Type typeOfT,
                                          JsonDeserializationContext context) throws JsonParseException {
  JsonObject jsonObj = json.getAsJsonObject();
  String className = jsonObj.get("className").getAsString();
  String name = jsonObj.get("name").getAsString();
  String description = jsonObj.get("description").getAsString();
  // Driver, mapper, and reducer resource requirements are deserialized separately.
  Resources driverResources = deserializeResources(jsonObj, "driver", context);
  Resources mapperResources = deserializeResources(jsonObj, "mapper", context);
  Resources reducerResources = deserializeResources(jsonObj, "reducer", context);
  // Input and output dataset names are optional and may be absent from the JSON.
  JsonElement inputDataSetElem = jsonObj.get("inputDataSet");
  String inputDataSet = inputDataSetElem == null ? null : inputDataSetElem.getAsString();
  JsonElement outputDataSetElem = jsonObj.get("outputDataSet");
  String outputDataSet = outputDataSetElem == null ? null : outputDataSetElem.getAsString();
  Set<String> dataSets = deserializeSet(jsonObj.get("datasets"), context, String.class);
  Map<String, String> properties = deserializeMap(jsonObj.get("properties"), context, String.class);
  return new MapReduceSpecification(className, name, description, inputDataSet, outputDataSet,
                                    dataSets, properties, driverResources, mapperResources, reducerResources);
}
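The deserializeResources helper is not included in this snippet. A minimal sketch, assuming the serialized field name is the prefix followed by "Resources" (an assumption inferred from the "driver"/"mapper"/"reducer" prefixes above, not confirmed by the source):

private Resources deserializeResources(JsonObject jsonObj, String prefix,
                                       JsonDeserializationContext context) {
  // Hypothetical helper: read an optional "<prefix>Resources" field and let the
  // Gson context deserialize it; return null when the field is absent.
  JsonElement element = jsonObj.get(prefix + "Resources");
  return element == null ? null : (Resources) context.deserialize(element, Resources.class);
}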
Use of co.cask.cdap.api.Resources in project cdap by caskdata.
From class DistributedWorkflowProgramRunner, method findDriverResources:
/**
 * Returns the {@link Resources} requirement for the workflow runnable, deduced from the
 * Spark and MapReduce driver resource requirements.
 */
private Resources findDriverResources(Map<String, SparkSpecification> sparkSpecs,
                                      Map<String, MapReduceSpecification> mrSpecs,
                                      WorkflowSpecification spec) {
  // Find the resource requirements for the workflow, with 768MB as the minimum.
  // The result is the largest memory and cores across all Spark and MapReduce
  // programs inside the workflow.
  Resources resources = new Resources(768);
  for (WorkflowNode node : spec.getNodeIdMap().values()) {
    if (WorkflowNodeType.ACTION == node.getType()) {
      ScheduleProgramInfo programInfo = ((WorkflowActionNode) node).getProgram();
      SchedulableProgramType programType = programInfo.getProgramType();
      if (programType == SchedulableProgramType.SPARK || programType == SchedulableProgramType.MAPREDUCE) {
        // The program spec shouldn't be null, otherwise the Workflow is not valid
        Resources driverResources;
        if (programType == SchedulableProgramType.SPARK) {
          driverResources = sparkSpecs.get(programInfo.getProgramName()).getClientResources();
        } else {
          driverResources = mrSpecs.get(programInfo.getProgramName()).getDriverResources();
        }
        if (driverResources != null) {
          resources = max(resources, driverResources);
        }
      }
    }
  }
  return resources;
}
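The max helper used above is also not shown. A minimal sketch, assuming only the public getMemoryMB() and getVirtualCores() accessors of co.cask.cdap.api.Resources:

private Resources max(Resources a, Resources b) {
  // Take the component-wise maximum of memory and virtual cores across the two specs.
  return new Resources(Math.max(a.getMemoryMB(), b.getMemoryMB()),
                       Math.max(a.getVirtualCores(), b.getVirtualCores()));
}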
Use of co.cask.cdap.api.Resources in project cdap by caskdata.
From class ETLBatchConfigTest, method testUpgrade:
@Test
public void testUpgrade() throws Exception {
  final ArtifactSelectorConfig artifact = new ArtifactSelectorConfig("SYSTEM", "universal", "1.0.0");
  // Each legacy (v0) stage is paired with the v1 stage the upgrade is expected to
  // produce; upgraded stages are renamed "<pluginName>.<position>".
  ETLStage source = new ETLStage("DataGenerator", ImmutableMap.of("p1", "v1"), null);
  co.cask.cdap.etl.proto.v1.ETLStage sourceNew = new co.cask.cdap.etl.proto.v1.ETLStage(
    "DataGenerator.1", new Plugin(source.getName(), source.getProperties(), artifact), source.getErrorDatasetName());
  ETLStage transform1 = new ETLStage("Script", ImmutableMap.of("script", "something"), null);
  co.cask.cdap.etl.proto.v1.ETLStage transform1New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Script.2", new Plugin(transform1.getName(), transform1.getProperties(), artifact), transform1.getErrorDatasetName());
  ETLStage transform2 = new ETLStage("Script", null, null);
  co.cask.cdap.etl.proto.v1.ETLStage transform2New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Script.3", new Plugin(transform2.getName(), transform2.getProperties(), artifact), transform2.getErrorDatasetName());
  ETLStage transform3 = new ETLStage("Validator", ImmutableMap.of("p1", "v1", "p2", "v2"), "errorDS");
  co.cask.cdap.etl.proto.v1.ETLStage transform3New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Validator.4", new Plugin(transform3.getName(), transform3.getProperties(), artifact), transform3.getErrorDatasetName());
  ETLStage sink1 = new ETLStage("Table", ImmutableMap.of("rowkey", "xyz"), null);
  co.cask.cdap.etl.proto.v1.ETLStage sink1New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Table.5", new Plugin(sink1.getName(), sink1.getProperties(), artifact), sink1.getErrorDatasetName());
  ETLStage sink2 = new ETLStage("HDFS", ImmutableMap.of("name", "abc"), null);
  co.cask.cdap.etl.proto.v1.ETLStage sink2New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "HDFS.6", new Plugin(sink2.getName(), sink2.getProperties(), artifact), sink2.getErrorDatasetName());
  ETLStage action = new ETLStage("Email", ImmutableMap.of("email", "slj@example.com"), null);
  co.cask.cdap.etl.proto.v1.ETLStage actionNew = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Email.1", new Plugin(action.getName(), action.getProperties(), artifact), action.getErrorDatasetName());
  List<Connection> connections = new ArrayList<>();
  connections.add(new Connection(sourceNew.getName(), transform1New.getName()));
  connections.add(new Connection(transform1New.getName(), transform2New.getName()));
  connections.add(new Connection(transform2New.getName(), transform3New.getName()));
  connections.add(new Connection(transform3New.getName(), sink1New.getName()));
  connections.add(new Connection(transform3New.getName(), sink2New.getName()));
  String schedule = "*/5 * * * *";
  Resources resources = new Resources(1024, 1);
  ETLBatchConfig config = new ETLBatchConfig(schedule, source, ImmutableList.of(sink1, sink2),
                                             ImmutableList.of(transform1, transform2, transform3),
                                             resources, ImmutableList.of(action));
  co.cask.cdap.etl.proto.v1.ETLBatchConfig configNew = co.cask.cdap.etl.proto.v1.ETLBatchConfig.builder(schedule)
    .setSource(sourceNew)
    .addSink(sink1New)
    .addSink(sink2New)
    .addTransform(transform1New)
    .addTransform(transform2New)
    .addTransform(transform3New)
    .addConnections(connections)
    .setResources(resources)
    .setDriverResources(resources)
    .addAction(actionNew)
    .build();
  Assert.assertEquals(configNew, config.upgrade(new UpgradeContext() {
    @Nullable
    @Override
    public ArtifactSelectorConfig getPluginArtifact(String pluginType, String pluginName) {
      return new ArtifactSelectorConfig(ArtifactScope.SYSTEM.name(), "universal", "1.0.0");
    }
  }));
}
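The upgrade itself happens inside ETLBatchConfig.upgrade, which is not shown. A minimal sketch of the per-stage step the expectations above imply (the helper name and signature are hypothetical, not the pipeline's actual implementation):

private co.cask.cdap.etl.proto.v1.ETLStage upgradeStage(ETLStage stage, String pluginType,
                                                        int position, UpgradeContext ctx) {
  // Resolve the artifact through the UpgradeContext, wrap the legacy stage in a Plugin,
  // and rename it "<pluginName>.<position>", matching names like "DataGenerator.1" above.
  ArtifactSelectorConfig artifact = ctx.getPluginArtifact(pluginType, stage.getName());
  Plugin plugin = new Plugin(stage.getName(), stage.getProperties(), artifact);
  return new co.cask.cdap.etl.proto.v1.ETLStage(stage.getName() + "." + position,
                                                plugin, stage.getErrorDatasetName());
}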
Use of co.cask.cdap.api.Resources in project cdap by caskdata.
From class ETLRealtimeConfigTest, method testUpgrade:
@Test
public void testUpgrade() throws Exception {
  ETLStage source = new ETLStage("DataGenerator", ImmutableMap.of("p1", "v1"), null);
  co.cask.cdap.etl.proto.v1.ETLStage sourceNew = new co.cask.cdap.etl.proto.v1.ETLStage(
    "DataGenerator.1", new Plugin(source.getName(), source.getProperties()), source.getErrorDatasetName());
  ETLStage transform1 = new ETLStage("Script", ImmutableMap.of("script", "something"), null);
  co.cask.cdap.etl.proto.v1.ETLStage transform1New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Script.2", new Plugin(transform1.getName(), transform1.getProperties()), transform1.getErrorDatasetName());
  ETLStage transform2 = new ETLStage("Script", null, null);
  co.cask.cdap.etl.proto.v1.ETLStage transform2New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Script.3", new Plugin(transform2.getName(), transform2.getProperties()), transform2.getErrorDatasetName());
  ETLStage transform3 = new ETLStage("Validator", ImmutableMap.of("p1", "v1", "p2", "v2"), "errorDS");
  co.cask.cdap.etl.proto.v1.ETLStage transform3New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Validator.4", new Plugin(transform3.getName(), transform3.getProperties()), transform3.getErrorDatasetName());
  ETLStage sink1 = new ETLStage("Table", ImmutableMap.of("rowkey", "xyz"), null);
  co.cask.cdap.etl.proto.v1.ETLStage sink1New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "Table.5", new Plugin(sink1.getName(), sink1.getProperties()), sink1.getErrorDatasetName());
  ETLStage sink2 = new ETLStage("HDFS", ImmutableMap.of("name", "abc"), null);
  co.cask.cdap.etl.proto.v1.ETLStage sink2New = new co.cask.cdap.etl.proto.v1.ETLStage(
    "HDFS.6", new Plugin(sink2.getName(), sink2.getProperties()), sink2.getErrorDatasetName());
  List<Connection> connections = new ArrayList<>();
  connections.add(new Connection(sourceNew.getName(), transform1New.getName()));
  connections.add(new Connection(transform1New.getName(), transform2New.getName()));
  connections.add(new Connection(transform2New.getName(), transform3New.getName()));
  connections.add(new Connection(transform3New.getName(), sink1New.getName()));
  connections.add(new Connection(transform3New.getName(), sink2New.getName()));
  Resources resources = new Resources(1024, 1);
  ETLRealtimeConfig config = new ETLRealtimeConfig(1, source, ImmutableList.of(sink1, sink2),
                                                   ImmutableList.of(transform1, transform2, transform3), resources);
  co.cask.cdap.etl.proto.v1.ETLRealtimeConfig configNew = co.cask.cdap.etl.proto.v1.ETLRealtimeConfig.builder()
    .setInstances(1)
    .setSource(sourceNew)
    .addSink(sink1New)
    .addSink(sink2New)
    .addTransform(transform1New)
    .addTransform(transform2New)
    .addTransform(transform3New)
    .addConnections(connections)
    .setResources(resources)
    .build();
  Assert.assertEquals(configNew, config.upgrade(new UpgradeContext() {
    @Nullable
    @Override
    public ArtifactSelectorConfig getPluginArtifact(String pluginType, String pluginName) {
      return null;
    }
  }));
}
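The contrast with the batch test above is deliberate: here getPluginArtifact returns null, so the expected v1 stages are built with the two-argument Plugin constructor and carry no artifact selector.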
Use of co.cask.cdap.api.Resources in project cdap by caskdata.
From class PipelineSpecGeneratorTest, method testGenerateSpec:
@Test
public void testGenerateSpec() {
  /*
   * Pipeline DAG under test (reconstructed from the connections added below):
   *   source --> t1, t2, sink2
   *   t1     --> t2, t3, sink2
   *   t2     --> t3, sink2
   *   t3     --> sink1
   */
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MOCK_SOURCE))
    .addStage(new ETLStage("sink1", MOCK_SINK))
    .addStage(new ETLStage("sink2", MOCK_SINK))
    .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t2", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t3", MOCK_TRANSFORM_B))
    .addConnection("source", "t1")
    .addConnection("source", "t2")
    .addConnection("source", "sink2")
    .addConnection("t1", "t2")
    .addConnection("t1", "t3")
    .addConnection("t1", "sink2")
    .addConnection("t2", "sink2")
    .addConnection("t2", "t3")
    .addConnection("t3", "sink1")
    .build();
  // Test that the generated spec is correct, with the right input and output schemas
  // and artifact information.
  BatchPipelineSpec actual = specGenerator.generateSpec(etlConfig);
  Map<String, String> emptyMap = ImmutableMap.of();
  PipelineSpec expected = BatchPipelineSpec.builder()
    .addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID))
                .setOutputSchema(SCHEMA_A)
                .addOutputs("t1", "t2", "sink2")
                .build())
    .addStage(StageSpec.builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
                .addInputSchema("t3", SCHEMA_B)
                .addInputs("t3")
                .setErrorSchema(SCHEMA_B)
                .build())
    .addStage(StageSpec.builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
                .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A))
                .addInputs("t1", "t2", "source")
                .setErrorSchema(SCHEMA_A)
                .build())
    .addStage(StageSpec.builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
                .addInputSchema("source", SCHEMA_A)
                .setOutputSchema(SCHEMA_A)
                .addInputs("source")
                .addOutputs("t2", "t3", "sink2")
                .setErrorSchema(SCHEMA_B)
                .build())
    .addStage(StageSpec.builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
                .addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A))
                .setOutputSchema(SCHEMA_A)
                .addInputs("source", "t1")
                .addOutputs("t3", "sink2")
                .setErrorSchema(SCHEMA_B)
                .build())
    .addStage(StageSpec.builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID))
                .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A))
                .setOutputSchema(SCHEMA_B)
                .addInputs("t1", "t2")
                .addOutputs("sink1")
                .setErrorSchema(SCHEMA_A)
                .build())
    .addConnections(etlConfig.getConnections())
    .setResources(etlConfig.getResources())
    .setDriverResources(new Resources(1024, 1))
    .setClientResources(new Resources(1024, 1))
    .setStageLoggingEnabled(etlConfig.isStageLoggingEnabled())
    .build();
  Assert.assertEquals(expected, actual);
}
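The MOCK_* plugins are test fixtures defined elsewhere in the test class. A minimal sketch of how a transform like MOCK_TRANSFORM_B could pin its output schema (the class body is hypothetical; it assumes the standard cdap-etl-api Transform and StageConfigurer types):

public class MockTransformB extends Transform<StructuredRecord, StructuredRecord> {
  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    // Always declare SCHEMA_B as output, which is why t3's output schema above is
    // SCHEMA_B even though both of its inputs arrive with SCHEMA_A.
    pipelineConfigurer.getStageConfigurer().setOutputSchema(SCHEMA_B);
  }

  @Override
  public void transform(StructuredRecord input, Emitter<StructuredRecord> emitter) {
    emitter.emit(input);  // pass-through; only the declared schema matters for spec generation
  }
}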