Use of io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping in project cdap by caskdata.
In class DataPipelineTest, method deployPipelineWithSchedule:
private WorkflowManager deployPipelineWithSchedule(String pipelineName, Engine engine, String triggeringPipelineName,
                                                   ArgumentMapping key1Mapping, String expectedKey1Value,
                                                   PluginPropertyMapping key2Mapping, String expectedKey2Value) throws Exception {
String tableName = "actionScheduleTable" + pipelineName + engine;
String sourceName = "macroActionWithScheduleInput-" + pipelineName + engine;
String sinkName = "macroActionWithScheduleOutput-" + pipelineName + engine;
String key1 = key1Mapping.getTarget();
String key2 = key2Mapping.getTarget();
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("action1", MockAction.getPlugin(tableName, "row1", "column1", String.format("${%s}", key1))))
  .addStage(new ETLStage("action2", MockAction.getPlugin(tableName, "row2", "column2", String.format("${%s}", key2))))
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", String.format("${%s}", key1))))
  .addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", String.format("${%s}", key2))))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("action1", "action2")
  .addConnection("action2", "source")
  .addConnection("source", "filter1")
  .addConnection("filter1", "filter2")
  .addConnection("filter2", "sink")
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(pipelineName);
ApplicationManager appManager = deployApplication(appId, appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
// Use expectedKey1Value and expectedKey2Value as the names of two records, so that only the record "samuel" passes both string-value filters
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordKey1Value = StructuredRecord.builder(schema).set("name", expectedKey1Value).build();
StructuredRecord recordKey2Value = StructuredRecord.builder(schema).set("name", expectedKey2Value).build();
// write the three records to the source
DataSetManager<Table> inputManager = getDataset(sourceName);
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordKey1Value, recordKey2Value));
String defaultNamespace = NamespaceId.DEFAULT.getNamespace();
// Use properties from the triggering pipeline as values for runtime argument key1, key2
TriggeringPropertyMapping propertyMapping = new TriggeringPropertyMapping(ImmutableList.of(key1Mapping), ImmutableList.of(key2Mapping));
ProgramStatusTrigger completeTrigger = new ProgramStatusTrigger(new WorkflowId(defaultNamespace, triggeringPipelineName, SmartWorkflow.NAME), ImmutableSet.of(ProgramStatus.COMPLETED));
ScheduleId scheduleId = appId.schedule("completeSchedule");
appManager.addSchedule(new ScheduleDetail(
  scheduleId.getNamespace(), scheduleId.getApplication(), scheduleId.getVersion(), scheduleId.getSchedule(), "",
  new ScheduleProgramInfo(SchedulableProgramType.WORKFLOW, SmartWorkflow.NAME),
  ImmutableMap.of(SmartWorkflow.TRIGGERING_PROPERTIES_MAPPING, GSON.toJson(propertyMapping)),
  completeTrigger, ImmutableList.<Constraint>of(), Schedulers.JOB_QUEUE_TIMEOUT_MILLIS, null));
appManager.enableSchedule(scheduleId);
return appManager.getWorkflowManager(SmartWorkflow.NAME);
}
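For context, a call to this helper from a test might look like the sketch below. The two-argument ArgumentMapping(source, target) and three-argument PluginPropertyMapping(stageName, source, target) constructors, as well as all names and values, are assumptions for illustration rather than lines taken from DataPipelineTest.
// Hypothetical invocation of the helper above; names, values, and constructor
// signatures are assumptions, not copied from the test class.
ArgumentMapping key1Mapping = new ArgumentMapping("triggering-arg", "key1");
PluginPropertyMapping key2Mapping = new PluginPropertyMapping("filter1", "value", "key2");
WorkflowManager triggeredWorkflow = deployPipelineWithSchedule(
  "triggeredPipeline", Engine.SPARK, "triggeringPipeline",
  key1Mapping, "value1", key2Mapping, "value2");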
Use of io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping in project cdap by caskdata.
In class DataPipelineTest, method deployPipelineWithSchedule:
private WorkflowManager deployPipelineWithSchedule(String pipelineName, Engine engine, String triggeringPipelineName,
                                                   ArgumentMapping key1Mapping, String expectedKey1Value,
                                                   PluginPropertyMapping key2Mapping, String expectedKey2Value) throws Exception {
String tableName = "actionScheduleTable" + pipelineName + engine;
String sourceName = "macroActionWithScheduleInput-" + pipelineName + engine;
String sinkName = "macroActionWithScheduleOutput-" + pipelineName + engine;
String key1 = key1Mapping.getTarget();
String key2 = key2Mapping.getTarget();
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("action1", MockAction.getPlugin(tableName, "row1", "column1", String.format("${%s}", key1))))
  .addStage(new ETLStage("action2", MockAction.getPlugin(tableName, "row2", "column2", String.format("${%s}", key2))))
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceName)))
  .addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", String.format("${%s}", key1))))
  .addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", String.format("${%s}", key2))))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("action1", "action2")
  .addConnection("action2", "source")
  .addConnection("source", "filter1")
  .addConnection("filter1", "filter2")
  .addConnection("filter2", "sink")
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(pipelineName);
ApplicationManager appManager = deployApplication(appId, appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
// Use expectedKey1Value and expectedKey2Value as the names of two records, so that only the record "samuel" passes both string-value filters
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordKey1Value = StructuredRecord.builder(schema).set("name", expectedKey1Value).build();
StructuredRecord recordKey2Value = StructuredRecord.builder(schema).set("name", expectedKey2Value).build();
// write the three records to the source
DataSetManager<Table> inputManager = getDataset(sourceName);
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordKey1Value, recordKey2Value));
String defaultNamespace = NamespaceId.DEFAULT.getNamespace();
// Use properties from the triggering pipeline as values for runtime argument key1, key2
TriggeringPropertyMapping propertyMapping = new TriggeringPropertyMapping(ImmutableList.of(key1Mapping), ImmutableList.of(key2Mapping));
ProgramStatusTrigger completeTrigger = new ProgramStatusTrigger(new WorkflowId(defaultNamespace, triggeringPipelineName, SmartWorkflow.NAME), ImmutableSet.of(ProgramStatus.COMPLETED));
ScheduleId scheduleId = appId.schedule("completeSchedule");
appManager.addSchedule(new ScheduleDetail(
  scheduleId.getNamespace(), scheduleId.getApplication(), scheduleId.getVersion(), scheduleId.getSchedule(), "",
  new ScheduleProgramInfo(SchedulableProgramType.WORKFLOW, SmartWorkflow.NAME),
  ImmutableMap.of(SmartWorkflow.TRIGGERING_PROPERTIES_MAPPING, GSON.toJson(propertyMapping)),
  completeTrigger, ImmutableList.of(), Schedulers.JOB_QUEUE_TIMEOUT_MILLIS, null, null));
appManager.enableSchedule(scheduleId);
return appManager.getWorkflowManager(SmartWorkflow.NAME);
}
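The schedule property added above stores the mapping as JSON. Serialized with GSON, a TriggeringPropertyMapping built from one ArgumentMapping and one PluginPropertyMapping would look roughly like the string below; the field names are assumptions inferred from the accessors used in SmartWorkflow later in this listing, not verified against the class's source.
// Rough, assumed shape of GSON.toJson(propertyMapping) as stored in the
// schedule properties (field names inferred from getArguments/getPluginProperties/
// getSource/getTarget/getStageName).
String exampleMappingJson =
  "{\"arguments\":[{\"source\":\"triggering-arg\",\"target\":\"key1\"}],"
  + "\"pluginProperties\":[{\"stageName\":\"filter1\",\"source\":\"value\",\"target\":\"key2\"}]}";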
Use of io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping in project cdap by caskdata.
In class SmartWorkflow, method updateTokenWithTriggeringProperties:
private void updateTokenWithTriggeringProperties(TriggeringScheduleInfo scheduleInfo, TriggeringPropertyMapping propertiesMapping, WorkflowToken token) {
List<ProgramStatusTriggerInfo> programStatusTriggerInfos = new ArrayList<>();
for (TriggerInfo info : scheduleInfo.getTriggerInfos()) {
if (info instanceof ProgramStatusTriggerInfo) {
programStatusTriggerInfos.add((ProgramStatusTriggerInfo) info);
}
}
// If there is no ProgramStatusTriggerInfo, there is no need to override the existing runtime arguments
if (programStatusTriggerInfos.isEmpty()) {
return;
}
// Currently only expecting one trigger in a schedule
ProgramStatusTriggerInfo triggerInfo = programStatusTriggerInfos.get(0);
BasicArguments triggeringArguments = new BasicArguments(triggerInfo.getWorkflowToken(), triggerInfo.getRuntimeArguments());
// Get the value of each triggering pipeline argument specified in the propertiesMapping and update the workflow token
List<ArgumentMapping> argumentMappings = propertiesMapping.getArguments();
for (ArgumentMapping mapping : argumentMappings) {
String sourceKey = mapping.getSource();
if (sourceKey == null) {
LOG.warn("The name of the argument from the triggering pipeline cannot be null; skipping this argument mapping: '{}'.", mapping);
continue;
}
String value = triggeringArguments.get(sourceKey);
if (value == null) {
LOG.warn("Runtime argument '{}' is not found in run '{}' of the triggering pipeline '{}' " + "in namespace '{}' ", sourceKey, triggerInfo.getRunId(), triggerInfo.getApplicationName(), triggerInfo.getNamespace());
continue;
}
// Use the argument name in the triggering pipeline if target is not specified
String targetKey = mapping.getTarget() == null ? sourceKey : mapping.getTarget();
token.put(targetKey, value);
}
// Get the resolved plugin properties map from the triggering pipeline's workflow token in triggeringArguments
Map<String, Map<String, String>> resolvedProperties = GSON.fromJson(triggeringArguments.get(RESOLVED_PLUGIN_PROPERTIES_MAP), STAGE_PROPERTIES_MAP);
for (PluginPropertyMapping mapping : propertiesMapping.getPluginProperties()) {
String stageName = mapping.getStageName();
if (stageName == null) {
LOG.warn("The name of the stage cannot be null in a plugin property mapping; skipping this mapping: '{}'.", mapping);
continue;
}
Map<String, String> pluginProperties = resolvedProperties.get(stageName);
if (pluginProperties == null) {
LOG.warn("No plugin properties can be found with stage name '{}' in triggering pipeline '{}' " + "in namespace '{}' ", mapping.getStageName(), triggerInfo.getApplicationName(), triggerInfo.getNamespace());
continue;
}
String sourceKey = mapping.getSource();
if (sourceKey == null) {
LOG.warn("The name of the argument from the triggering pipeline cannot be null; skipping this argument mapping: '{}'.", mapping);
continue;
}
String value = pluginProperties.get(sourceKey);
if (value == null) {
LOG.warn("No property with name '{}' can be found in plugin '{}' of the triggering pipeline '{}' " + "in namespace '{}' ", sourceKey, stageName, triggerInfo.getApplicationName(), triggerInfo.getNamespace());
continue;
}
// Use the argument name in the triggering pipeline if target is not specified
String targetKey = mapping.getTarget() == null ? sourceKey : mapping.getTarget();
token.put(targetKey, value);
}
}
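A minimal standalone sketch of the argument-mapping half of this method, assuming only the ArgumentMapping getters used above: each mapping copies one runtime argument of the triggering run into the token, and the source key doubles as the target key when no target is given.
// Standalone sketch (not CDAP code) of the argument-mapping resolution above,
// using plain java.util.Map and java.util.List in place of the workflow token.
static void copyTriggeringArguments(Map<String, String> triggeringArgs,
                                    List<ArgumentMapping> mappings,
                                    Map<String, String> token) {
  for (ArgumentMapping mapping : mappings) {
    String source = mapping.getSource();
    String value = source == null ? null : triggeringArgs.get(source);
    if (value == null) {
      continue; // mirrors the warn-and-skip behavior of the real method
    }
    String target = mapping.getTarget() == null ? source : mapping.getTarget();
    token.put(target, value);
  }
}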
Use of io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping in project cdap by caskdata.
In class SmartWorkflow, method initialize:
@Override
public void initialize(WorkflowContext context) throws Exception {
super.initialize(context);
TriggeringScheduleInfo scheduleInfo = context.getTriggeringScheduleInfo();
if (scheduleInfo != null) {
String propertiesMappingString = scheduleInfo.getProperties().get(TRIGGERING_PROPERTIES_MAPPING);
if (propertiesMappingString != null) {
TriggeringPropertyMapping propertiesMapping = GSON.fromJson(propertiesMappingString, TriggeringPropertyMapping.class);
updateTokenWithTriggeringProperties(scheduleInfo, propertiesMapping, context.getToken());
}
}
PipelineRuntime pipelineRuntime = new PipelineRuntime(context, workflowMetrics);
WRAPPERLOGGER.info("Pipeline '{}' is started by user '{}' with arguments {}",
  context.getApplicationSpecification().getName(),
  UserGroupInformation.getCurrentUser().getShortUserName(),
  pipelineRuntime.getArguments().asMap());
alertPublishers = new HashMap<>();
postActions = new LinkedHashMap<>();
spec = GSON.fromJson(context.getWorkflowSpecification().getProperty(Constants.PIPELINE_SPEC_KEY), BatchPipelineSpec.class);
stageSpecs = new HashMap<>();
MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context.getNamespace());
PluginContext pluginContext = new PipelinePluginContext(context, workflowMetrics, spec.isStageLoggingEnabled(), spec.isProcessTimingEnabled());
for (ActionSpec actionSpec : spec.getEndingActions()) {
String stageName = actionSpec.getName();
postActions.put(stageName, (PostAction) pluginContext.newPluginInstance(stageName, macroEvaluator));
stageSpecs.put(stageName, StageSpec.builder(stageName, actionSpec.getPluginSpec())
  .setStageLoggingEnabled(spec.isStageLoggingEnabled())
  .setProcessTimingEnabled(spec.isProcessTimingEnabled())
  .build());
}
for (StageSpec stageSpec : spec.getStages()) {
String stageName = stageSpec.getName();
stageSpecs.put(stageName, stageSpec);
if (AlertPublisher.PLUGIN_TYPE.equals(stageSpec.getPluginType())) {
AlertPublisher alertPublisher = context.newPluginInstance(stageName, macroEvaluator);
alertPublishers.put(stageName, alertPublisher);
}
}
WRAPPERLOGGER.info("Pipeline '{}' running", context.getApplicationSpecification().getName());
}
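The TRIGGERING_PROPERTIES_MAPPING property consumed here is the same one DataPipelineTest attaches to the schedule above. A hypothetical schedule properties map carrying it could be built as sketched below; the variable names are illustrative.
// Hypothetical schedule properties later read by initialize(); the value is the
// GSON-serialized TriggeringPropertyMapping constructed by the triggering test.
Map<String, String> scheduleProperties = ImmutableMap.of(
    SmartWorkflow.TRIGGERING_PROPERTIES_MAPPING, GSON.toJson(propertyMapping));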
Use of io.cdap.cdap.etl.proto.v2.TriggeringPropertyMapping in project cdap by caskdata.
In class SmartWorkflow, method initialize:
@Override
public void initialize(WorkflowContext context) throws Exception {
super.initialize(context);
context.enableFieldLineageConsolidation();
TriggeringScheduleInfo scheduleInfo = context.getTriggeringScheduleInfo();
if (scheduleInfo != null) {
String propertiesMappingString = scheduleInfo.getProperties().get(TRIGGERING_PROPERTIES_MAPPING);
if (propertiesMappingString != null) {
TriggeringPropertyMapping propertiesMapping = GSON.fromJson(propertiesMappingString, TriggeringPropertyMapping.class);
updateTokenWithTriggeringProperties(scheduleInfo, propertiesMapping, context.getToken());
}
}
PipelineRuntime pipelineRuntime = new PipelineRuntime(context, workflowMetrics);
WRAPPERLOGGER.info("Pipeline '{}' is started by user '{}' with arguments {}",
  context.getApplicationSpecification().getName(),
  UserGroupInformation.getCurrentUser().getShortUserName(),
  pipelineRuntime.getArguments().asMap());
alertPublishers = new HashMap<>();
postActions = new LinkedHashMap<>();
spec = GSON.fromJson(context.getWorkflowSpecification().getProperty(Constants.PIPELINE_SPEC_KEY), BatchPipelineSpec.class);
stageSpecs = new HashMap<>();
MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), context.getLogicalStartTime(), context, context, context.getNamespace());
PluginContext pluginContext = new PipelinePluginContext(context, workflowMetrics, spec.isStageLoggingEnabled(), spec.isProcessTimingEnabled());
for (ActionSpec actionSpec : spec.getEndingActions()) {
String stageName = actionSpec.getName();
postActions.put(stageName, pluginContext.newPluginInstance(stageName, macroEvaluator));
stageSpecs.put(stageName, StageSpec.builder(stageName, actionSpec.getPluginSpec())
  .setStageLoggingEnabled(spec.isStageLoggingEnabled())
  .setProcessTimingEnabled(spec.isProcessTimingEnabled())
  .setMaxPreviewRecords(spec.getNumOfRecordsPreview())
  .build());
}
for (StageSpec stageSpec : spec.getStages()) {
String stageName = stageSpec.getName();
stageSpecs.put(stageName, stageSpec);
if (AlertPublisher.PLUGIN_TYPE.equals(stageSpec.getPluginType())) {
AlertPublisher alertPublisher = context.newPluginInstance(stageName, macroEvaluator);
alertPublishers.put(stageName, alertPublisher);
}
}
WRAPPERLOGGER.info("Pipeline '{}' running", context.getApplicationSpecification().getName());
}