Use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.
Shown in class DynamicCDPGraphFramesNodeModuleGroup, method getModuleGroups.
/**
 * Collects the GraphFrames module groups for the given distribution and version.
 *
 * <p>The runtime module group id is looked up through the plugin adapter from the
 * {@code GRAPHFRAMES_MRREQUIRED_MODULE_GROUP} template id and handed to
 * {@code checkRuntimeId} before use. When the id is blank, no group is added and an
 * empty set is returned.
 *
 * @param distribution distribution name forwarded to the Spark Batch linked-node condition
 * @param version distribution version forwarded to the Spark Batch linked-node condition
 * @return a mutable set holding at most one module group
 * @throws Exception declared by the overridden method; NOTE(review): presumably raised by
 *         the adapter lookup or {@code checkRuntimeId} - confirm against their contracts
 */
@Override
public Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) throws Exception {
    Set<DistributionModuleGroup> moduleGroups = new HashSet<>();
    DynamicPluginAdapter adapter = getPluginAdapter();
    String runtimeId = adapter.getRuntimeModuleGroupIdByTemplateId(
            DynamicModuleGroupConstant.GRAPHFRAMES_MRREQUIRED_MODULE_GROUP.getModuleName());
    checkRuntimeId(runtimeId);

    // Nothing to register when the adapter did not resolve a usable runtime id.
    if (StringUtils.isBlank(runtimeId)) {
        return moduleGroups;
    }

    SparkBatchLinkedNodeCondition linkedNode = new SparkBatchLinkedNodeCondition(distribution, version,
            SparkBatchConstant.SPARK_BATCH_SPARKCONFIGURATION_LINKEDPARAMETER);
    moduleGroups.add(new DistributionModuleGroup(runtimeId, true, linkedNode.getCondition()));
    return moduleGroups;
}
Use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.
Shown in class AbstractDistribution, method buildNodeModuleGroups.
/**
 * Builds the mapping from a node component (component type plus component name) to the
 * set of module groups registered for it.
 *
 * <p>The {@code distribution} and {@code version} arguments are forwarded to the condition
 * builders and to {@code ModuleGroupsUtils}. The module group names themselves are resolved
 * with {@code this.getVersion()}, not with the {@code version} argument.
 *
 * <p>Several components intentionally share one {@code Set} instance (for example the
 * DynamoDB, Redshift, Snowflake, Kinesis, PubSub, WebHDFS and Delta Lake entries), exactly
 * as built below.
 *
 * @param distribution distribution name used in conditions and module group lookups
 * @param version distribution version used in conditions and module group lookups
 * @return a new mutable map of node component to module groups
 */
protected Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> buildNodeModuleGroups(String distribution, String version) {
    Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> result = new HashMap<>();
    // Azure
    // The linked parameter is a Spark Batch constant, so the condition is built with the
    // Spark Batch linked-node condition (same pattern as the S3 condition further down,
    // which is also shared by the batch and streaming configuration components).
    ComponentCondition azureCondition = new SparkBatchLinkedNodeCondition(distribution, version, SparkBatchConstant.SPARK_BATCH_AZURE_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.AZURE_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, azureCondition, ModuleGroupName.AZURE.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.AZURE_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, azureCondition, ModuleGroupName.AZURE.get(this.getVersion()), true));
    // Spark Batch BigQuery
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.BIGQUERY_CONFIG_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.BIGQUERY.get(this.getVersion()), true));
    // DynamoDB nodes ...
    // Input/output nodes carry a condition on USE_EXISTING_CONNECTION; the configuration node has none.
    Set<DistributionModuleGroup> dynamoDBBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.DYNAMODB_BATCH.get(this.getVersion()), true);
    Set<DistributionModuleGroup> dynamoDBBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.DYNAMODB_BATCH.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_INPUT_COMPONENT), dynamoDBBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_OUTPUT_COMPONENT), dynamoDBBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_CONFIGURATION_COMPONENT), dynamoDBBatchConfigurationModuleGroups);
    Set<DistributionModuleGroup> dynamoDBStreamingNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.DYNAMODB_STREAMING.get(this.getVersion()), true);
    Set<DistributionModuleGroup> dynamoDBStreamingConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.DYNAMODB_STREAMING.get(this.getVersion()), true);
    // ... in Spark streaming
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_INPUT_COMPONENT), dynamoDBStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_OUTPUT_COMPONENT), dynamoDBStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_CONFIGURATION_COMPONENT), dynamoDBStreamingConfigurationModuleGroups);
    // Spark Streaming Flume nodes
    ComponentCondition flumeCondition = new SparkStreamingLinkedNodeCondition(distribution, version, SparkStreamingConstant.FLUME_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.FLUME_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, flumeCondition, ModuleGroupName.FLUME.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.FLUME_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, flumeCondition, ModuleGroupName.FLUME.get(this.getVersion()), true));
    // Spark Batch GCS
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.GCS_CONFIG_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.GCS.get(this.getVersion()), true));
    // GraphFrames - Spark Batch DQ matching
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.MATCH_PREDICT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.GRAPHFRAMES.get(this.getVersion()), true));
    // Spark Streaming Kafka nodes
    ComponentCondition kafkaCondition = new SparkStreamingLinkedNodeCondition(distribution, version, SparkStreamingConstant.KAFKA_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_AVRO_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    // Spark Streaming Kinesis nodes
    Set<DistributionModuleGroup> kinesisModuleGroups = ModuleGroupsUtils.getStreamingModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.KINESIS.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_INPUT_COMPONENT), kinesisModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_INPUT_AVRO_COMPONENT), kinesisModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_OUTPUT_COMPONENT), kinesisModuleGroups);
    // PubSub nodes...
    Set<DistributionModuleGroup> pubSubNodeModuleGroups = ModuleGroupsUtils.getStreamingModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PUBSUB.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_INPUT_COMPONENT), pubSubNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_INPUT_AVRO_COMPONENT), pubSubNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_OUTPUT_COMPONENT), pubSubNodeModuleGroups);
    // Spark Batch Parquet nodes
    // The last argument is false only when this distribution reports itself as "SPARK".
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.PARQUET_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.PARQUET_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    // Spark Streaming Parquet nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_STREAM_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    // Redshift nodes ...
    Set<DistributionModuleGroup> redshiftBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.REDSHIFT_BATCH.get(this.getVersion()), true);
    Set<DistributionModuleGroup> redshiftBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.REDSHIFT_BATCH.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_INPUT_COMPONENT), redshiftBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_OUTPUT_COMPONENT), redshiftBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_CONFIGURATION_COMPONENT), redshiftBatchConfigurationModuleGroups);
    Set<DistributionModuleGroup> redshiftStreamingNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.REDSHIFT_STREAMING.get(this.getVersion()), true);
    Set<DistributionModuleGroup> redshiftStreamingConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.REDSHIFT_STREAMING.get(this.getVersion()), true);
    // ... in Spark streaming
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_LOOKUP_INPUT_COMPONENT), redshiftStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_OUTPUT_COMPONENT), redshiftStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_CONFIGURATION_COMPONENT), redshiftStreamingConfigurationModuleGroups);
    // Snowflake nodes ...
    Set<DistributionModuleGroup> snowFlakeBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.SNOWFLAKE.get(this.getVersion()), true);
    Set<DistributionModuleGroup> snowFlakeBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.SNOWFLAKE.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_INPUT_COMPONENT), snowFlakeBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_OUTPUT_COMPONENT), snowFlakeBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_CONFIGURATION_COMPONENT), snowFlakeBatchConfigurationModuleGroups);
    // Spark S3 condition (shared by the batch and streaming S3 configuration components)
    ComponentCondition s3StorageCondition = new SparkBatchLinkedNodeCondition(distribution, version, SparkBatchConstant.SPARK_BATCH_S3_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    // Spark Batch S3 nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.S3_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, s3StorageCondition, ModuleGroupName.S3.get(this.getVersion()), true));
    // Spark Streaming S3 nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.S3_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, s3StorageCondition, ModuleGroupName.S3.get(this.getVersion()), true));
    // WebHDFS: every HDFS component shares the same module group set
    HDFSLinkedNodeCondition hdfsLinkedNodeCondition = new HDFSLinkedNodeCondition(distribution, version);
    Set<DistributionModuleGroup> webHDFSNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, hdfsLinkedNodeCondition.getWebHDFSCondition(), ModuleGroupName.WEBHDFS.get(this.getVersion()), true);
    for (String hdfsComponent : HDFSConstant.HDFS_COMPONENTS) {
        result.put(new NodeComponentTypeBean(ComponentType.HDFS, hdfsComponent), webHDFSNodeModuleGroups);
    }
    // Sqoop: each component gets its own freshly built module group set
    for (String sqoopComponent : SqoopConstant.SQOOP_COMPONENTS) {
        result.put(new NodeComponentTypeBean(ComponentType.SQOOP, sqoopComponent), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.SQOOP.get(this.getVersion()), true));
    }
    // Spark Batch tSQLRow nodes (Hive module groups apply only when SQL_CONTEXT equals "HiveContext")
    ComponentCondition hiveContextCondition = new SimpleComponentCondition(new BasicExpression("SQL_CONTEXT", EqualityOperator.EQ, "HiveContext"));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SPARK_SQL_ROW_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, hiveContextCondition, ModuleGroupName.HIVE.get(this.getVersion()), true));
    // Spark Streaming tSQLRow nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.SPARK_SQL_ROW_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, hiveContextCondition, ModuleGroupName.HIVE.get(this.getVersion()), true));
    // delta components in Spark batch
    Set<DistributionModuleGroup> deltaBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.DELTALAKE.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DELTALAKE_INPUT_COMPONENT), deltaBatchConfigurationModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DELTALAKE_OUTPUT_COMPONENT), deltaBatchConfigurationModuleGroups);
    return result;
}
Use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.
Shown in class EMR5290GraphFramesNodeModuleGroup, method getModuleGroups.
/**
 * Returns the GraphFrames module group for EMR 5.29.0.
 *
 * <p>The single group is named after {@code EMR5290Constant.GRAPHFRAMES_MODULE_GROUP} and
 * carries the condition produced by a {@code SparkBatchLinkedNodeCondition} built from the
 * given distribution and version.
 *
 * @param distribution distribution name forwarded to the linked-node condition
 * @param version distribution version forwarded to the linked-node condition
 * @return a new mutable set containing exactly one module group
 */
public static Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) {
    Set<DistributionModuleGroup> moduleGroups = new HashSet<>();
    moduleGroups.add(new DistributionModuleGroup(
            EMR5290Constant.GRAPHFRAMES_MODULE_GROUP.getModuleName(),
            true,
            new SparkBatchLinkedNodeCondition(distribution, version).getCondition()));
    return moduleGroups;
}
Use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.
Shown in class EMR5290SparkBatchParquetNodeModuleGroup, method getModuleGroups.
/**
 * Returns the Spark Batch Parquet module group for EMR 5.29.0.
 *
 * <p>The single group is named after
 * {@code EMR5290Constant.SPARK_PARQUET_MRREQUIRED_MODULE_GROUP} and carries the condition
 * produced by a {@code SparkBatchLinkedNodeCondition} built from the given distribution
 * and version.
 *
 * @param distribution distribution name forwarded to the linked-node condition
 * @param version distribution version forwarded to the linked-node condition
 * @return a new mutable set containing exactly one module group
 */
public static Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) {
    Set<DistributionModuleGroup> moduleGroups = new HashSet<>();
    moduleGroups.add(new DistributionModuleGroup(
            EMR5290Constant.SPARK_PARQUET_MRREQUIRED_MODULE_GROUP.getModuleName(),
            true,
            new SparkBatchLinkedNodeCondition(distribution, version).getCondition()));
    return moduleGroups;
}
Use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.
Shown in class EMR5290SparkBatchS3NodeModuleGroup, method getModuleGroups.
/**
 * Returns the Spark Batch S3 module group for EMR 5.29.0.
 *
 * <p>The single group is named after {@code EMR5290Constant.S3_MODULE_GROUP} and carries
 * the condition produced by a {@code SparkBatchLinkedNodeCondition} built from the given
 * distribution and version with the
 * {@code SPARK_BATCH_S3_SPARKCONFIGURATION_LINKEDPARAMETER} linked parameter.
 *
 * @param distribution distribution name forwarded to the linked-node condition
 * @param version distribution version forwarded to the linked-node condition
 * @return a new mutable set containing exactly one module group
 */
public static Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) {
    Set<DistributionModuleGroup> moduleGroups = new HashSet<>();
    moduleGroups.add(new DistributionModuleGroup(
            EMR5290Constant.S3_MODULE_GROUP.getModuleName(),
            true,
            new SparkBatchLinkedNodeCondition(distribution, version,
                    SparkBatchConstant.SPARK_BATCH_S3_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition()));
    return moduleGroups;
}
Aggregations