Search in sources :

Example 1 with SparkBatchLinkedNodeCondition

use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.

the class DynamicCDPGraphFramesNodeModuleGroup method getModuleGroups.

@Override
public Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) throws Exception {
    Set<DistributionModuleGroup> hs = new HashSet<>();
    DynamicPluginAdapter pluginAdapter = getPluginAdapter();
    String graphFramesMrRequiredRuntimeId = pluginAdapter.getRuntimeModuleGroupIdByTemplateId(DynamicModuleGroupConstant.GRAPHFRAMES_MRREQUIRED_MODULE_GROUP.getModuleName());
    checkRuntimeId(graphFramesMrRequiredRuntimeId);
    if (StringUtils.isNotBlank(graphFramesMrRequiredRuntimeId)) {
        DistributionModuleGroup dmg = new DistributionModuleGroup(graphFramesMrRequiredRuntimeId, true, new SparkBatchLinkedNodeCondition(distribution, version, SparkBatchConstant.SPARK_BATCH_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition());
        hs.add(dmg);
    }
    return hs;
}
Also used : DynamicPluginAdapter(org.talend.hadoop.distribution.dynamic.adapter.DynamicPluginAdapter) SparkBatchLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition) DistributionModuleGroup(org.talend.hadoop.distribution.DistributionModuleGroup) HashSet(java.util.HashSet)

Example 2 with SparkBatchLinkedNodeCondition

use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.

the class AbstractDistribution method buildNodeModuleGroups.

protected Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> buildNodeModuleGroups(String distribution, String version) {
    Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> result = new HashMap<>();
    // Azure
    ComponentCondition azureCondition = new SparkStreamingLinkedNodeCondition(distribution, version, SparkBatchConstant.SPARK_BATCH_AZURE_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.AZURE_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, azureCondition, ModuleGroupName.AZURE.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.AZURE_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, azureCondition, ModuleGroupName.AZURE.get(this.getVersion()), true));
    // Spark Batch BigQuery
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.BIGQUERY_CONFIG_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.BIGQUERY.get(this.getVersion()), true));
    // DynamoDB nodes ...
    Set<DistributionModuleGroup> dynamoDBBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.DYNAMODB_BATCH.get(this.getVersion()), true);
    Set<DistributionModuleGroup> dynamoDBBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.DYNAMODB_BATCH.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_INPUT_COMPONENT), dynamoDBBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_OUTPUT_COMPONENT), dynamoDBBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_CONFIGURATION_COMPONENT), dynamoDBBatchConfigurationModuleGroups);
    Set<DistributionModuleGroup> dynamoDBStreamingNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.DYNAMODB_STREAMING.get(this.getVersion()), true);
    Set<DistributionModuleGroup> dynamoDBStreamingConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.DYNAMODB_STREAMING.get(this.getVersion()), true);
    // ... in Spark streaming
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_INPUT_COMPONENT), dynamoDBStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_OUTPUT_COMPONENT), dynamoDBStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_CONFIGURATION_COMPONENT), dynamoDBStreamingConfigurationModuleGroups);
    // Spark Streaming Flume nodes
    ComponentCondition flumeCondition = new SparkStreamingLinkedNodeCondition(distribution, version, SparkStreamingConstant.FLUME_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.FLUME_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, flumeCondition, ModuleGroupName.FLUME.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.FLUME_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, flumeCondition, ModuleGroupName.FLUME.get(this.getVersion()), true));
    // Spark Batch  GCS
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.GCS_CONFIG_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.GCS.get(this.getVersion()), true));
    // GraphFrames - Spark Batch DQ matching
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.MATCH_PREDICT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.GRAPHFRAMES.get(this.getVersion()), true));
    // Spark Streaming Kafka nodes
    ComponentCondition kafkaCondition = new SparkStreamingLinkedNodeCondition(distribution, version, SparkStreamingConstant.KAFKA_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_AVRO_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    // Spark Streaming Kinesis nodes
    Set<DistributionModuleGroup> kinesisModuleGroups = ModuleGroupsUtils.getStreamingModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.KINESIS.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_INPUT_COMPONENT), kinesisModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_INPUT_AVRO_COMPONENT), kinesisModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_OUTPUT_COMPONENT), kinesisModuleGroups);
    // PubSub nodes...
    Set<DistributionModuleGroup> pubSubNodeModuleGroups = ModuleGroupsUtils.getStreamingModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PUBSUB.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_INPUT_COMPONENT), pubSubNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_INPUT_AVRO_COMPONENT), pubSubNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_OUTPUT_COMPONENT), pubSubNodeModuleGroups);
    // Spark Batch Parquet nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.PARQUET_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.PARQUET_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    // Spark Streaming Parquet nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_OUTPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_STREAM_INPUT_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    // Redshift nodes ...
    Set<DistributionModuleGroup> redshiftBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.REDSHIFT_BATCH.get(this.getVersion()), true);
    Set<DistributionModuleGroup> redshiftBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.REDSHIFT_BATCH.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_INPUT_COMPONENT), redshiftBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_OUTPUT_COMPONENT), redshiftBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_CONFIGURATION_COMPONENT), redshiftBatchConfigurationModuleGroups);
    Set<DistributionModuleGroup> redshiftStreamingNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.REDSHIFT_STREAMING.get(this.getVersion()), true);
    Set<DistributionModuleGroup> redshiftStreamingConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.REDSHIFT_STREAMING.get(this.getVersion()), true);
    // ... in Spark streaming
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_LOOKUP_INPUT_COMPONENT), redshiftStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_OUTPUT_COMPONENT), redshiftStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_CONFIGURATION_COMPONENT), redshiftStreamingConfigurationModuleGroups);
    // Snowflake nodes ...
    Set<DistributionModuleGroup> snowFlakeBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.SNOWFLAKE.get(this.getVersion()), true);
    Set<DistributionModuleGroup> snowFlakeBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.SNOWFLAKE.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_INPUT_COMPONENT), snowFlakeBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_OUTPUT_COMPONENT), snowFlakeBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_CONFIGURATION_COMPONENT), snowFlakeBatchConfigurationModuleGroups);
    // Spark S3 condition
    ComponentCondition s3StorageCondition = new SparkBatchLinkedNodeCondition(distribution, version, SparkBatchConstant.SPARK_BATCH_S3_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    // Spark Batch S3 nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.S3_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) s3StorageCondition, ModuleGroupName.S3.get(this.getVersion()), true));
    // Spark Streaming S3 nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.S3_CONFIGURATION_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) s3StorageCondition, ModuleGroupName.S3.get(this.getVersion()), true));
    // WebHDFS
    HDFSLinkedNodeCondition hdfsLinkedNodeCondition = new HDFSLinkedNodeCondition(distribution, version);
    Set<DistributionModuleGroup> webHDFSNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, hdfsLinkedNodeCondition.getWebHDFSCondition(), ModuleGroupName.WEBHDFS.get(this.getVersion()), true);
    for (String hdfsComponent : HDFSConstant.HDFS_COMPONENTS) {
        result.put(new NodeComponentTypeBean(ComponentType.HDFS, hdfsComponent), webHDFSNodeModuleGroups);
    }
    // Sqoop
    for (String sqoopComponent : SqoopConstant.SQOOP_COMPONENTS) {
        result.put(new NodeComponentTypeBean(ComponentType.SQOOP, sqoopComponent), ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.SQOOP.get(this.getVersion()), true));
    }
    // Spark Batch tSQLRow nodes
    ComponentCondition hiveContextCondition = new SimpleComponentCondition(new BasicExpression("SQL_CONTEXT", EqualityOperator.EQ, "HiveContext"));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SPARK_SQL_ROW_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, hiveContextCondition, ModuleGroupName.HIVE.get(this.getVersion()), true));
    // Spark Streaming tSQLRow nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.SPARK_SQL_ROW_COMPONENT), ModuleGroupsUtils.getModuleGroups(distribution, version, hiveContextCondition, ModuleGroupName.HIVE.get(this.getVersion()), true));
    // delta components in Spark batch
    Set<DistributionModuleGroup> deltaBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.DELTALAKE.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DELTALAKE_INPUT_COMPONENT), deltaBatchConfigurationModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DELTALAKE_OUTPUT_COMPONENT), deltaBatchConfigurationModuleGroups);
    return result;
}
Also used : HDFSLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.HDFSLinkedNodeCondition) HashSet(java.util.HashSet) Set(java.util.Set) SparkStreamingLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.SparkStreamingLinkedNodeCondition) HashMap(java.util.HashMap) SimpleComponentCondition(org.talend.hadoop.distribution.condition.SimpleComponentCondition) SparkBatchLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition) BasicExpression(org.talend.hadoop.distribution.condition.BasicExpression) MultiComponentCondition(org.talend.hadoop.distribution.condition.MultiComponentCondition) SimpleComponentCondition(org.talend.hadoop.distribution.condition.SimpleComponentCondition) ComponentCondition(org.talend.hadoop.distribution.condition.ComponentCondition)

Example 3 with SparkBatchLinkedNodeCondition

use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.

the class EMR5290GraphFramesNodeModuleGroup method getModuleGroups.

public static Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) {
    Set<DistributionModuleGroup> hs = new HashSet<>();
    DistributionModuleGroup dmg = new DistributionModuleGroup(EMR5290Constant.GRAPHFRAMES_MODULE_GROUP.getModuleName(), true, new SparkBatchLinkedNodeCondition(distribution, version).getCondition());
    hs.add(dmg);
    return hs;
}
Also used : SparkBatchLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition) DistributionModuleGroup(org.talend.hadoop.distribution.DistributionModuleGroup) HashSet(java.util.HashSet)

Example 4 with SparkBatchLinkedNodeCondition

use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.

the class EMR5290SparkBatchParquetNodeModuleGroup method getModuleGroups.

public static Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) {
    Set<DistributionModuleGroup> hs = new HashSet<>();
    DistributionModuleGroup dmg = new DistributionModuleGroup(EMR5290Constant.SPARK_PARQUET_MRREQUIRED_MODULE_GROUP.getModuleName(), true, new SparkBatchLinkedNodeCondition(distribution, version).getCondition());
    hs.add(dmg);
    return hs;
}
Also used : SparkBatchLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition) DistributionModuleGroup(org.talend.hadoop.distribution.DistributionModuleGroup) HashSet(java.util.HashSet)

Example 5 with SparkBatchLinkedNodeCondition

use of org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition in project tbd-studio-se by Talend.

the class EMR5290SparkBatchS3NodeModuleGroup method getModuleGroups.

public static Set<DistributionModuleGroup> getModuleGroups(String distribution, String version) {
    Set<DistributionModuleGroup> hs = new HashSet<>();
    DistributionModuleGroup dmg = new DistributionModuleGroup(EMR5290Constant.S3_MODULE_GROUP.getModuleName(), true, new SparkBatchLinkedNodeCondition(distribution, version, SparkBatchConstant.SPARK_BATCH_S3_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition());
    hs.add(dmg);
    return hs;
}
Also used : SparkBatchLinkedNodeCondition(org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition) DistributionModuleGroup(org.talend.hadoop.distribution.DistributionModuleGroup) HashSet(java.util.HashSet)

Aggregations

SparkBatchLinkedNodeCondition (org.talend.hadoop.distribution.condition.common.SparkBatchLinkedNodeCondition)15 DistributionModuleGroup (org.talend.hadoop.distribution.DistributionModuleGroup)14 HashSet (java.util.HashSet)13 DynamicPluginAdapter (org.talend.hadoop.distribution.dynamic.adapter.DynamicPluginAdapter)5 MultiComponentCondition (org.talend.hadoop.distribution.condition.MultiComponentCondition)3 Test (org.junit.Test)2 BasicExpression (org.talend.hadoop.distribution.condition.BasicExpression)2 ComponentCondition (org.talend.hadoop.distribution.condition.ComponentCondition)2 SimpleComponentCondition (org.talend.hadoop.distribution.condition.SimpleComponentCondition)2 HashMap (java.util.HashMap)1 Set (java.util.Set)1 HDFSLinkedNodeCondition (org.talend.hadoop.distribution.condition.common.HDFSLinkedNodeCondition)1 SparkStreamingLinkedNodeCondition (org.talend.hadoop.distribution.condition.common.SparkStreamingLinkedNodeCondition)1