Use of org.talend.hadoop.distribution.condition.ComponentCondition in project tbd-studio-se by Talend.
The following usage is from the class ComponentConditionUtil, method buildDistributionShowIf.
/**
* Creates a single condition by combining all of the conditions contained in the given Set.
*
* @param conditions - the Set of conditions to merge into a single condition.
* @return a new ComponentCondition made of the given conditions, or null if the Set is null or empty,
* or if at least one of the contained conditions is null.
*/
public static ComponentCondition buildDistributionShowIf(Set<ComponentCondition> conditions) {
    if (conditions != null) {
        Iterator<ComponentCondition> iter = conditions.iterator();
        ComponentCondition previous = null;
        while (iter.hasNext()) {
            ComponentCondition cc = iter.next();
            if (cc == null) {
                return null;
            }
            ComponentCondition wrappedCondition = new NestedComponentCondition(cc);
            if (previous != null) {
                wrappedCondition = new MultiComponentCondition(previous, BooleanOperator.OR, wrappedCondition);
            }
            previous = wrappedCondition;
        }
        if (previous != null) {
            return new NestedComponentCondition(previous);
        }
    }
    return null;
}
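For illustration, a minimal usage sketch follows. It is not taken from the project: the parameter names, values, and the rendered string are assumptions, inferred from the rendering conventions exercised in ComponentConditionTest further down.
// Hedged usage sketch for buildDistributionShowIf; names and the printed string are
// illustrative assumptions, not code from tbd-studio-se.
Set<ComponentCondition> conditions = new LinkedHashSet<>();
conditions.add(new SimpleComponentCondition(new BasicExpression("DISTRIBUTION", EqualityOperator.EQ, "EMR")));
conditions.add(new SimpleComponentCondition(new BasicExpression("DISTRIBUTION", EqualityOperator.EQ, "CDH")));
ComponentCondition merged = ComponentConditionUtil.buildDistributionShowIf(conditions);
// Each input is wrapped in a NestedComponentCondition and OR-ed with the previous ones,
// so this presumably prints something like:
// (((DISTRIBUTION=='EMR')) OR ((DISTRIBUTION=='CDH')))
System.out.println(merged.getConditionString());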
Use of org.talend.hadoop.distribution.condition.ComponentCondition in project tbd-studio-se by Talend.
The following usage is from the class DynamicHiveModuleGroup, method getModuleGroups.
@Override
public Set<DistributionModuleGroup> getModuleGroups() throws Exception {
    Set<DistributionModuleGroup> hs = new HashSet<>();
    DynamicPluginAdapter pluginAdapter = getPluginAdapter();
    String hiveRuntimeId = pluginAdapter.getRuntimeModuleGroupIdByTemplateId(DynamicModuleGroupConstant.HIVE_MODULE_GROUP.getModuleName());
    String hdfsRuntimeId = pluginAdapter.getRuntimeModuleGroupIdByTemplateId(DynamicModuleGroupConstant.HDFS_MODULE_GROUP.getModuleName());
    String mrRuntimeId = pluginAdapter.getRuntimeModuleGroupIdByTemplateId(DynamicModuleGroupConstant.MAPREDUCE_MODULE_GROUP.getModuleName());
    String hiveHBaseRuntimeId = pluginAdapter.getRuntimeModuleGroupIdByTemplateId(DynamicModuleGroupConstant.HIVE_HBASE_MODULE_GROUP.getModuleName());
    checkRuntimeId(hiveRuntimeId);
    checkRuntimeId(hdfsRuntimeId);
    checkRuntimeId(mrRuntimeId);
    checkRuntimeId(hiveHBaseRuntimeId);
    if (StringUtils.isNotBlank(hiveRuntimeId)) {
        hs.add(new DistributionModuleGroup(hiveRuntimeId));
    }
    if (StringUtils.isNotBlank(hdfsRuntimeId)) {
        hs.add(new DistributionModuleGroup(hdfsRuntimeId));
    }
    if (StringUtils.isNotBlank(mrRuntimeId)) {
        hs.add(new DistributionModuleGroup(mrRuntimeId));
    }
    if (StringUtils.isNotBlank(hiveHBaseRuntimeId)) {
        // The following condition instance stands for:
        // (STORE_BY_HBASE == 'true' AND isShow[STORE_BY_HBASE])
        ComponentCondition hbaseLoaderCondition = new MultiComponentCondition(
                new SimpleComponentCondition(new BasicExpression(HiveConstant.HIVE_CONFIGURATION_COMPONENT_HBASEPARAMETER)),
                BooleanOperator.AND,
                new SimpleComponentCondition(new ShowExpression(HiveConstant.HIVE_CONFIGURATION_COMPONENT_HBASEPARAMETER)));
        // The Hive components need to import some HBase libraries if the "Use HBase storage" check box is checked.
        hs.add(new DistributionModuleGroup(hiveHBaseRuntimeId, false, hbaseLoaderCondition));
    }
    return hs;
}
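A hedged sketch of what hbaseLoaderCondition amounts to. The concrete parameter name "STORE_BY_HBASE" standing behind HiveConstant.HIVE_CONFIGURATION_COMPONENT_HBASEPARAMETER is taken from the in-code comment; the exact string rendering of ShowExpression is an assumption.
// Hedged illustration: if HIVE_CONFIGURATION_COMPONENT_HBASEPARAMETER is "STORE_BY_HBASE",
// the condition above should be equivalent to:
ComponentCondition sketch = new MultiComponentCondition(
        new SimpleComponentCondition(new BasicExpression("STORE_BY_HBASE")), // presumably defaults to == 'true'
        BooleanOperator.AND,
        new SimpleComponentCondition(new ShowExpression("STORE_BY_HBASE")));
// getConditionString() should then yield something along the lines of:
// (STORE_BY_HBASE=='true') AND isShow[STORE_BY_HBASE]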
Use of org.talend.hadoop.distribution.condition.ComponentCondition in project tbd-studio-se by Talend.
The following usage is from the class AbstractDistribution, method buildNodeModuleGroups.
protected Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> buildNodeModuleGroups(String distribution, String version) {
    Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> result = new HashMap<>();
    // Azure
    ComponentCondition azureCondition = new SparkStreamingLinkedNodeCondition(distribution, version,
            SparkBatchConstant.SPARK_BATCH_AZURE_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.AZURE_CONFIGURATION_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, azureCondition, ModuleGroupName.AZURE.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.AZURE_CONFIGURATION_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, azureCondition, ModuleGroupName.AZURE.get(this.getVersion()), true));
    // Spark Batch BigQuery
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.BIGQUERY_CONFIG_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.BIGQUERY.get(this.getVersion()), true));
    // DynamoDB nodes ...
    Set<DistributionModuleGroup> dynamoDBBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.DYNAMODB_BATCH.get(this.getVersion()), true);
    Set<DistributionModuleGroup> dynamoDBBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            (String) null, ModuleGroupName.DYNAMODB_BATCH.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_INPUT_COMPONENT), dynamoDBBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_OUTPUT_COMPONENT), dynamoDBBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DYNAMODB_CONFIGURATION_COMPONENT), dynamoDBBatchConfigurationModuleGroups);
    Set<DistributionModuleGroup> dynamoDBStreamingNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.DYNAMODB_STREAMING.get(this.getVersion()), true);
    Set<DistributionModuleGroup> dynamoDBStreamingConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            (String) null, ModuleGroupName.DYNAMODB_STREAMING.get(this.getVersion()), true);
    // ... in Spark streaming
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_INPUT_COMPONENT), dynamoDBStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_OUTPUT_COMPONENT), dynamoDBStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.DYNAMODB_CONFIGURATION_COMPONENT), dynamoDBStreamingConfigurationModuleGroups);
    // Spark Streaming Flume nodes
    ComponentCondition flumeCondition = new SparkStreamingLinkedNodeCondition(distribution, version,
            SparkStreamingConstant.FLUME_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.FLUME_INPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, flumeCondition, ModuleGroupName.FLUME.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.FLUME_OUTPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, flumeCondition, ModuleGroupName.FLUME.get(this.getVersion()), true));
    // Spark Batch GCS
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.GCS_CONFIG_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (String) null, ModuleGroupName.GCS.get(this.getVersion()), true));
    // GraphFrames - Spark Batch DQ matching
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.MATCH_PREDICT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.GRAPHFRAMES.get(this.getVersion()), true));
    // Spark Streaming Kafka nodes
    ComponentCondition kafkaCondition = new SparkStreamingLinkedNodeCondition(distribution, version,
            SparkStreamingConstant.KAFKA_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_INPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_AVRO_INPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_OUTPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, kafkaCondition, ModuleGroupName.KAFKA.get(this.getVersion()), true));
    // Spark Streaming Kinesis nodes
    Set<DistributionModuleGroup> kinesisModuleGroups = ModuleGroupsUtils.getStreamingModuleGroups(distribution, version,
            (ComponentCondition) null, ModuleGroupName.KINESIS.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_INPUT_COMPONENT), kinesisModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_INPUT_AVRO_COMPONENT), kinesisModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KINESIS_OUTPUT_COMPONENT), kinesisModuleGroups);
    // PubSub nodes...
    Set<DistributionModuleGroup> pubSubNodeModuleGroups = ModuleGroupsUtils.getStreamingModuleGroups(distribution, version,
            (ComponentCondition) null, ModuleGroupName.PUBSUB.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_INPUT_COMPONENT), pubSubNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_INPUT_AVRO_COMPONENT), pubSubNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PUBSUB_OUTPUT_COMPONENT), pubSubNodeModuleGroups);
    // Spark Batch Parquet nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.PARQUET_INPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null,
                    ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.PARQUET_OUTPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null,
                    ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    // Spark Streaming Parquet nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_INPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null,
                    ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_OUTPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null,
                    ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.PARQUET_STREAM_INPUT_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null,
                    ModuleGroupName.PARQUET.get(this.getVersion()), !"SPARK".equals(this.getDistribution())));
    // Redshift nodes ...
    Set<DistributionModuleGroup> redshiftBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.REDSHIFT_BATCH.get(this.getVersion()), true);
    Set<DistributionModuleGroup> redshiftBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            (String) null, ModuleGroupName.REDSHIFT_BATCH.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_INPUT_COMPONENT), redshiftBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_OUTPUT_COMPONENT), redshiftBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.REDSHIFT_CONFIGURATION_COMPONENT), redshiftBatchConfigurationModuleGroups);
    Set<DistributionModuleGroup> redshiftStreamingNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.REDSHIFT_STREAMING.get(this.getVersion()), true);
    Set<DistributionModuleGroup> redshiftStreamingConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            (String) null, ModuleGroupName.REDSHIFT_STREAMING.get(this.getVersion()), true);
    // ... in Spark streaming
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_LOOKUP_INPUT_COMPONENT), redshiftStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_OUTPUT_COMPONENT), redshiftStreamingNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.REDSHIFT_CONFIGURATION_COMPONENT), redshiftStreamingConfigurationModuleGroups);
    // Snowflake nodes ...
    Set<DistributionModuleGroup> snowFlakeBatchNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            "USE_EXISTING_CONNECTION == 'false'", ModuleGroupName.SNOWFLAKE.get(this.getVersion()), true);
    Set<DistributionModuleGroup> snowFlakeBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            (String) null, ModuleGroupName.SNOWFLAKE.get(this.getVersion()), true);
    // ... in Spark batch
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_INPUT_COMPONENT), snowFlakeBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_OUTPUT_COMPONENT), snowFlakeBatchNodeModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SNOWFLAKE_CONFIGURATION_COMPONENT), snowFlakeBatchConfigurationModuleGroups);
    // Spark S3 condition
    ComponentCondition s3StorageCondition = new SparkBatchLinkedNodeCondition(distribution, version,
            SparkBatchConstant.SPARK_BATCH_S3_SPARKCONFIGURATION_LINKEDPARAMETER).getCondition();
    // Spark Batch S3 nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.S3_CONFIGURATION_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, s3StorageCondition, ModuleGroupName.S3.get(this.getVersion()), true));
    // Spark Streaming S3 nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.S3_CONFIGURATION_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, s3StorageCondition, ModuleGroupName.S3.get(this.getVersion()), true));
    // WebHDFS
    HDFSLinkedNodeCondition hdfsLinkedNodeCondition = new HDFSLinkedNodeCondition(distribution, version);
    Set<DistributionModuleGroup> webHDFSNodeModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            hdfsLinkedNodeCondition.getWebHDFSCondition(), ModuleGroupName.WEBHDFS.get(this.getVersion()), true);
    for (String hdfsComponent : HDFSConstant.HDFS_COMPONENTS) {
        result.put(new NodeComponentTypeBean(ComponentType.HDFS, hdfsComponent), webHDFSNodeModuleGroups);
    }
    // Sqoop
    for (String sqoopComponent : SqoopConstant.SQOOP_COMPONENTS) {
        result.put(new NodeComponentTypeBean(ComponentType.SQOOP, sqoopComponent),
                ModuleGroupsUtils.getModuleGroups(distribution, version, (ComponentCondition) null, ModuleGroupName.SQOOP.get(this.getVersion()), true));
    }
    // Spark Batch tSQLRow nodes
    ComponentCondition hiveContextCondition = new SimpleComponentCondition(new BasicExpression("SQL_CONTEXT", EqualityOperator.EQ, "HiveContext"));
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.SPARK_SQL_ROW_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, hiveContextCondition, ModuleGroupName.HIVE.get(this.getVersion()), true));
    // Spark Streaming tSQLRow nodes
    result.put(new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.SPARK_SQL_ROW_COMPONENT),
            ModuleGroupsUtils.getModuleGroups(distribution, version, hiveContextCondition, ModuleGroupName.HIVE.get(this.getVersion()), true));
    // delta components in Spark batch
    Set<DistributionModuleGroup> deltaBatchConfigurationModuleGroups = ModuleGroupsUtils.getModuleGroups(distribution, version,
            (String) null, ModuleGroupName.DELTALAKE.get(this.getVersion()), true);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DELTALAKE_INPUT_COMPONENT), deltaBatchConfigurationModuleGroups);
    result.put(new NodeComponentTypeBean(ComponentType.SPARKBATCH, SparkBatchConstant.DELTALAKE_OUTPUT_COMPONENT), deltaBatchConfigurationModuleGroups);
    return result;
}
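A hedged consumption sketch, as it might appear in a subclass: the map lookup follows the key type used above and relies on NodeComponentTypeBean implementing equals and hashCode; the getter names on DistributionModuleGroup (getModuleName, getRequiredIf) are assumptions for illustration.
// Hedged sketch, not project code: querying the map built above for the Kafka input component.
Map<NodeComponentTypeBean, Set<DistributionModuleGroup>> groups = buildNodeModuleGroups("EMR", "5.29.0"); // placeholder args
Set<DistributionModuleGroup> kafkaGroups = groups.get(
        new NodeComponentTypeBean(ComponentType.SPARKSTREAMING, SparkStreamingConstant.KAFKA_INPUT_COMPONENT));
for (DistributionModuleGroup group : kafkaGroups) {
    // The ComponentCondition carried by a group, when present, acts as the SHOW_IF of the module group.
    ComponentCondition requiredIf = group.getRequiredIf(); // getter name assumed
    System.out.println(group.getModuleName() + " -> "
            + (requiredIf == null ? "unconditional" : requiredIf.getConditionString()));
}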
Use of org.talend.hadoop.distribution.condition.ComponentCondition in project tbd-studio-se by Talend.
The following usage is from the class ComponentConditionTest, method testGetConditionString.
@Test
public void testGetConditionString() throws Exception {
    ComponentCondition dc1 = new SimpleComponentCondition(new BasicExpression(PARAM_1, VALUE_1, EqualityOperator.EQ));
    assertEquals(dc1.getConditionString(), LEFT_PAR + PARAM_1 + EQ + SINGLE_QUOTE + VALUE_1 + SINGLE_QUOTE + RIGHT_PAR);
    Expression e1 = new BasicExpression("A", "aaa", EqualityOperator.EQ); //$NON-NLS-1$ //$NON-NLS-2$
    Expression e2 = new BasicExpression("Z", "ccc", EqualityOperator.EQ); //$NON-NLS-1$ //$NON-NLS-2$
    Expression e3 = new BasicExpression("A", "bbb", EqualityOperator.EQ); //$NON-NLS-1$ //$NON-NLS-2$
    Expression e4 = new BasicExpression("B", "ccc", EqualityOperator.NOT_EQ); //$NON-NLS-1$ //$NON-NLS-2$
    dc1 = new NestedComponentCondition(new MultiComponentCondition(
            new SimpleComponentCondition(e1),
            new NestedComponentCondition(new MultiComponentCondition(
                    new SimpleComponentCondition(e4),
                    new NestedComponentCondition(new MultiComponentCondition(
                            new SimpleComponentCondition(e2), new SimpleComponentCondition(e3), BooleanOperator.AND)),
                    BooleanOperator.OR)),
            BooleanOperator.AND));
    assertEquals(dc1.getConditionString(), result1);
    dc1 = new MultiComponentCondition(
            new SimpleComponentCondition(e1),
            new MultiComponentCondition(
                    new SimpleComponentCondition(e2),
                    new NestedComponentCondition(new MultiComponentCondition(
                            new SimpleComponentCondition(e4), new SimpleComponentCondition(e3), BooleanOperator.OR)),
                    BooleanOperator.AND),
            BooleanOperator.AND);
    assertEquals(dc1.getConditionString(), result2);
}
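Given the convention asserted first (a SimpleComponentCondition renders as (PARAM=='value'), NOT_EQ presumably as !=, and each NestedComponentCondition adds one level of parentheses), the expected constants should look roughly as follows; this is an inference, not text copied from the test class:
// result1 (presumed): ((A=='aaa') AND ((B!='ccc') OR ((Z=='ccc') AND (A=='bbb'))))
// result2 (presumed): (A=='aaa') AND (Z=='ccc') AND ((B!='ccc') OR (A=='bbb'))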
Use of org.talend.hadoop.distribution.condition.ComponentCondition in project tbd-studio-se by Talend.
The following usage is from the class EMR5290HiveModuleGroup, method getModuleGroups.
public static Set<DistributionModuleGroup> getModuleGroups() {
    ComponentCondition hbaseLoaderCondition = new MultiComponentCondition(
            new SimpleComponentCondition(new BasicExpression(HiveConstant.HIVE_CONFIGURATION_COMPONENT_HBASEPARAMETER)),
            BooleanOperator.AND,
            new SimpleComponentCondition(new ShowExpression(HiveConstant.HIVE_CONFIGURATION_COMPONENT_HBASEPARAMETER)));
    Set<DistributionModuleGroup> hs = new HashSet<>();
    hs.add(new DistributionModuleGroup(EMR5290Constant.HIVE_MODULE_GROUP.getModuleName()));
    hs.add(new DistributionModuleGroup(EMR5290Constant.HDFS_MODULE_GROUP.getModuleName()));
    hs.add(new DistributionModuleGroup(EMR5290Constant.MAPREDUCE_MODULE_GROUP.getModuleName()));
    // The Hive components need to import some HBase libraries if the "Use HBase storage" check box is checked.
    hs.add(new DistributionModuleGroup(EMR5290Constant.HIVE_HBASE_MODULE_GROUP.getModuleName(), false, hbaseLoaderCondition));
    return hs;
}
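To make the contract explicit, a hedged test sketch follows (the getter names on DistributionModuleGroup are assumptions): only the Hive/HBase module group should carry a ComponentCondition, the other three being unconditional.
Set<DistributionModuleGroup> groups = EMR5290HiveModuleGroup.getModuleGroups();
for (DistributionModuleGroup group : groups) {
    boolean isHiveHBase = EMR5290Constant.HIVE_HBASE_MODULE_GROUP.getModuleName().equals(group.getModuleName());
    // Only the Hive/HBase group is guarded by the show-and-checked condition built above.
    assertEquals(isHiveHBase, group.getRequiredIf() != null); // getRequiredIf() assumed
}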