Search in sources :

Example 1 with JobFlowInstancesConfig

use of com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig in project herd by FINRAOS.

the class EmrDaoTest method createEmrClusterAssertCallRunEmrJobFlowWithInstanceFleetAndMultipleSubnets.

@Test
public void createEmrClusterAssertCallRunEmrJobFlowWithInstanceFleetAndMultipleSubnets() throws Exception {
    // Create objects required for testing.
    final String clusterName = "clusterName";
    final String clusterId = "clusterId";
    final String name = STRING_VALUE;
    final String instanceFleetType = STRING_VALUE_2;
    final Integer targetOnDemandCapacity = INTEGER_VALUE;
    final Integer targetSpotCapacity = INTEGER_VALUE_2;
    final List<EmrClusterDefinitionInstanceTypeConfig> emrClusterDefinitionInstanceTypeConfigs = null;
    final EmrClusterDefinitionLaunchSpecifications emrClusterDefinitionLaunchSpecifications = null;
    final EmrClusterDefinitionInstanceFleet emrClusterDefinitionInstanceFleet = new EmrClusterDefinitionInstanceFleet(name, instanceFleetType, targetOnDemandCapacity, targetSpotCapacity, emrClusterDefinitionInstanceTypeConfigs, emrClusterDefinitionLaunchSpecifications);
    // Create an EMR cluster definition with instance fleet configuration and multiple EC2 subnet IDs.
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    emrClusterDefinition.setInstanceFleets(Arrays.asList(emrClusterDefinitionInstanceFleet));
    emrClusterDefinition.setSubnetId(String.format("%s , %s  ", EC2_SUBNET, EC2_SUBNET_2));
    emrClusterDefinition.setNodeTags(Arrays.asList(new NodeTag("tagName", "tagValue")));
    when(mockEmrOperations.runEmrJobFlow(any(), any())).then(new Answer<String>() {

        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            // Assert that the given EMR cluster definition produced the correct RunJobFlowRequest.
            RunJobFlowRequest runJobFlowRequest = invocation.getArgument(1);
            JobFlowInstancesConfig jobFlowInstancesConfig = runJobFlowRequest.getInstances();
            assertEquals(0, CollectionUtils.size(jobFlowInstancesConfig.getInstanceGroups()));
            final List<InstanceTypeConfig> expectedInstanceTypeConfigs = null;
            assertEquals(Arrays.asList(new InstanceFleetConfig().withName(name).withInstanceFleetType(instanceFleetType).withTargetOnDemandCapacity(targetOnDemandCapacity).withTargetSpotCapacity(targetSpotCapacity).withInstanceTypeConfigs(expectedInstanceTypeConfigs).withLaunchSpecifications(null)), jobFlowInstancesConfig.getInstanceFleets());
            assertNull(jobFlowInstancesConfig.getEc2SubnetId());
            assertEquals(2, CollectionUtils.size(jobFlowInstancesConfig.getEc2SubnetIds()));
            assertTrue(jobFlowInstancesConfig.getEc2SubnetIds().contains(EC2_SUBNET));
            assertTrue(jobFlowInstancesConfig.getEc2SubnetIds().contains(EC2_SUBNET_2));
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME), runJobFlowRequest.getJobFlowRole());
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME), runJobFlowRequest.getServiceRole());
            List<StepConfig> stepConfigs = runJobFlowRequest.getSteps();
            assertEquals(0, stepConfigs.size());
            List<Tag> tags = runJobFlowRequest.getTags();
            assertEquals(1, tags.size());
            {
                Tag tag = tags.get(0);
                assertEquals("tagName", tag.getKey());
                assertEquals("tagValue", tag.getValue());
            }
            return clusterId;
        }
    });
    assertEquals(clusterId, emrDao.createEmrCluster(clusterName, emrClusterDefinition, new AwsParamsDto()));
}
Also used : EmrClusterDefinitionInstanceTypeConfig(org.finra.herd.model.api.xml.EmrClusterDefinitionInstanceTypeConfig) AwsParamsDto(org.finra.herd.model.dto.AwsParamsDto) RunJobFlowRequest(com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest) JobFlowInstancesConfig(com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig) InstanceFleetConfig(com.amazonaws.services.elasticmapreduce.model.InstanceFleetConfig) EmrClusterDefinition(org.finra.herd.model.api.xml.EmrClusterDefinition) InvocationOnMock(org.mockito.invocation.InvocationOnMock) NodeTag(org.finra.herd.model.api.xml.NodeTag) List(java.util.List) ArrayList(java.util.ArrayList) Tag(com.amazonaws.services.elasticmapreduce.model.Tag) NodeTag(org.finra.herd.model.api.xml.NodeTag) EmrClusterDefinitionInstanceFleet(org.finra.herd.model.api.xml.EmrClusterDefinitionInstanceFleet) EmrClusterDefinitionLaunchSpecifications(org.finra.herd.model.api.xml.EmrClusterDefinitionLaunchSpecifications) Test(org.junit.Test)

Example 2 with JobFlowInstancesConfig

use of com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig in project herd by FINRAOS.

the class EmrDaoTest method createEmrClusterAssertCallRunEmrJobFlowRequiredParamsOnly.

@Test
public void createEmrClusterAssertCallRunEmrJobFlowRequiredParamsOnly() throws Exception {
    String clusterName = "clusterName";
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    InstanceDefinitions instanceDefinitions = new InstanceDefinitions();
    instanceDefinitions.setMasterInstances(new MasterInstanceDefinition(10, "masterInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD));
    instanceDefinitions.setCoreInstances(new InstanceDefinition(20, "coreInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD));
    emrClusterDefinition.setInstanceDefinitions(instanceDefinitions);
    emrClusterDefinition.setNodeTags(Arrays.asList(new NodeTag("tagName", "tagValue")));
    String clusterId = "clusterId";
    when(mockEmrOperations.runEmrJobFlow(any(), any())).then(new Answer<String>() {

        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            /*
                 * Assert that the given EMR cluster definition produced the correct RunJobFlowRequest
                 */
            RunJobFlowRequest runJobFlowRequest = invocation.getArgument(1);
            JobFlowInstancesConfig jobFlowInstancesConfig = runJobFlowRequest.getInstances();
            List<InstanceGroupConfig> instanceGroupConfigs = jobFlowInstancesConfig.getInstanceGroups();
            assertEquals(2, instanceGroupConfigs.size());
            {
                InstanceGroupConfig instanceGroupConfig = instanceGroupConfigs.get(0);
                assertEquals(10, instanceGroupConfig.getInstanceCount().intValue());
                assertEquals("masterInstanceType", instanceGroupConfig.getInstanceType());
            }
            {
                InstanceGroupConfig instanceGroupConfig = instanceGroupConfigs.get(1);
                assertEquals(20, instanceGroupConfig.getInstanceCount().intValue());
                assertEquals("coreInstanceType", instanceGroupConfig.getInstanceType());
            }
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME), runJobFlowRequest.getJobFlowRole());
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME), runJobFlowRequest.getServiceRole());
            List<StepConfig> stepConfigs = runJobFlowRequest.getSteps();
            assertEquals(0, stepConfigs.size());
            List<Tag> tags = runJobFlowRequest.getTags();
            assertEquals(1, tags.size());
            {
                Tag tag = tags.get(0);
                assertEquals("tagName", tag.getKey());
                assertEquals("tagValue", tag.getValue());
            }
            return clusterId;
        }
    });
    assertEquals(clusterId, emrDao.createEmrCluster(clusterName, emrClusterDefinition, new AwsParamsDto()));
}
Also used : MasterInstanceDefinition(org.finra.herd.model.api.xml.MasterInstanceDefinition) InstanceDefinition(org.finra.herd.model.api.xml.InstanceDefinition) AwsParamsDto(org.finra.herd.model.dto.AwsParamsDto) RunJobFlowRequest(com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest) JobFlowInstancesConfig(com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig) MasterInstanceDefinition(org.finra.herd.model.api.xml.MasterInstanceDefinition) InstanceDefinitions(org.finra.herd.model.api.xml.InstanceDefinitions) EmrClusterDefinition(org.finra.herd.model.api.xml.EmrClusterDefinition) InvocationOnMock(org.mockito.invocation.InvocationOnMock) NodeTag(org.finra.herd.model.api.xml.NodeTag) List(java.util.List) ArrayList(java.util.ArrayList) Tag(com.amazonaws.services.elasticmapreduce.model.Tag) NodeTag(org.finra.herd.model.api.xml.NodeTag) InstanceGroupConfig(com.amazonaws.services.elasticmapreduce.model.InstanceGroupConfig) Test(org.junit.Test)

Example 3 with JobFlowInstancesConfig

use of com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig in project herd by FINRAOS.

the class EmrDaoImpl method getJobFlowInstancesConfig.

/**
 * Creates the job flow instance configuration containing specification of the number and type of Amazon EC2 instances.
 *
 * @param emrClusterDefinition the EMR cluster definition that contains all the EMR parameters
 *
 * @return the job flow instance configuration
 */
private JobFlowInstancesConfig getJobFlowInstancesConfig(EmrClusterDefinition emrClusterDefinition) {
    // Create a new job flow instances configuration object.
    JobFlowInstancesConfig jobFlowInstancesConfig = new JobFlowInstancesConfig();
    // Set up master/slave security group
    jobFlowInstancesConfig.setEmrManagedMasterSecurityGroup(emrClusterDefinition.getMasterSecurityGroup());
    jobFlowInstancesConfig.setEmrManagedSlaveSecurityGroup(emrClusterDefinition.getSlaveSecurityGroup());
    // Add additional security groups to master nodes.
    jobFlowInstancesConfig.setAdditionalMasterSecurityGroups(emrClusterDefinition.getAdditionalMasterSecurityGroups());
    // Add additional security groups to slave nodes.
    jobFlowInstancesConfig.setAdditionalSlaveSecurityGroups(emrClusterDefinition.getAdditionalSlaveSecurityGroups());
    // Fill-in the ssh key.
    if (StringUtils.isNotBlank(emrClusterDefinition.getSshKeyPairName())) {
        jobFlowInstancesConfig.setEc2KeyName(emrClusterDefinition.getSshKeyPairName());
    }
    // Fill in configuration for the instance groups in a cluster.
    jobFlowInstancesConfig.setInstanceGroups(getInstanceGroupConfigs(emrClusterDefinition.getInstanceDefinitions()));
    // Fill in instance fleet configuration.
    jobFlowInstancesConfig.setInstanceFleets(getInstanceFleets(emrClusterDefinition.getInstanceFleets()));
    // Fill-in subnet id.
    if (StringUtils.isNotBlank(emrClusterDefinition.getSubnetId())) {
        // Use collection of subnet IDs when instance fleet configuration is specified. Otherwise, we expect a single EC2 subnet ID to be passed here.
        if (CollectionUtils.isNotEmpty(jobFlowInstancesConfig.getInstanceFleets())) {
            jobFlowInstancesConfig.setEc2SubnetIds(herdStringHelper.splitAndTrim(emrClusterDefinition.getSubnetId(), ","));
        } else {
            jobFlowInstancesConfig.setEc2SubnetId(emrClusterDefinition.getSubnetId());
        }
    }
    // Fill in optional keep alive flag.
    if (emrClusterDefinition.isKeepAlive() != null) {
        jobFlowInstancesConfig.setKeepJobFlowAliveWhenNoSteps(emrClusterDefinition.isKeepAlive());
    }
    // Fill in optional termination protection flag.
    if (emrClusterDefinition.isTerminationProtection() != null) {
        jobFlowInstancesConfig.setTerminationProtected(emrClusterDefinition.isTerminationProtection());
    }
    // Fill in optional Hadoop version flag.
    if (StringUtils.isNotBlank(emrClusterDefinition.getHadoopVersion())) {
        jobFlowInstancesConfig.setHadoopVersion(emrClusterDefinition.getHadoopVersion());
    }
    // Return the object.
    return jobFlowInstancesConfig;
}
Also used : JobFlowInstancesConfig(com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig)

Aggregations

JobFlowInstancesConfig (com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig)3 RunJobFlowRequest (com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest)2 Tag (com.amazonaws.services.elasticmapreduce.model.Tag)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 EmrClusterDefinition (org.finra.herd.model.api.xml.EmrClusterDefinition)2 NodeTag (org.finra.herd.model.api.xml.NodeTag)2 AwsParamsDto (org.finra.herd.model.dto.AwsParamsDto)2 Test (org.junit.Test)2 InvocationOnMock (org.mockito.invocation.InvocationOnMock)2 InstanceFleetConfig (com.amazonaws.services.elasticmapreduce.model.InstanceFleetConfig)1 InstanceGroupConfig (com.amazonaws.services.elasticmapreduce.model.InstanceGroupConfig)1 EmrClusterDefinitionInstanceFleet (org.finra.herd.model.api.xml.EmrClusterDefinitionInstanceFleet)1 EmrClusterDefinitionInstanceTypeConfig (org.finra.herd.model.api.xml.EmrClusterDefinitionInstanceTypeConfig)1 EmrClusterDefinitionLaunchSpecifications (org.finra.herd.model.api.xml.EmrClusterDefinitionLaunchSpecifications)1 InstanceDefinition (org.finra.herd.model.api.xml.InstanceDefinition)1 InstanceDefinitions (org.finra.herd.model.api.xml.InstanceDefinitions)1 MasterInstanceDefinition (org.finra.herd.model.api.xml.MasterInstanceDefinition)1