Search in sources :

Example 11 with AutoscalingPolicy

use of com.google.cloud.dataproc.v1.AutoscalingPolicy in project cdap by cdapio.

the class DataprocClient method createCluster.

/**
 * Create a cluster. This will return after the initial request to create the cluster is completed.
 * At this point, the cluster is likely not yet running, but in a provisioning state.
 *
 * <p>The cluster definition (metadata, networking, worker/master sizing, software properties, lifecycle,
 * init actions, autoscaling, encryption and config bucket) is assembled from {@code conf} and {@code network}.
 *
 * @param name            the name of the cluster to create
 * @param imageVersion    the image version for the cluster; only applied when no custom image URI is configured
 * @param labels          labels to set on the cluster
 * @param privateInstance {@code true} to indicate using private instance
 * @return create operation metadata
 * @throws IllegalArgumentException    if no network information is available
 * @throws InterruptedException        if the thread was interrupted while waiting for the initial request to complete
 * @throws AlreadyExistsException      if the cluster already exists
 * @throws IOException                 if there was an I/O error talking to Google Compute APIs
 * @throws RetryableProvisionException if there was a non 4xx error code returned
 */
ClusterOperationMetadata createCluster(String name, String imageVersion, Map<String, String> labels, boolean privateInstance) throws RetryableProvisionException, InterruptedException, IOException {
    if (network == null) {
        // Fail fast: the network is required below to configure the cluster
        // (network URI, internal-IP decision and firewall target tags).
        throw new IllegalArgumentException("Missing network information");
    }
    try {
        Map<String, String> metadata = new HashMap<>();
        SSHPublicKey publicKey = conf.getPublicKey();
        if (publicKey != null) {
            // Don't fail if there is no public key. It is for tooling case that the key might be generated differently.
            metadata.put("ssh-keys", publicKey.getUser() + ":" + publicKey.getKey());
            // override any os-login that may be set on the project-level metadata
            // this metadata is only needed if ssh is being used to launch the jobs - CDAP-15369
            metadata.put("enable-oslogin", "false");
        }
        // Check if ClusterMetaData is provided and add them.
        metadata.putAll(conf.getClusterMetaData());
        GceClusterConfig.Builder clusterConfig = GceClusterConfig.newBuilder().addServiceAccountScopes(DataprocConf.CLOUD_PLATFORM_SCOPE).setShieldedInstanceConfig(ShieldedInstanceConfig.newBuilder().setEnableSecureBoot(conf.isSecureBootEnabled()).setEnableVtpm(conf.isvTpmEnabled()).setEnableIntegrityMonitoring(conf.isIntegrityMonitoringEnabled()).build()).putAllMetadata(metadata);
        if (conf.getServiceAccount() != null) {
            clusterConfig.setServiceAccount(conf.getServiceAccount());
        }
        if (conf.getZone() != null) {
            clusterConfig.setZoneUri(conf.getZone());
        }
        // subnets are unique within a location, not within a network, which is why these configs are mutually exclusive.
        if (conf.getSubnet() != null) {
            clusterConfig.setSubnetworkUri(conf.getSubnet());
        } else {
            clusterConfig.setNetworkUri(network.getSelfLink());
        }
        // Add any defined Network Tags
        clusterConfig.addAllTags(conf.getNetworkTags());
        boolean internalIPOnly = isInternalIPOnly(network, privateInstance, publicKey != null);
        // if public key is not null that means ssh is used to launch / monitor job on dataproc
        if (publicKey != null) {
            // User-defined network tags were added above and take precedence; firewall tags only fill what is left.
            int maxTags = Math.max(0, DataprocConf.MAX_NETWORK_TAGS - clusterConfig.getTagsCount());
            List<String> tags = getFirewallTargetTags(network, internalIPOnly);
            if (tags.size() > maxTags) {
                // Report the limit that is actually enforced instead of a hard-coded number.
                LOG.warn("No more than {} tags can be added. Firewall tags ignored: {}", DataprocConf.MAX_NETWORK_TAGS, tags.subList(maxTags, tags.size()));
            }
            tags.stream().limit(maxTags).forEach(clusterConfig::addTags);
        }
        // if internal ip is preferred then create dataproc cluster without external ip for better security
        clusterConfig.setInternalIpOnly(internalIPOnly);
        Map<String, String> clusterProperties = new HashMap<>(conf.getClusterProperties());
        // Enable/Disable stackdriver
        clusterProperties.put("dataproc:dataproc.logging.stackdriver.enable", Boolean.toString(conf.isStackdriverLoggingEnabled()));
        clusterProperties.put("dataproc:dataproc.monitoring.stackdriver.enable", Boolean.toString(conf.isStackdriverMonitoringEnabled()));
        DiskConfig workerDiskConfig = DiskConfig.newBuilder().setBootDiskSizeGb(conf.getWorkerDiskGB()).setBootDiskType(conf.getWorkerDiskType()).setNumLocalSsds(0).build();
        InstanceGroupConfig.Builder primaryWorkerConfig = InstanceGroupConfig.newBuilder().setNumInstances(conf.getWorkerNumNodes()).setMachineTypeUri(conf.getWorkerMachineType()).setDiskConfig(workerDiskConfig);
        InstanceGroupConfig.Builder secondaryWorkerConfig = InstanceGroupConfig.newBuilder().setNumInstances(conf.getSecondaryWorkerNumNodes()).setMachineTypeUri(conf.getWorkerMachineType()).setPreemptibility(InstanceGroupConfig.Preemptibility.NON_PREEMPTIBLE).setDiskConfig(workerDiskConfig);
        // Set default concurrency settings for fixed cluster
        if (Strings.isNullOrEmpty(conf.getAutoScalingPolicy()) && !conf.isPredefinedAutoScaleEnabled()) {
            // Set spark.default.parallelism according to cluster size.
            // Spark defaults it to number of current executors, but when we configure the job
            // executors may not have started yet, so this value gets artificially low.
            int defaultConcurrency = Math.max(conf.getTotalWorkerCPUs(), MIN_DEFAULT_CONCURRENCY);
            // Set spark.sql.adaptive.coalescePartitions.initialPartitionNum as 32x of default parallelism,
            // but no more than 8192. This value is used only in spark 3 with adaptive execution and
            // according to our tests spark can handle really large numbers and 32x is a reasonable default.
            int initialPartitionNum = Math.min(Math.max(conf.getTotalWorkerCPUs() * PARTITION_NUM_FACTOR, MIN_INITIAL_PARTITIONS_DEFAULT), MAX_INITIAL_PARTITIONS_DEFAULT);
            // putIfAbsent: explicitly configured cluster properties win over these defaults
            clusterProperties.putIfAbsent("spark:spark.default.parallelism", Integer.toString(defaultConcurrency));
            clusterProperties.putIfAbsent("spark:spark.sql.adaptive.coalescePartitions.initialPartitionNum", Integer.toString(initialPartitionNum));
        }
        SoftwareConfig.Builder softwareConfigBuilder = SoftwareConfig.newBuilder().putAllProperties(clusterProperties);
        // Use image version only if custom Image URI is not specified, otherwise may cause image version conflicts
        String customImageUri = conf.getCustomImageUri();
        if (Strings.isNullOrEmpty(customImageUri)) {
            softwareConfigBuilder.setImageVersion(imageVersion);
        } else {
            // If custom Image URI is specified, use that for cluster creation
            primaryWorkerConfig.setImageUri(customImageUri);
            secondaryWorkerConfig.setImageUri(customImageUri);
        }
        ClusterConfig.Builder builder = ClusterConfig.newBuilder().setEndpointConfig(EndpointConfig.newBuilder().setEnableHttpPortAccess(conf.isComponentGatewayEnabled()).build()).setMasterConfig(InstanceGroupConfig.newBuilder().setNumInstances(conf.getMasterNumNodes()).setMachineTypeUri(conf.getMasterMachineType()).setDiskConfig(DiskConfig.newBuilder().setBootDiskType(conf.getMasterDiskType()).setBootDiskSizeGb(conf.getMasterDiskGB()).setNumLocalSsds(0).build()).build()).setWorkerConfig(primaryWorkerConfig.build()).setSecondaryWorkerConfig(secondaryWorkerConfig.build()).setGceClusterConfig(clusterConfig.build()).setSoftwareConfig(softwareConfigBuilder);
        // Cluster TTL if one should be set
        if (conf.getIdleTTLMinutes() > 0) {
            long seconds = TimeUnit.MINUTES.toSeconds(conf.getIdleTTLMinutes());
            builder.setLifecycleConfig(LifecycleConfig.newBuilder().setIdleDeleteTtl(Duration.newBuilder().setSeconds(seconds).build()).build());
        }
        // Add any Node Initialization action scripts
        for (String action : conf.getInitActions()) {
            builder.addInitializationActions(NodeInitializationAction.newBuilder().setExecutableFile(action).build());
        }
        // Set Auto Scaling Policy
        String autoScalingPolicy = conf.getAutoScalingPolicy();
        if (conf.isPredefinedAutoScaleEnabled()) {
            // the predefined policy replaces any explicitly configured policy
            PredefinedAutoScaling predefinedAutoScaling = new PredefinedAutoScaling(conf);
            autoScalingPolicy = predefinedAutoScaling.createPredefinedAutoScalingPolicy();
        }
        if (!Strings.isNullOrEmpty(autoScalingPolicy)) {
            // Check if policy is URI or ID. If ID Convert to URI
            if (!autoScalingPolicy.contains("/")) {
                autoScalingPolicy = "projects/" + conf.getProjectId() + "/regions/" + conf.getRegion() + "/autoscalingPolicies/" + autoScalingPolicy;
            }
            builder.setAutoscalingConfig(AutoscalingConfig.newBuilder().setPolicyUri(autoScalingPolicy).build());
        }
        if (conf.getEncryptionKeyName() != null) {
            builder.setEncryptionConfig(EncryptionConfig.newBuilder().setGcePdKmsKeyName(conf.getEncryptionKeyName()).build());
        }
        if (conf.getGcsBucket() != null) {
            builder.setConfigBucket(conf.getGcsBucket());
        }
        Cluster cluster = Cluster.newBuilder().setClusterName(name).putAllLabels(labels).setConfig(builder.build()).build();
        OperationFuture<Cluster, ClusterOperationMetadata> operationFuture = client.createClusterAsync(conf.getProjectId(), conf.getRegion(), cluster);
        // Wait only for the operation metadata of the create request, not for the cluster itself.
        return operationFuture.getMetadata().get();
    } catch (ExecutionException e) {
        // Run the clean-up hook for this cluster name before translating the failure for the caller.
        cleanUpClusterAfterCreationFailure(name);
        Throwable cause = e.getCause();
        if (cause instanceof ApiException) {
            throw handleApiException((ApiException) cause);
        }
        throw new DataprocRuntimeException(cause);
    }
}
Also used : ClusterOperationMetadata(com.google.cloud.dataproc.v1.ClusterOperationMetadata) HashMap(java.util.HashMap) DiskConfig(com.google.cloud.dataproc.v1.DiskConfig) SoftwareConfig(com.google.cloud.dataproc.v1.SoftwareConfig) ExecutionException(java.util.concurrent.ExecutionException) InstanceGroupConfig(com.google.cloud.dataproc.v1.InstanceGroupConfig) GceClusterConfig(com.google.cloud.dataproc.v1.GceClusterConfig) Cluster(com.google.cloud.dataproc.v1.Cluster) SSHPublicKey(io.cdap.cdap.runtime.spi.ssh.SSHPublicKey) ClusterConfig(com.google.cloud.dataproc.v1.ClusterConfig) ApiException(com.google.api.gax.rpc.ApiException)

Example 12 with AutoscalingPolicy

use of com.google.cloud.dataproc.v1.AutoscalingPolicy in project cdap by cdapio.

the class PredefinedAutoScalingTest method testFetchingExistingAutoScalingPolicySuccess.

/**
 * When the predefined autoscaling policy can be fetched from the (mocked) service,
 * {@code createPredefinedAutoScalingPolicy} must return its resource name without issuing a create call.
 */
@Test
public void testFetchingExistingAutoScalingPolicySuccess() throws IOException {
    PredefinedAutoScaling predefinedAutoScaling = new PredefinedAutoScaling(dataprocConf);
    // The mocked client returns the generated auto-scaling policy when it is fetched by name.
    AutoscalingPolicyName autoscalingPolicyName = AutoscalingPolicyName.ofProjectLocationAutoscalingPolicyName(dataprocConf.getProjectId(), dataprocConf.getRegion(), PredefinedAutoScaling.AUTOSCALING_POLICY_ID);
    AutoscalingPolicy generatedPolicy = predefinedAutoScaling.generatePredefinedAutoScaling();
    AutoscalingPolicyServiceClient mockClient = PowerMockito.mock(AutoscalingPolicyServiceClient.class);
    Mockito.when(mockClient.getAutoscalingPolicy(autoscalingPolicyName)).thenReturn(generatedPolicy);
    // Spy on the object under test so that it hands out the mocked service client.
    PredefinedAutoScaling spy = Mockito.spy(predefinedAutoScaling);
    Mockito.doReturn(mockClient).when(spy).getAutoscalingPolicyServiceClient();
    String name = spy.createPredefinedAutoScalingPolicy();
    Mockito.verify(mockClient, Mockito.times(1)).getAutoscalingPolicy(autoscalingPolicyName);
    // verify that create call is not made
    RegionName parent = RegionName.of(dataprocConf.getProjectId(), dataprocConf.getRegion());
    Mockito.verify(mockClient, Mockito.never()).createAutoscalingPolicy(parent, generatedPolicy);
    // JUnit expects (expected, actual); keeping that order makes the failure message read correctly.
    Assert.assertEquals(autoscalingPolicyName.toString(), name);
}
Also used : AutoscalingPolicy(com.google.cloud.dataproc.v1.AutoscalingPolicy) RegionName(com.google.cloud.dataproc.v1.RegionName) AutoscalingPolicyName(com.google.cloud.dataproc.v1.AutoscalingPolicyName) AutoscalingPolicyServiceClient(com.google.cloud.dataproc.v1.AutoscalingPolicyServiceClient) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 13 with AutoscalingPolicy

use of com.google.cloud.dataproc.v1.AutoscalingPolicy in project cdap by cdapio.

the class PredefinedAutoScalingTest method testFetchFailedAndCreateIsCalled.

/**
 * When fetching the predefined autoscaling policy fails with {@code NotFoundException},
 * {@code createPredefinedAutoScalingPolicy} must create the policy exactly once and return its resource name.
 */
@Test
public void testFetchFailedAndCreateIsCalled() throws IOException {
    PredefinedAutoScaling predefinedAutoScaling = new PredefinedAutoScaling(dataprocConf);
    // The mocked client fails the fetch with NotFoundException, i.e. the policy does not exist yet.
    AutoscalingPolicyName autoscalingPolicyName = AutoscalingPolicyName.ofProjectLocationAutoscalingPolicyName(dataprocConf.getProjectId(), dataprocConf.getRegion(), PredefinedAutoScaling.AUTOSCALING_POLICY_ID);
    AutoscalingPolicy generatedPolicy = predefinedAutoScaling.generatePredefinedAutoScaling();
    AutoscalingPolicyServiceClient mockClient = PowerMockito.mock(AutoscalingPolicyServiceClient.class);
    Mockito.when(mockClient.getAutoscalingPolicy(autoscalingPolicyName)).thenThrow(NotFoundException.class);
    RegionName parent = RegionName.of(dataprocConf.getProjectId(), dataprocConf.getRegion());
    // The create call is stubbed to return null.
    Mockito.when(mockClient.createAutoscalingPolicy(parent, generatedPolicy)).thenReturn(null);
    // Spy on the object under test so that it hands out the mocked service client.
    PredefinedAutoScaling spy = Mockito.spy(predefinedAutoScaling);
    Mockito.doReturn(mockClient).when(spy).getAutoscalingPolicyServiceClient();
    String name = spy.createPredefinedAutoScalingPolicy();
    Mockito.verify(mockClient, Mockito.times(1)).getAutoscalingPolicy(autoscalingPolicyName);
    // verify that the create call is made exactly once
    Mockito.verify(mockClient, Mockito.times(1)).createAutoscalingPolicy(parent, generatedPolicy);
    // JUnit expects (expected, actual); keeping that order makes the failure message read correctly.
    Assert.assertEquals(autoscalingPolicyName.toString(), name);
}
Also used : AutoscalingPolicy(com.google.cloud.dataproc.v1.AutoscalingPolicy) RegionName(com.google.cloud.dataproc.v1.RegionName) AutoscalingPolicyName(com.google.cloud.dataproc.v1.AutoscalingPolicyName) AutoscalingPolicyServiceClient(com.google.cloud.dataproc.v1.AutoscalingPolicyServiceClient) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 14 with AutoscalingPolicy

use of com.google.cloud.dataproc.v1.AutoscalingPolicy in project cdap by cdapio.

the class PredefinedAutoScalingTest method testGeneratedAutoScalePolicyPublicConfigs.

/**
 * Checks the id and the min/max instance bounds of the policy produced by
 * {@code generatePredefinedAutoScaling} against the values exposed by {@code PredefinedAutoScaling}.
 */
@Test
public void testGeneratedAutoScalePolicyPublicConfigs() {
    AutoscalingPolicy policy = new PredefinedAutoScaling(dataprocConf).generatePredefinedAutoScaling();

    Assert.assertEquals(PredefinedAutoScaling.AUTOSCALING_POLICY_ID, policy.getId());

    // Primary workers: both bounds are compared against the same expected value.
    Assert.assertEquals(
        PredefinedAutoScaling.getPrimaryWorkerInstances(),
        policy.getWorkerConfig().getMaxInstances());
    Assert.assertEquals(
        PredefinedAutoScaling.getPrimaryWorkerInstances(),
        policy.getWorkerConfig().getMinInstances());

    // Secondary workers: lower and upper bound are compared against separate expected values.
    Assert.assertEquals(
        PredefinedAutoScaling.getMinSecondaryWorkerInstances(),
        policy.getSecondaryWorkerConfig().getMinInstances());
    Assert.assertEquals(
        PredefinedAutoScaling.getMaxSecondaryWorkerInstances(),
        policy.getSecondaryWorkerConfig().getMaxInstances());
}
Also used : AutoscalingPolicy(com.google.cloud.dataproc.v1.AutoscalingPolicy) Test(org.junit.Test) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest)

Example 15 with AutoscalingPolicy

use of com.google.cloud.dataproc.v1.AutoscalingPolicy in project java-dataproc by googleapis.

the class AutoscalingPolicyServiceClientTest method listAutoscalingPoliciesTest.

/**
 * Stages one list response on the mock service, lists the policies through the client, and checks
 * both the returned resources and the request that the mock service recorded.
 */
@Test
public void listAutoscalingPoliciesTest() throws Exception {
    // Stage a response holding one default policy and an empty next-page token.
    AutoscalingPolicy policy = AutoscalingPolicy.newBuilder().build();
    ListAutoscalingPoliciesResponse stagedResponse =
        ListAutoscalingPoliciesResponse.newBuilder()
            .setNextPageToken("")
            .addAllPolicies(Arrays.asList(policy))
            .build();
    mockAutoscalingPolicyService.addResponse(stagedResponse);

    // Call the client and collect everything the paged response yields.
    LocationName parent = LocationName.of("[PROJECT]", "[LOCATION]");
    ListAutoscalingPoliciesPagedResponse paged = client.listAutoscalingPolicies(parent);
    List<AutoscalingPolicy> fetched = Lists.newArrayList(paged.iterateAll());
    Assert.assertEquals(1, fetched.size());
    Assert.assertEquals(stagedResponse.getPoliciesList().get(0), fetched.get(0));

    // Exactly one request must have been recorded, addressed to the requested parent.
    List<AbstractMessage> recorded = mockAutoscalingPolicyService.getRequests();
    Assert.assertEquals(1, recorded.size());
    ListAutoscalingPoliciesRequest sentRequest = (ListAutoscalingPoliciesRequest) recorded.get(0);
    Assert.assertEquals(parent.toString(), sentRequest.getParent());

    // The API client header must have been sent on the channel.
    boolean headerSent =
        channelProvider.isHeaderSent(
            ApiClientHeaderProvider.getDefaultApiClientHeaderKey(),
            GaxGrpcProperties.getDefaultApiClientHeaderPattern());
    Assert.assertTrue(headerSent);
}
Also used : AbstractMessage(com.google.protobuf.AbstractMessage) ListAutoscalingPoliciesPagedResponse(com.google.cloud.dataproc.v1.AutoscalingPolicyServiceClient.ListAutoscalingPoliciesPagedResponse) Test(org.junit.Test)

Aggregations

AutoscalingPolicy (com.google.cloud.dataproc.v1.AutoscalingPolicy)11 Test (org.junit.Test)9 AutoscalingPolicyServiceClient (com.google.cloud.dataproc.v1.AutoscalingPolicyServiceClient)7 RegionName (com.google.cloud.dataproc.v1.RegionName)7 AutoscalingPolicyName (com.google.cloud.dataproc.v1.AutoscalingPolicyName)6 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)6 ListAutoscalingPoliciesPagedResponse (com.google.cloud.dataproc.v1.AutoscalingPolicyServiceClient.ListAutoscalingPoliciesPagedResponse)3 BasicAutoscalingAlgorithm (com.google.cloud.dataproc.v1.BasicAutoscalingAlgorithm)3 BasicYarnAutoscalingConfig (com.google.cloud.dataproc.v1.BasicYarnAutoscalingConfig)3 Cluster (com.google.cloud.dataproc.v1.Cluster)3 ClusterConfig (com.google.cloud.dataproc.v1.ClusterConfig)3 ClusterOperationMetadata (com.google.cloud.dataproc.v1.ClusterOperationMetadata)3 InstanceGroupAutoscalingPolicyConfig (com.google.cloud.dataproc.v1.InstanceGroupAutoscalingPolicyConfig)3 InstanceGroupConfig (com.google.cloud.dataproc.v1.InstanceGroupConfig)3 AbstractMessage (com.google.protobuf.AbstractMessage)3 ExecutionException (java.util.concurrent.ExecutionException)3 AlreadyExistsException (com.google.api.gax.rpc.AlreadyExistsException)2 ApiException (com.google.api.gax.rpc.ApiException)2 NotFoundException (com.google.api.gax.rpc.NotFoundException)2 DiskConfig (com.google.cloud.dataproc.v1.DiskConfig)2