Use of com.google.cloud.dataproc.v1.ClusterConfig in project cdap by caskdata.
The class DataprocClient, method createCluster.
/**
 * Create a cluster. This will return after the initial request to create the cluster is completed.
 * At this point, the cluster is likely not yet running, but in a provisioning state.
 *
 * <p>When a custom image URI is configured, it is applied to the master, primary worker and secondary
 * worker instance groups and the {@code imageVersion} argument is not used.
 *
 * @param name the name of the cluster to create
 * @param imageVersion the image version for the cluster; only used when no custom image URI is configured
 * @param labels labels to set on the cluster
 * @param privateInstance {@code true} to indicate using private instance
 * @return create operation metadata
 * @throws IllegalArgumentException if this client has no network information
 * @throws InterruptedException if the thread was interrupted while waiting for the initial request to complete
 * @throws AlreadyExistsException if the cluster already exists
 * @throws IOException if there was an I/O error talking to Google Compute APIs
 * @throws RetryableProvisionException if there was a non 4xx error code returned
 * @throws DataprocRuntimeException if the create request failed with a cause that is not an {@code ApiException}
 */
ClusterOperationMetadata createCluster(String name, String imageVersion, Map<String, String> labels,
    boolean privateInstance) throws RetryableProvisionException, InterruptedException, IOException {
  if (network == null) {
    // Network information is mandatory for cluster creation; fail fast when this client has none.
    throw new IllegalArgumentException("Missing network information");
  }
  try {
    Map<String, String> metadata = new HashMap<>();
    SSHPublicKey publicKey = conf.getPublicKey();
    if (publicKey != null) {
      // Don't fail if there is no public key. It is for tooling case that the key might be generated differently.
      metadata.put("ssh-keys", publicKey.getUser() + ":" + publicKey.getKey());
      // override any os-login that may be set on the project-level metadata
      // this metadata is only needed if ssh is being used to launch the jobs - CDAP-15369
      metadata.put("enable-oslogin", "false");
    }
    // User-provided cluster metadata is added last, so it wins over the entries set above.
    metadata.putAll(conf.getClusterMetaData());

    GceClusterConfig.Builder clusterConfig = GceClusterConfig.newBuilder()
        .addServiceAccountScopes(DataprocConf.CLOUD_PLATFORM_SCOPE)
        .setShieldedInstanceConfig(ShieldedInstanceConfig.newBuilder()
            .setEnableSecureBoot(conf.isSecureBootEnabled())
            .setEnableVtpm(conf.isvTpmEnabled())
            .setEnableIntegrityMonitoring(conf.isIntegrityMonitoringEnabled())
            .build())
        .putAllMetadata(metadata);
    if (conf.getServiceAccount() != null) {
      clusterConfig.setServiceAccount(conf.getServiceAccount());
    }
    if (conf.getZone() != null) {
      clusterConfig.setZoneUri(conf.getZone());
    }
    // subnets are unique within a location, not within a network, which is why these configs are mutually exclusive.
    if (conf.getSubnet() != null) {
      clusterConfig.setSubnetworkUri(conf.getSubnet());
    } else {
      clusterConfig.setNetworkUri(network.getSelfLink());
    }
    // Add any defined Network Tags
    clusterConfig.addAllTags(conf.getNetworkTags());

    boolean internalIPOnly = isInternalIPOnly(network, privateInstance, publicKey != null);
    // if public key is not null that means ssh is used to launch / monitor job on dataproc
    if (publicKey != null) {
      // Firewall tags only get the slots left over after the user-defined network tags.
      int maxTags = Math.max(0, DataprocConf.MAX_NETWORK_TAGS - clusterConfig.getTagsCount());
      List<String> tags = getFirewallTargetTags(network, internalIPOnly);
      if (tags.size() > maxTags) {
        LOG.warn("No more than {} tags can be added. Firewall tags ignored: {}",
                 DataprocConf.MAX_NETWORK_TAGS, tags.subList(maxTags, tags.size()));
      }
      tags.stream().limit(maxTags).forEach(clusterConfig::addTags);
    }
    // if internal ip is preferred then create dataproc cluster without external ip for better security
    clusterConfig.setInternalIpOnly(internalIPOnly);

    Map<String, String> clusterProperties = new HashMap<>(conf.getClusterProperties());
    // Enable/Disable stackdriver
    clusterProperties.put("dataproc:dataproc.logging.stackdriver.enable",
                          Boolean.toString(conf.isStackdriverLoggingEnabled()));
    clusterProperties.put("dataproc:dataproc.monitoring.stackdriver.enable",
                          Boolean.toString(conf.isStackdriverMonitoringEnabled()));

    DiskConfig masterDiskConfig = DiskConfig.newBuilder()
        .setBootDiskType(conf.getMasterDiskType())
        .setBootDiskSizeGb(conf.getMasterDiskGB())
        .setNumLocalSsds(0)
        .build();
    InstanceGroupConfig.Builder masterConfig = InstanceGroupConfig.newBuilder()
        .setNumInstances(conf.getMasterNumNodes())
        .setMachineTypeUri(conf.getMasterMachineType())
        .setDiskConfig(masterDiskConfig);

    // Primary and secondary workers share the same disk settings and machine type.
    DiskConfig workerDiskConfig = DiskConfig.newBuilder()
        .setBootDiskSizeGb(conf.getWorkerDiskGB())
        .setBootDiskType(conf.getWorkerDiskType())
        .setNumLocalSsds(0)
        .build();
    InstanceGroupConfig.Builder primaryWorkerConfig = InstanceGroupConfig.newBuilder()
        .setNumInstances(conf.getWorkerNumNodes())
        .setMachineTypeUri(conf.getWorkerMachineType())
        .setDiskConfig(workerDiskConfig);
    InstanceGroupConfig.Builder secondaryWorkerConfig = InstanceGroupConfig.newBuilder()
        .setNumInstances(conf.getSecondaryWorkerNumNodes())
        .setMachineTypeUri(conf.getWorkerMachineType())
        .setPreemptibility(InstanceGroupConfig.Preemptibility.NON_PREEMPTIBLE)
        .setDiskConfig(workerDiskConfig);

    // Set default concurrency settings for fixed cluster
    if (Strings.isNullOrEmpty(conf.getAutoScalingPolicy()) && !conf.isPredefinedAutoScaleEnabled()) {
      // Set spark.default.parallelism according to cluster size.
      // Spark defaults it to number of current executors, but when we configure the job
      // executors may not have started yet, so this value gets artificially low.
      int defaultConcurrency = Math.max(conf.getTotalWorkerCPUs(), MIN_DEFAULT_CONCURRENCY);
      // Set spark.sql.adaptive.coalescePartitions.initialPartitionNum as a multiple (PARTITION_NUM_FACTOR)
      // of the total worker CPUs, clamped between MIN_INITIAL_PARTITIONS_DEFAULT and
      // MAX_INITIAL_PARTITIONS_DEFAULT. This value is used only in spark 3 with adaptive execution.
      int initialPartitionNum = Math.min(
          Math.max(conf.getTotalWorkerCPUs() * PARTITION_NUM_FACTOR, MIN_INITIAL_PARTITIONS_DEFAULT),
          MAX_INITIAL_PARTITIONS_DEFAULT);
      // putIfAbsent: values explicitly supplied in the cluster properties are left untouched.
      clusterProperties.putIfAbsent("spark:spark.default.parallelism", Integer.toString(defaultConcurrency));
      clusterProperties.putIfAbsent("spark:spark.sql.adaptive.coalescePartitions.initialPartitionNum",
                                    Integer.toString(initialPartitionNum));
    }

    SoftwareConfig.Builder softwareConfigBuilder = SoftwareConfig.newBuilder().putAllProperties(clusterProperties);
    // Use image version only if custom Image URI is not specified, otherwise may cause image version conflicts
    String customImageUri = conf.getCustomImageUri();
    if (Strings.isNullOrEmpty(customImageUri)) {
      softwareConfigBuilder.setImageVersion(imageVersion);
    } else {
      // If custom Image URI is specified, use it for every instance group. The master must be included:
      // with no image version set, it would otherwise fall back to a different image than the workers.
      masterConfig.setImageUri(customImageUri);
      primaryWorkerConfig.setImageUri(customImageUri);
      secondaryWorkerConfig.setImageUri(customImageUri);
    }

    ClusterConfig.Builder builder = ClusterConfig.newBuilder()
        .setEndpointConfig(EndpointConfig.newBuilder()
            .setEnableHttpPortAccess(conf.isComponentGatewayEnabled())
            .build())
        .setMasterConfig(masterConfig.build())
        .setWorkerConfig(primaryWorkerConfig.build())
        .setSecondaryWorkerConfig(secondaryWorkerConfig.build())
        .setGceClusterConfig(clusterConfig.build())
        .setSoftwareConfig(softwareConfigBuilder);

    // Cluster TTL if one should be set
    if (conf.getIdleTTLMinutes() > 0) {
      long seconds = TimeUnit.MINUTES.toSeconds(conf.getIdleTTLMinutes());
      builder.setLifecycleConfig(LifecycleConfig.newBuilder()
          .setIdleDeleteTtl(Duration.newBuilder().setSeconds(seconds).build())
          .build());
    }

    // Add any Node Initialization action scripts
    for (String action : conf.getInitActions()) {
      builder.addInitializationActions(NodeInitializationAction.newBuilder().setExecutableFile(action).build());
    }

    // Set Auto Scaling Policy
    String autoScalingPolicy = conf.getAutoScalingPolicy();
    if (conf.isPredefinedAutoScaleEnabled()) {
      // The predefined policy takes precedence over any explicitly configured policy.
      PredefinedAutoScaling predefinedAutoScaling = new PredefinedAutoScaling(conf);
      autoScalingPolicy = predefinedAutoScaling.createPredefinedAutoScalingPolicy();
    }
    if (!Strings.isNullOrEmpty(autoScalingPolicy)) {
      // Check if policy is URI or ID. If ID Convert to URI
      if (!autoScalingPolicy.contains("/")) {
        autoScalingPolicy = "projects/" + conf.getProjectId() + "/regions/" + conf.getRegion()
            + "/autoscalingPolicies/" + autoScalingPolicy;
      }
      builder.setAutoscalingConfig(AutoscalingConfig.newBuilder().setPolicyUri(autoScalingPolicy).build());
    }

    if (conf.getEncryptionKeyName() != null) {
      builder.setEncryptionConfig(EncryptionConfig.newBuilder()
          .setGcePdKmsKeyName(conf.getEncryptionKeyName())
          .build());
    }
    if (conf.getGcsBucket() != null) {
      builder.setConfigBucket(conf.getGcsBucket());
    }

    Cluster cluster = com.google.cloud.dataproc.v1.Cluster.newBuilder()
        .setClusterName(name)
        .putAllLabels(labels)
        .setConfig(builder.build())
        .build();
    OperationFuture<Cluster, ClusterOperationMetadata> operationFuture =
        client.createClusterAsync(conf.getProjectId(), conf.getRegion(), cluster);
    // Only wait for the operation metadata (the initial request), not for the cluster to finish provisioning.
    return operationFuture.getMetadata().get();
  } catch (ExecutionException e) {
    // Run the creation-failure cleanup before translating the failure for the caller.
    cleanUpClusterAfterCreationFailure(name);
    Throwable cause = e.getCause();
    if (cause instanceof ApiException) {
      throw handleApiException((ApiException) cause);
    }
    throw new DataprocRuntimeException(cause);
  }
}
Aggregations