Search in sources:

Example 1 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

The method testInjector of the class DefaultRuntimeJobTest.

/**
 * Smoke test: builds a Guice injector from the modules returned by
 * {@code DefaultRuntimeJob.createModules}, looks up the {@code LogAppenderInitializer} binding and then
 * calls {@code createCoreServices}. There are no explicit assertions; the test passes when nothing throws.
 */
@Test
public void testInjector() throws Exception {
    CConfiguration cConf = CConfiguration.create();
    cConf.set(Constants.CFG_LOCAL_DATA_DIR, TEMP_FOLDER.newFolder().toString());
    LocationFactory locationFactory = new LocalLocationFactory(TEMP_FOLDER.newFile());
    DefaultRuntimeJob runtimeJob = new DefaultRuntimeJob();

    // Inputs describing a single-node running cluster and a workflow run under the "test" profile.
    Arguments systemArgs = new BasicArguments(Collections.singletonMap(SystemArguments.PROFILE_NAME, "test"));
    Node masterNode = new Node("test", Node.Type.MASTER, "127.0.0.1", System.currentTimeMillis(),
                               Collections.emptyMap());
    Cluster cluster = new Cluster("test", ClusterStatus.RUNNING, Collections.singleton(masterNode),
                                  Collections.emptyMap());
    ProgramRunId runId = NamespaceId.DEFAULT.app("app").workflow("workflow").run(RunIds.generate());
    SimpleProgramOptions options = new SimpleProgramOptions(runId.getParent(), systemArgs, new BasicArguments());

    // Minimal environment: local location factory, no-op twill runner, no extra properties.
    RuntimeJobEnvironment environment = new RuntimeJobEnvironment() {

        @Override
        public LocationFactory getLocationFactory() {
            return locationFactory;
        }

        @Override
        public TwillRunner getTwillRunner() {
            return new NoopTwillRunnerService();
        }

        @Override
        public Map<String, String> getProperties() {
            return Collections.emptyMap();
        }
    };

    Injector injector = Guice.createInjector(runtimeJob.createModules(environment, cConf, runId, options));
    injector.getInstance(LogAppenderInitializer.class);
    runtimeJob.createCoreServices(injector, systemArgs, cluster);
}
Also used : Node(io.cdap.cdap.runtime.spi.provisioner.Node) Arguments(io.cdap.cdap.app.runtime.Arguments) SystemArguments(io.cdap.cdap.internal.app.runtime.SystemArguments) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) CConfiguration(io.cdap.cdap.common.conf.CConfiguration) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) LocationFactory(org.apache.twill.filesystem.LocationFactory) NoopTwillRunnerService(io.cdap.cdap.common.twill.NoopTwillRunnerService) Injector(com.google.inject.Injector) RuntimeJobEnvironment(io.cdap.cdap.runtime.spi.runtimejob.RuntimeJobEnvironment) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) Test(org.junit.Test)

Example 2 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

The method execute of the class ClusterInitializeSubtask.

/**
 * Looks up the full cluster details from the provisioner, initializes the cluster with them, and returns
 * a new {@link Cluster} marked as {@link ClusterStatus#RUNNING}.
 *
 * @param cluster the cluster as known so far
 * @return a cluster carrying the detailed name and nodes, with the properties of {@code cluster} overlaid
 *         by the properties of the detailed cluster
 * @throws Exception if fetching the details or initializing the cluster fails
 */
@Override
public Cluster execute(Cluster cluster) throws Exception {
    // The incoming cluster is what the original createCluster() call returned (with only the status
    // refreshed); details such as ip addresses are often unavailable until polling has finished, so
    // fetch the full details before initializing.
    Cluster detailed = provisioner.getClusterDetail(provisionerContext, cluster);
    provisioner.initializeCluster(provisionerContext, detailed);

    // Properties from the detailed cluster take precedence over the ones already known.
    Map<String, String> merged = new HashMap<>();
    merged.putAll(cluster.getProperties());
    merged.putAll(detailed.getProperties());

    return new Cluster(detailed.getName(), ClusterStatus.RUNNING, detailed.getNodes(), merged);
}
Also used : HashMap(java.util.HashMap) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster)

Example 3 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

The method testGetClusterStatus of the class ProvisioningServiceTest.

/**
 * Checks that {@code provisioningService.getClusterStatus} returns {@link ClusterStatus#RUNNING} when the
 * mock provisioner is configured with RUNNING as its first cluster status and to fail retryably every
 * second call, even though the cluster handed in is marked {@link ClusterStatus#NOT_EXISTS}.
 */
@Test
public void testGetClusterStatus() throws Exception {
    TaskFields fields = createTaskInfo(
        new MockProvisioner.PropertyBuilder()
            .setFirstClusterStatus(ClusterStatus.RUNNING)
            .failRetryablyEveryN(2)
            .setExpectedAppCDAPVersion(APP_CDAP_VERSION)
            .build());

    Cluster missingCluster = new Cluster("test", ClusterStatus.NOT_EXISTS,
                                         Collections.emptyList(), Collections.emptyMap());

    Assert.assertEquals(
        ClusterStatus.RUNNING,
        provisioningService.getClusterStatus(fields.programRunId, fields.programOptions, missingCluster, "cdap"));
}
Also used : Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) Test(org.junit.Test)

Example 4 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

The method handleClusterEvent of the class ProgramNotificationSubscriberService.

/**
 * Processes a cluster lifecycle notification for a program run and records the matching transition in
 * the app metadata store.
 *
 * <p>PROVISIONING and (when the transition was recorded) DEPROVISIONING hand back a follow-up task
 * obtained from the provisioning service; every other status yields an empty result.
 *
 * @param programRunId program run id from the event
 * @param clusterStatus cluster status from the event
 * @param notification the notification to process
 * @param messageIdBytes the unique ID for the notification message
 * @param appMetadataStore the data table to use
 * @param context the table context for performing table operations
 * @return an {@link Optional} of {@link Runnable} to carry a task to execute after handling of this event completed.
 *         See {@link #postProcess()} for details.
 * @throws IOException if failed to read/write to the app metadata store.
 */
private Optional<Runnable> handleClusterEvent(ProgramRunId programRunId, ProgramRunClusterStatus clusterStatus, Notification notification, byte[] messageIdBytes, AppMetadataStore appMetadataStore, StructuredTableContext context) throws IOException {
    // Everything below is decoded up front, regardless of which status is being handled.
    Map<String, String> eventProps = notification.getProperties();
    ProgramOptions programOptions = ProgramOptions.fromNotification(notification, GSON);
    String userId = eventProps.get(ProgramOptionConstants.USER_ID);
    long endTs = getTimeSeconds(eventProps, ProgramOptionConstants.CLUSTER_END_TIME);
    ProgramDescriptor descriptor =
        GSON.fromJson(eventProps.get(ProgramOptionConstants.PROGRAM_DESCRIPTOR), ProgramDescriptor.class);

    switch (clusterStatus) {
        case PROVISIONING: {
            appMetadataStore.recordProgramProvisioning(programRunId,
                                                       programOptions.getUserArguments().asMap(),
                                                       programOptions.getArguments().asMap(),
                                                       messageIdBytes,
                                                       descriptor.getArtifactId().toApiArtifactId());
            ProvisionRequest request = new ProvisionRequest(programRunId, programOptions, descriptor, userId);
            return Optional.of(provisioningService.provision(request, context));
        }
        case PROVISIONED: {
            Cluster provisioned = GSON.fromJson(eventProps.get(ProgramOptionConstants.CLUSTER), Cluster.class);
            appMetadataStore.recordProgramProvisioned(programRunId, provisioned.getNodes().size(), messageIdBytes);

            // Extend the system arguments with what program execution needs: user, cluster and key directory.
            Map<String, String> systemArgs = new HashMap<>(programOptions.getArguments().asMap());
            systemArgs.put(ProgramOptionConstants.USER_ID, eventProps.get(ProgramOptionConstants.USER_ID));
            systemArgs.put(ProgramOptionConstants.CLUSTER, eventProps.get(ProgramOptionConstants.CLUSTER));
            systemArgs.put(ProgramOptionConstants.SECURE_KEYS_DIR,
                           eventProps.get(ProgramOptionConstants.SECURE_KEYS_DIR));
            ProgramOptions startOptions = new SimpleProgramOptions(programOptions.getProgramId(),
                                                                   new BasicArguments(systemArgs),
                                                                   programOptions.getUserArguments());

            // The STARTING state is published before the program is actually started.
            programStateWriter.start(programRunId, startOptions, null, descriptor);

            // Provisioning time = now minus the time encoded in the run id, in seconds.
            long nowSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
            long provisioningTime = nowSeconds - RunIds.getTime(programRunId.getRun(), TimeUnit.SECONDS);
            SystemArguments.getProfileIdFromArgs(programRunId.getNamespaceId(), systemArgs)
                .ifPresent(profileId ->
                               emitProvisioningTimeMetric(programRunId, profileId, programOptions, provisioningTime));
            return Optional.empty();
        }
        case DEPROVISIONING: {
            RunRecordDetail recorded = appMetadataStore.recordProgramDeprovisioning(programRunId, messageIdBytes);
            // A null record means the transition was not recorded (for example because of an invalid
            // state transition); in that case we should not try to deprovision the cluster.
            if (recorded == null) {
                return Optional.empty();
            }
            return Optional.of(provisioningService.deprovision(programRunId, context));
        }
        case DEPROVISIONED: {
            appMetadataStore.recordProgramDeprovisioned(programRunId, endTs, messageIdBytes);
            return Optional.empty();
        }
        case ORPHANED: {
            appMetadataStore.recordProgramOrphaned(programRunId, endTs, messageIdBytes);
            return Optional.empty();
        }
        default:
            // Any other status requires no handling here.
            return Optional.empty();
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ProvisionRequest(io.cdap.cdap.internal.provision.ProvisionRequest) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions)

Example 5 with Cluster

use of io.cdap.cdap.runtime.spi.provisioner.Cluster in project cdap by caskdata.

The method createCluster of the class DataprocProvisioner.

/**
 * Creates a Dataproc cluster for the given context, or returns one that can be used instead.
 *
 * <p>A reusable cluster (see {@code tryReuseCluster}) or an already existing cluster with the run's name
 * is returned as-is; otherwise a create request is issued and a cluster in
 * {@link ClusterStatus#CREATING} state with no nodes and no properties is returned.
 *
 * @param context the provisioner context of the run
 * @return the reused, existing or newly requested cluster
 * @throws Exception if anything fails while talking to Dataproc; the failure is reported as a metric
 *         and rethrown unchanged
 */
@Override
public Cluster createCluster(ProvisionerContext context) throws Exception {
    DataprocConf conf = DataprocConf.create(createContextProperties(context));

    boolean autoscalingFieldsValid = isAutoscalingFieldsValid(conf, createContextProperties(context));
    if (!autoscalingFieldsValid) {
        LOG.warn("The configs : {}, {}, {} will not be considered when {} is enabled ", DataprocConf.WORKER_NUM_NODES, DataprocConf.SECONDARY_WORKER_NUM_NODES, DataprocConf.AUTOSCALING_POLICY, DataprocConf.PREDEFINED_AUTOSCALE_ENABLED);
    }

    // An SSH key is only set up for SSH monitoring or when the runtime job manager is disabled.
    boolean sshKeyWanted = context.getRuntimeMonitorType() == RuntimeMonitorType.SSH
        || !conf.isRuntimeJobManagerEnabled();
    SSHContext sshContext = sshKeyWanted ? context.getSSHContext() : null;
    if (sshContext != null) {
        // This method may be invoked again on a retry, so an already stored key pair is kept
        // rather than regenerated.
        SSHKeyPair keyPair = sshContext.getSSHKeyPair().orElse(null);
        if (keyPair == null) {
            keyPair = sshContext.generate("cdap");
            sshContext.setSSHKeyPair(keyPair);
        }
        conf = DataprocConf.create(createContextProperties(context), keyPair.getPublicKey());
    }

    try (DataprocClient client = getClient(conf)) {
        Cluster reusable = tryReuseCluster(client, context, conf);
        if (reusable != null) {
            DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.reuse.count");
            return reusable;
        }

        String clusterName = getRunKey(context);
        // A cluster with this name already being there means this call is a retry;
        // the create request can be skipped.
        Optional<Cluster> alreadyCreated = client.getCluster(clusterName);
        if (alreadyCreated.isPresent()) {
            return alreadyCreated.get();
        }

        String imageVersion = getImageVersion(context, conf);
        String customImageUri = conf.getCustomImageUri();
        // Describe the image by its custom URI when one is configured, by its version otherwise.
        String imageDescription = (customImageUri == null || customImageUri.isEmpty())
            ? imageVersion
            : customImageUri;

        // Combine system, reuse and configured cluster labels; on key clashes the later source wins.
        Map<String, String> labels = new HashMap<>(getSystemLabels());
        labels.putAll(getReuseLabels(context, conf));
        labels.putAll(conf.getClusterLabels());

        LOG.info("Creating Dataproc cluster {} in project {}, in region {}, with image {}, with labels {}, endpoint {}", clusterName, conf.getProjectId(), conf.getRegion(), imageDescription, labels, getRootUrl(conf));

        boolean privateInstance = Boolean.parseBoolean(getSystemContext().getProperties().get(PRIVATE_INSTANCE));
        ClusterOperationMetadata operationMeta =
            client.createCluster(clusterName, imageVersion, labels, privateInstance);

        int warningCount = operationMeta.getWarningsCount();
        if (warningCount > 0) {
            String pluralSuffix = warningCount > 1 ? "s" : "";
            LOG.warn("Encountered {} warning{} while creating Dataproc cluster:\n{}", warningCount, pluralSuffix, String.join("\n", operationMeta.getWarningsList()));
        }

        DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count");
        return new Cluster(clusterName, ClusterStatus.CREATING, Collections.emptyList(), Collections.emptyMap());
    } catch (Exception failure) {
        // Report the failure under the same metric name, then propagate it untouched.
        DataprocUtils.emitMetric(context, conf.getRegion(), "provisioner.createCluster.response.count", failure);
        throw failure;
    }
}
Also used : SSHKeyPair(io.cdap.cdap.runtime.spi.ssh.SSHKeyPair) SSHContext(io.cdap.cdap.runtime.spi.ssh.SSHContext) ClusterOperationMetadata(com.google.cloud.dataproc.v1.ClusterOperationMetadata) HashMap(java.util.HashMap) Cluster(io.cdap.cdap.runtime.spi.provisioner.Cluster) GeneralSecurityException(java.security.GeneralSecurityException) RetryableProvisionException(io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException) IOException(java.io.IOException)

Aggregations

Cluster (io.cdap.cdap.runtime.spi.provisioner.Cluster)18 HashMap (java.util.HashMap)8 IOException (java.io.IOException)6 SSHKeyPair (io.cdap.cdap.runtime.spi.ssh.SSHKeyPair)5 Test (org.junit.Test)5 ClusterStatus (io.cdap.cdap.runtime.spi.provisioner.ClusterStatus)4 RetryableProvisionException (io.cdap.cdap.runtime.spi.provisioner.RetryableProvisionException)4 SSHContext (io.cdap.cdap.runtime.spi.ssh.SSHContext)4 ProgramOptions (io.cdap.cdap.app.runtime.ProgramOptions)3 CConfiguration (io.cdap.cdap.common.conf.CConfiguration)3 SystemArguments (io.cdap.cdap.internal.app.runtime.SystemArguments)3 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)3 RuntimeMonitorType (io.cdap.cdap.runtime.spi.RuntimeMonitorType)3 ProvisionerContext (io.cdap.cdap.runtime.spi.provisioner.ProvisionerContext)3 ProvisionerSpecification (io.cdap.cdap.runtime.spi.provisioner.ProvisionerSpecification)3 GeneralSecurityException (java.security.GeneralSecurityException)3 Map (java.util.Map)3 Optional (java.util.Optional)3 TimeUnit (java.util.concurrent.TimeUnit)3 Logger (org.slf4j.Logger)3