Example 26 with ProgramRunId

Use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

The class SparkRuntimeContextProvider, method createIfNotExists.

/**
   * Creates a singleton {@link SparkRuntimeContext}.
   * It makes assumptions about the file locations that are localized by the SparkRuntimeService.
   */
private static synchronized SparkRuntimeContext createIfNotExists() {
    if (sparkRuntimeContext != null) {
        return sparkRuntimeContext;
    }
    try {
        CConfiguration cConf = createCConf();
        Configuration hConf = createHConf();
        SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(hConf);
        // Should be YARN only, and only on an executor node, not the driver node.
        Preconditions.checkState(!contextConfig.isLocal() && Boolean.parseBoolean(System.getenv("SPARK_YARN_MODE")), "SparkContextProvider.getSparkContext should only be called in Spark executor process.");
        // Create the program
        Program program = createProgram(cConf, contextConfig);
        Injector injector = createInjector(cConf, hConf, contextConfig.getProgramId(), contextConfig.getProgramOptions());
        final Service logAppenderService = new LogAppenderService(injector.getInstance(LogAppenderInitializer.class), contextConfig.getProgramOptions());
        final ZKClientService zkClientService = injector.getInstance(ZKClientService.class);
        final KafkaClientService kafkaClientService = injector.getInstance(KafkaClientService.class);
        final MetricsCollectionService metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
        final StreamCoordinatorClient streamCoordinatorClient = injector.getInstance(StreamCoordinatorClient.class);
        // Use the shutdown hook to shutdown services, since this class should only be loaded from System classloader
        // of the spark executor, hence there should be exactly one instance only.
        // The problem with not shutting down nicely is that some logs/metrics might be lost
        Services.chainStart(logAppenderService, zkClientService, kafkaClientService, metricsCollectionService, streamCoordinatorClient);
        Runtime.getRuntime().addShutdownHook(new Thread() {

            @Override
            public void run() {
                // The logger may already have been shut down. Use System.out/err instead
                System.out.println("Shutting down SparkClassLoader services");
                Future<List<ListenableFuture<Service.State>>> future = Services.chainStop(logAppenderService, streamCoordinatorClient, metricsCollectionService, kafkaClientService, zkClientService);
                try {
                    List<ListenableFuture<Service.State>> futures = future.get(5, TimeUnit.SECONDS);
                    System.out.println("SparkClassLoader services shutdown completed: " + futures);
                } catch (Exception e) {
                    System.err.println("Exception when shutting down services");
                    e.printStackTrace(System.err);
                }
            }
        });
        // Construct the DatasetFramework
        DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
        WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
        DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, contextConfig.getApplicationSpecification());
        // Setup dataset framework context, if required
        if (programDatasetFramework instanceof ProgramContextAware) {
            ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
            ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
        }
        PluginInstantiator pluginInstantiator = createPluginInstantiator(cConf, contextConfig, program.getClassLoader());
        // Create the context object
        sparkRuntimeContext = new SparkRuntimeContext(contextConfig.getConfiguration(), program, contextConfig.getProgramOptions(), cConf, getHostname(), injector.getInstance(TransactionSystemClient.class), programDatasetFramework, injector.getInstance(DiscoveryServiceClient.class), metricsCollectionService, injector.getInstance(StreamAdmin.class), contextConfig.getWorkflowProgramInfo(), pluginInstantiator, injector.getInstance(SecureStore.class), injector.getInstance(SecureStoreManager.class), injector.getInstance(AuthorizationEnforcer.class), injector.getInstance(AuthenticationContext.class), injector.getInstance(MessagingService.class));
        LoggingContextAccessor.setLoggingContext(sparkRuntimeContext.getLoggingContext());
        return sparkRuntimeContext;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) NameMappedDatasetFramework(co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) LogAppenderInitializer(co.cask.cdap.logging.appender.LogAppenderInitializer) Injector(com.google.inject.Injector) List(java.util.List) Program(co.cask.cdap.app.program.Program) DefaultProgram(co.cask.cdap.app.program.DefaultProgram) KafkaClientService(org.apache.twill.kafka.client.KafkaClientService) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) MessagingService(co.cask.cdap.messaging.MessagingService) AbstractService(com.google.common.util.concurrent.AbstractService) ZKClientService(org.apache.twill.zookeeper.ZKClientService) Service(com.google.common.util.concurrent.Service) StreamCoordinatorClient(co.cask.cdap.data.stream.StreamCoordinatorClient) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) InvocationTargetException(java.lang.reflect.InvocationTargetException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) WorkflowProgramInfo(co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) Future(java.util.concurrent.Future) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) PluginInstantiator(co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)
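
The start-in-order / stop-in-reverse pattern with a JVM shutdown hook is the core of this example. Below is a minimal, self-contained sketch of that pattern using plain Guava services; NamedService is a made-up stand-in for the ZooKeeper, Kafka, and metrics services above, and the 5-second timeout mirrors the original.

import com.google.common.util.concurrent.AbstractIdleService;
import com.google.common.util.concurrent.Service;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;

public final class ServiceShutdownHookSketch {

    // Trivial stand-in for ZKClientService, KafkaClientService, etc.
    private static final class NamedService extends AbstractIdleService {
        private final String name;

        NamedService(String name) {
            this.name = name;
        }

        @Override
        protected void startUp() {
            System.out.println("Started " + name);
        }

        @Override
        protected void shutDown() {
            System.out.println("Stopped " + name);
        }
    }

    public static void main(String[] args) {
        List<Service> services = Arrays.asList(
            new NamedService("logAppender"), new NamedService("zkClient"), new NamedService("kafkaClient"));
        // Start services in dependency order
        for (Service service : services) {
            service.startAsync().awaitRunning();
        }
        // Stop them in reverse order from a shutdown hook. Loggers may already be
        // shut down at this point, so report through System.out/err, as above.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            System.out.println("Shutting down services");
            for (int i = services.size() - 1; i >= 0; i--) {
                try {
                    services.get(i).stopAsync().awaitTerminated(5, TimeUnit.SECONDS);
                } catch (Exception e) {
                    System.err.println("Exception when shutting down services");
                    e.printStackTrace(System.err);
                }
            }
        }));
    }
}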

Example 27 with ProgramRunId

Use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

The class SparkExecutionServiceTest, method testCompletion.

@Test
public void testCompletion() throws Exception {
    ProgramRunId programRunId = new ProgramRunId("ns", "app", ProgramType.SPARK, "test", RunIds.generate().getId());
    // Start a service that does not support a token
    SparkExecutionService service = new SparkExecutionService(locationFactory, InetAddress.getLoopbackAddress().getCanonicalHostName(), programRunId, null);
    service.startAndWait();
    try {
        SparkExecutionClient client = new SparkExecutionClient(service.getBaseURI(), programRunId);
        // Send multiple heartbeats.
        for (int i = 0; i < 5; i++) {
            Assert.assertNull(client.heartbeat(null));
            TimeUnit.MILLISECONDS.sleep(50);
        }
        // Call completed to notify the service that the program has stopped
        client.completed(null);
    } finally {
        service.stopAndWait();
    }
}
Also used : ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Test(org.junit.Test)
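
Note that the ProgramRunId built here with the five-argument constructor could equally be derived from a ProgramId, as the other examples do. A small sketch of that equivalence, assuming the CDAP classes already imported in these examples (the RunIds package path is an assumption):

import co.cask.cdap.common.app.RunIds;  // assumed location of CDAP's RunIds helper
import co.cask.cdap.proto.ProgramType;
import co.cask.cdap.proto.id.ProgramId;
import co.cask.cdap.proto.id.ProgramRunId;

public class ProgramRunIdSketch {
    public static void main(String[] args) {
        String run = RunIds.generate().getId();
        // Direct construction, as in the test above
        ProgramRunId direct = new ProgramRunId("ns", "app", ProgramType.SPARK, "test", run);
        // Derivation from a ProgramId, as in the other examples
        ProgramRunId derived = new ProgramId("ns", "app", ProgramType.SPARK, "test").run(run);
        // Both identify the same run
        System.out.println(direct.equals(derived));
    }
}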

Example 28 with ProgramRunId

Use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

The class WorkflowHttpHandler, method getWorkflowNodeStates.

@GET
@Path("/apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/nodes/state")
public void getWorkflowNodeStates(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("app-id") String applicationId, @PathParam("workflow-id") String workflowId, @PathParam("run-id") String runId) throws NotFoundException {
    ApplicationId appId = Ids.namespace(namespaceId).app(applicationId);
    ApplicationSpecification appSpec = store.getApplication(appId);
    if (appSpec == null) {
        throw new ApplicationNotFoundException(appId);
    }
    ProgramId workflowProgramId = appId.workflow(workflowId);
    WorkflowSpecification workflowSpec = appSpec.getWorkflows().get(workflowProgramId.getProgram());
    if (workflowSpec == null) {
        throw new ProgramNotFoundException(workflowProgramId);
    }
    ProgramRunId workflowRunId = workflowProgramId.run(runId);
    if (store.getRun(workflowProgramId, runId) == null) {
        throw new NotFoundException(workflowRunId);
    }
    List<WorkflowNodeStateDetail> nodeStateDetails = store.getWorkflowNodeStates(workflowRunId);
    Map<String, WorkflowNodeStateDetail> nodeStates = new HashMap<>();
    for (WorkflowNodeStateDetail nodeStateDetail : nodeStateDetails) {
        nodeStates.put(nodeStateDetail.getNodeId(), nodeStateDetail);
    }
    responder.sendJson(HttpResponseStatus.OK, nodeStates, STRING_TO_NODESTATEDETAIL_MAP_TYPE);
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) HashMap(java.util.HashMap) ProgramNotFoundException(co.cask.cdap.common.ProgramNotFoundException) ApplicationNotFoundException(co.cask.cdap.common.ApplicationNotFoundException) InstanceNotFoundException(co.cask.cdap.api.dataset.InstanceNotFoundException) NotFoundException(co.cask.cdap.common.NotFoundException) ProgramId(co.cask.cdap.proto.id.ProgramId) WorkflowNodeStateDetail(co.cask.cdap.proto.WorkflowNodeStateDetail) WorkflowSpecification(co.cask.cdap.api.workflow.WorkflowSpecification) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)
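
To exercise this handler from outside, a client issues a GET against the templated path. A hypothetical call follows; the host, the router port, and the /v3/namespaces/{namespace-id} prefix are assumptions about the deployment, not shown in the handler itself.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class WorkflowNodeStatesClient {
    public static void main(String[] args) throws Exception {
        // Hypothetical CDAP router address and route prefix; args[0] is a run id
        URL url = new URL("http://localhost:11015/v3/namespaces/default"
            + "/apps/MyApp/workflows/MyWorkflow/runs/" + args[0] + "/nodes/state");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            // The handler responds with a JSON map of node id -> WorkflowNodeStateDetail
            reader.lines().forEach(System.out::println);
        } finally {
            conn.disconnect();
        }
    }
}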

Example 29 with ProgramRunId

Use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

The class FlowletProgramRunner, method run.

@SuppressWarnings("unchecked")
@Override
public ProgramController run(Program program, ProgramOptions options) {
    BasicFlowletContext flowletContext = null;
    try {
        // Extract and verify parameters
        String flowletName = options.getName();
        int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
        Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
        int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
        Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
        RunId runId = ProgramRunners.getRunId(options);
        ApplicationSpecification appSpec = program.getApplicationSpecification();
        Preconditions.checkNotNull(appSpec, "Missing application specification.");
        ProgramType processorType = program.getType();
        Preconditions.checkNotNull(processorType, "Missing processor type.");
        Preconditions.checkArgument(processorType == ProgramType.FLOW, "Only FLOW process type is supported.");
        String processorName = program.getName();
        Preconditions.checkNotNull(processorName, "Missing processor name.");
        FlowSpecification flowSpec = appSpec.getFlows().get(processorName);
        FlowletDefinition flowletDef = flowSpec.getFlowlets().get(flowletName);
        Preconditions.checkNotNull(flowletDef, "Definition missing for flowlet \"%s\"", flowletName);
        Class<?> clz = Class.forName(flowletDef.getFlowletSpec().getClassName(), true, program.getClassLoader());
        Preconditions.checkArgument(Flowlet.class.isAssignableFrom(clz), "%s is not a Flowlet.", clz);
        // Setup dataset framework context, if required
        ProgramId programId = program.getId();
        FlowletId flowletId = programId.flowlet(flowletName);
        ProgramRunId run = programId.run(runId);
        ProgramContext programContext = new BasicProgramContext(run, flowletId);
        if (dsFramework instanceof ProgramContextAware) {
            ((ProgramContextAware) dsFramework).setContext(programContext);
        }
        Class<? extends Flowlet> flowletClass = (Class<? extends Flowlet>) clz;
        // Creates flowlet context
        flowletContext = new BasicFlowletContext(program, options, flowletId, instanceId, instanceCount, flowletDef.getDatasets(), flowletDef.getFlowletSpec(), metricsCollectionService, discoveryServiceClient, txClient, dsFramework, secureStore, secureStoreManager, messageService, cConf);
        // Creates tx related objects
        DataFabricFacade dataFabricFacade = dataFabricFacadeFactory.create(program, flowletContext.getDatasetCache());
        if (dataFabricFacade instanceof ProgramContextAware) {
            ((ProgramContextAware) dataFabricFacade).setContext(programContext);
        }
        // Creates QueueSpecification
        Table<Node, String, Set<QueueSpecification>> queueSpecs = new SimpleQueueSpecificationGenerator(new ApplicationId(program.getNamespaceId(), program.getApplicationId())).create(flowSpec);
        Flowlet flowlet = new InstantiatorFactory(false).get(TypeToken.of(flowletClass)).create();
        TypeToken<? extends Flowlet> flowletType = TypeToken.of(flowletClass);
        // Set the context classloader to the cdap classloader. It is needed for the DatumWriterFactory be able
        // to load cdap classes
        Thread.currentThread().setContextClassLoader(FlowletProgramRunner.class.getClassLoader());
        // Inject DataSet, OutputEmitter, Metric fields
        ImmutableList.Builder<ProducerSupplier> queueProducerSupplierBuilder = ImmutableList.builder();
        Reflections.visit(flowlet, flowlet.getClass(), new PropertyFieldSetter(flowletDef.getFlowletSpec().getProperties()), new DataSetFieldSetter(flowletContext), new MetricsFieldSetter(flowletContext.getMetrics()), new OutputEmitterFieldSetter(outputEmitterFactory(flowletContext, flowletName, dataFabricFacade, queueProducerSupplierBuilder, queueSpecs)));
        ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder = ImmutableList.builder();
        Collection<ProcessSpecification<?>> processSpecs = createProcessSpecification(flowletContext, flowletType, processMethodFactory(flowlet), processSpecificationFactory(flowletContext, dataFabricFacade, queueReaderFactory, flowletName, queueSpecs, queueConsumerSupplierBuilder, createSchemaCache(program)), Lists.<ProcessSpecification<?>>newLinkedList());
        List<ConsumerSupplier<?>> consumerSuppliers = queueConsumerSupplierBuilder.build();
        // Create the flowlet driver
        AtomicReference<FlowletProgramController> controllerRef = new AtomicReference<>();
        Service serviceHook = createServiceHook(flowletName, consumerSuppliers, controllerRef);
        FlowletRuntimeService driver = new FlowletRuntimeService(flowlet, flowletContext, processSpecs, createCallback(flowlet, flowletDef.getFlowletSpec()), dataFabricFacade, serviceHook);
        FlowletProgramController controller = new FlowletProgramController(program.getId(), flowletName, flowletContext, driver, queueProducerSupplierBuilder.build(), consumerSuppliers);
        controllerRef.set(controller);
        LOG.info("Starting flowlet: {}", flowletContext);
        driver.start();
        LOG.info("Flowlet started: {}", flowletContext);
        return controller;
    } catch (Exception e) {
        // Something went wrong before the flowlet started. Make sure to release all resources
        // of the flowlet context.
        if (flowletContext != null) {
            flowletContext.close();
        }
        throw Throwables.propagate(e);
    }
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) Set(java.util.Set) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList(com.google.common.collect.ImmutableList) Node(co.cask.cdap.app.queue.QueueSpecificationGenerator.Node) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) ProgramContext(co.cask.cdap.data.ProgramContext) InstantiatorFactory(co.cask.cdap.common.lang.InstantiatorFactory) FlowletDefinition(co.cask.cdap.api.flow.FlowletDefinition) FlowSpecification(co.cask.cdap.api.flow.FlowSpecification) MetricsFieldSetter(co.cask.cdap.internal.app.runtime.MetricsFieldSetter) ProgramType(co.cask.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Flowlet(co.cask.cdap.api.flow.flowlet.Flowlet) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) Service(com.google.common.util.concurrent.Service) MessagingService(co.cask.cdap.messaging.MessagingService) AbstractService(com.google.common.util.concurrent.AbstractService) AtomicReference(java.util.concurrent.atomic.AtomicReference) ProgramId(co.cask.cdap.proto.id.ProgramId) DataSetFieldSetter(co.cask.cdap.internal.app.runtime.DataSetFieldSetter) UnsupportedTypeException(co.cask.cdap.api.data.schema.UnsupportedTypeException) IOException(java.io.IOException) SimpleQueueSpecificationGenerator(co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator) PropertyFieldSetter(co.cask.cdap.common.lang.PropertyFieldSetter) FlowletId(co.cask.cdap.proto.id.FlowletId) DataFabricFacade(co.cask.cdap.internal.app.runtime.DataFabricFacade) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)
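
The instanceof-and-inject step for the dataset framework and the data fabric facade is a recurring pattern in these examples: a component opts into run-scoped context by implementing ProgramContextAware. Below is a simplified sketch with made-up stand-in types; the real interfaces (co.cask.cdap.data.ProgramContextAware, BasicProgramContext) carry much richer context than a run string.

interface ProgramContext {
    String getRun();
}

interface ProgramContextAware {
    void setContext(ProgramContext context);
}

final class TrackingDatasetFramework implements ProgramContextAware {
    private ProgramContext context;

    @Override
    public void setContext(ProgramContext context) {
        this.context = context;
    }

    void access(String dataset) {
        // Every dataset access can now be attributed to the current program run
        System.out.println(dataset + " accessed by run " + context.getRun());
    }
}

public class ContextAwareSketch {
    public static void main(String[] args) {
        Object framework = new TrackingDatasetFramework();
        // The runner injects the context only when the component opts in
        if (framework instanceof ProgramContextAware) {
            ((ProgramContextAware) framework).setContext(() -> "run-1234");
        }
        ((TrackingDatasetFramework) framework).access("purchases");
    }
}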

Example 30 with ProgramRunId

Use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.

The class BasicLineageWriterTest, method testWrites.

@Test
public void testWrites() throws Exception {
    Injector injector = getInjector();
    MetadataStore metadataStore = injector.getInstance(MetadataStore.class);
    LineageStore lineageStore = injector.getInstance(LineageStore.class);
    LineageWriter lineageWriter = new BasicLineageWriter(lineageStore);
    // Define entities
    ProgramId program = new ProgramId(NamespaceId.DEFAULT.getNamespace(), "app", ProgramType.FLOW, "flow");
    StreamId stream = new StreamId(NamespaceId.DEFAULT.getNamespace(), "stream");
    ProgramRunId run1 = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(), program.getEntityName(), RunIds.generate(10000).getId());
    ProgramRunId run2 = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(), program.getEntityName(), RunIds.generate(20000).getId());
    // Tag stream
    metadataStore.addTags(MetadataScope.USER, stream, "stag1", "stag2");
    // Write access for run1
    lineageWriter.addAccess(run1, stream, AccessType.READ);
    Assert.assertEquals(ImmutableSet.of(program, stream), lineageStore.getEntitiesForRun(run1));
    // Record the time so we can verify that a duplicate write does not update the access time.
    long beforeSecondTag = System.currentTimeMillis();
    // Wait for next millisecond, since access time is stored in milliseconds.
    TimeUnit.MILLISECONDS.sleep(1);
    // Add another tag to stream
    metadataStore.addTags(MetadataScope.USER, stream, "stag3");
    // Write access for run1 again
    lineageWriter.addAccess(run1, stream, AccessType.READ);
    // The write should be a no-op, and the access time for run1 should not be updated
    Assert.assertTrue(lineageStore.getAccessTimesForRun(run1).get(0) < beforeSecondTag);
    // However, you can write access for another run
    lineageWriter.addAccess(run2, stream, AccessType.READ);
    // Assert new access time is written
    Assert.assertTrue(lineageStore.getAccessTimesForRun(run2).get(0) >= beforeSecondTag);
}
Also used : DefaultMetadataStore(co.cask.cdap.data2.metadata.store.DefaultMetadataStore) MetadataStore(co.cask.cdap.data2.metadata.store.MetadataStore) StreamId(co.cask.cdap.proto.id.StreamId) Injector(com.google.inject.Injector) LineageStore(co.cask.cdap.data2.metadata.lineage.LineageStore) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramId(co.cask.cdap.proto.id.ProgramId) Test(org.junit.Test)
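
The interesting assertion in this test is that a repeated addAccess for the same run and entity is a no-op and does not refresh the stored access time. A toy illustration of that guard with made-up types follows; BasicLineageWriter's real implementation is not shown here.

import java.util.HashMap;
import java.util.Map;

public class DedupLineageWriterSketch {
    private final Map<String, Long> accessTimes = new HashMap<>();

    public void addAccess(String run, String dataset, String accessType) {
        // putIfAbsent keeps the first access time; repeated writes are no-ops
        accessTimes.putIfAbsent(run + "|" + dataset + "|" + accessType, System.currentTimeMillis());
    }

    public Long getAccessTime(String run, String dataset, String accessType) {
        return accessTimes.get(run + "|" + dataset + "|" + accessType);
    }

    public static void main(String[] args) throws InterruptedException {
        DedupLineageWriterSketch writer = new DedupLineageWriterSketch();
        writer.addAccess("run1", "stream", "READ");
        long first = writer.getAccessTime("run1", "stream", "READ");
        Thread.sleep(2);
        // Duplicate write: the stored time must not change
        writer.addAccess("run1", "stream", "READ");
        System.out.println(first == writer.getAccessTime("run1", "stream", "READ"));  // true
    }
}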

Aggregations

ProgramRunId (co.cask.cdap.proto.id.ProgramRunId): 53 usages
ProgramId (co.cask.cdap.proto.id.ProgramId): 23 usages
Test (org.junit.Test): 22 usages
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 12 usages
RunRecordMeta (co.cask.cdap.internal.app.store.RunRecordMeta): 10 usages
Path (javax.ws.rs.Path): 10 usages
RunId (org.apache.twill.api.RunId): 10 usages
DatasetId (co.cask.cdap.proto.id.DatasetId): 9 usages
Relation (co.cask.cdap.data2.metadata.lineage.Relation): 7 usages
HashSet (java.util.HashSet): 7 usages
NotFoundException (co.cask.cdap.common.NotFoundException): 6 usages
WorkflowNodeStateDetail (co.cask.cdap.proto.WorkflowNodeStateDetail): 6 usages
HashMap (java.util.HashMap): 6 usages
GET (javax.ws.rs.GET): 6 usages
ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification): 5 usages
NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId): 5 usages
StreamId (co.cask.cdap.proto.id.StreamId): 5 usages
Map (java.util.Map): 5 usages
CommandInputError (co.cask.cdap.cli.exception.CommandInputError): 4 usages
MethodArgument (co.cask.cdap.common.internal.remote.MethodArgument): 4 usages