use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class SparkRuntimeContextProvider method createIfNotExists.
/**
 * Creates a singleton {@link SparkRuntimeContext}.
 * It relies on files being localized to known locations by the SparkRuntimeService.
 */
private static synchronized SparkRuntimeContext createIfNotExists() {
  if (sparkRuntimeContext != null) {
    return sparkRuntimeContext;
  }
  try {
    CConfiguration cConf = createCConf();
    Configuration hConf = createHConf();
    SparkRuntimeContextConfig contextConfig = new SparkRuntimeContextConfig(hConf);
    // Should only run in YARN mode, and only on an executor node, never on the driver node.
    Preconditions.checkState(!contextConfig.isLocal() && Boolean.parseBoolean(System.getenv("SPARK_YARN_MODE")),
                             "SparkContextProvider.getSparkContext should only be called in Spark executor process.");
    // Create the program
    Program program = createProgram(cConf, contextConfig);
    Injector injector = createInjector(cConf, hConf, contextConfig.getProgramId(),
                                       contextConfig.getProgramOptions());
    final Service logAppenderService = new LogAppenderService(injector.getInstance(LogAppenderInitializer.class),
                                                              contextConfig.getProgramOptions());
    final ZKClientService zkClientService = injector.getInstance(ZKClientService.class);
    final KafkaClientService kafkaClientService = injector.getInstance(KafkaClientService.class);
    final MetricsCollectionService metricsCollectionService = injector.getInstance(MetricsCollectionService.class);
    final StreamCoordinatorClient streamCoordinatorClient = injector.getInstance(StreamCoordinatorClient.class);
    // Use a shutdown hook to stop the services. Since this class should only be loaded from the
    // system classloader of the Spark executor, there should be exactly one instance.
    // The cost of not shutting down cleanly is that some logs/metrics might be lost.
    Services.chainStart(logAppenderService, zkClientService, kafkaClientService,
                        metricsCollectionService, streamCoordinatorClient);
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        // The logger may already have been shut down. Use System.out/err instead.
        System.out.println("Shutting down SparkClassLoader services");
        Future<List<ListenableFuture<Service.State>>> future =
          Services.chainStop(logAppenderService, streamCoordinatorClient, metricsCollectionService,
                             kafkaClientService, zkClientService);
        try {
          List<ListenableFuture<Service.State>> futures = future.get(5, TimeUnit.SECONDS);
          System.out.println("SparkClassLoader services shutdown completed: " + futures);
        } catch (Exception e) {
          System.err.println("Exception when shutting down services");
          e.printStackTrace(System.err);
        }
      }
    });
    // Construct the DatasetFramework
    DatasetFramework datasetFramework = injector.getInstance(DatasetFramework.class);
    WorkflowProgramInfo workflowInfo = contextConfig.getWorkflowProgramInfo();
    DatasetFramework programDatasetFramework = workflowInfo == null
      ? datasetFramework
      : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo,
                                                                 contextConfig.getApplicationSpecification());
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
      ProgramRunId programRunId = program.getId().run(ProgramRunners.getRunId(contextConfig.getProgramOptions()));
      ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programRunId));
    }
    PluginInstantiator pluginInstantiator = createPluginInstantiator(cConf, contextConfig, program.getClassLoader());
    // Create the context object
    sparkRuntimeContext = new SparkRuntimeContext(
      contextConfig.getConfiguration(), program, contextConfig.getProgramOptions(), cConf, getHostname(),
      injector.getInstance(TransactionSystemClient.class), programDatasetFramework,
      injector.getInstance(DiscoveryServiceClient.class), metricsCollectionService,
      injector.getInstance(StreamAdmin.class), contextConfig.getWorkflowProgramInfo(), pluginInstantiator,
      injector.getInstance(SecureStore.class), injector.getInstance(SecureStoreManager.class),
      injector.getInstance(AuthorizationEnforcer.class), injector.getInstance(AuthenticationContext.class),
      injector.getInstance(MessagingService.class));
    LoggingContextAccessor.setLoggingContext(sparkRuntimeContext.getLoggingContext());
    return sparkRuntimeContext;
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
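For orientation, a minimal sketch of how callers would typically reach this singleton. The volatile field and the get() accessor below are assumptions for illustration, not taken verbatim from the CDAP sources.

private static volatile SparkRuntimeContext sparkRuntimeContext;

// Hypothetical accessor: returns the singleton, creating it on first use.
public static SparkRuntimeContext get() {
  SparkRuntimeContext context = sparkRuntimeContext;
  // Only enter the synchronized factory when the context has not been created yet
  return context != null ? context : createIfNotExists();
}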
use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class SparkExecutionServiceTest method testCompletion.
@Test
public void testCompletion() throws Exception {
  ProgramRunId programRunId = new ProgramRunId("ns", "app", ProgramType.SPARK, "test",
                                               RunIds.generate().getId());
  // Start a service with no token support (the token is null)
  SparkExecutionService service =
    new SparkExecutionService(locationFactory, InetAddress.getLoopbackAddress().getCanonicalHostName(),
                              programRunId, null);
  service.startAndWait();
  try {
    SparkExecutionClient client = new SparkExecutionClient(service.getBaseURI(), programRunId);
    // Heartbeat multiple times.
    for (int i = 0; i < 5; i++) {
      Assert.assertNull(client.heartbeat(null));
      TimeUnit.MILLISECONDS.sleep(50);
    }
    // Call completed to notify the service that the program has stopped
    client.completed(null);
  } finally {
    service.stopAndWait();
  }
}
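The test builds its ProgramRunId with the five-argument constructor; an equivalent construction using the fluent ProgramId.run(...) helper seen in the other examples on this page would be:

// Equivalent to the constructor call above, via the entity-id builder API
ProgramId programId = new ProgramId("ns", "app", ProgramType.SPARK, "test");
ProgramRunId programRunId = programId.run(RunIds.generate().getId());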
use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class WorkflowHttpHandler method getWorkflowNodeStates.
@GET
@Path("/apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/nodes/state")
public void getWorkflowNodeStates(HttpRequest request, HttpResponder responder,
                                  @PathParam("namespace-id") String namespaceId,
                                  @PathParam("app-id") String applicationId,
                                  @PathParam("workflow-id") String workflowId,
                                  @PathParam("run-id") String runId) throws NotFoundException {
  ApplicationId appId = Ids.namespace(namespaceId).app(applicationId);
  ApplicationSpecification appSpec = store.getApplication(appId);
  if (appSpec == null) {
    throw new ApplicationNotFoundException(appId);
  }
  ProgramId workflowProgramId = appId.workflow(workflowId);
  WorkflowSpecification workflowSpec = appSpec.getWorkflows().get(workflowProgramId.getProgram());
  if (workflowSpec == null) {
    throw new ProgramNotFoundException(workflowProgramId);
  }
  ProgramRunId workflowRunId = workflowProgramId.run(runId);
  if (store.getRun(workflowProgramId, runId) == null) {
    throw new NotFoundException(workflowRunId);
  }
  List<WorkflowNodeStateDetail> nodeStateDetails = store.getWorkflowNodeStates(workflowRunId);
  Map<String, WorkflowNodeStateDetail> nodeStates = new HashMap<>();
  for (WorkflowNodeStateDetail nodeStateDetail : nodeStateDetails) {
    nodeStates.put(nodeStateDetail.getNodeId(), nodeStateDetail);
  }
  responder.sendJson(HttpResponseStatus.OK, nodeStates, STRING_TO_NODESTATEDETAIL_MAP_TYPE);
}
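For completeness, a minimal client sketch against this endpoint. The host, port, and the /v3/namespaces path prefix are assumptions based on common CDAP router defaults rather than anything shown in this handler, so adjust them for a real deployment.

// Hypothetical GET of the nodes/state endpoint; host, port, and prefix are assumed
URL url = new URL("http://localhost:11015/v3/namespaces/default/apps/MyApp"
                    + "/workflows/MyWorkflow/runs/" + runId + "/nodes/state");
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
try (Reader reader = new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8)) {
  // On HTTP 200 the body is a JSON map of node id to WorkflowNodeStateDetail,
  // matching the STRING_TO_NODESTATEDETAIL_MAP_TYPE used by the handler above
  Map<String, WorkflowNodeStateDetail> nodeStates =
    new Gson().fromJson(reader, new TypeToken<Map<String, WorkflowNodeStateDetail>>() { }.getType());
} finally {
  conn.disconnect();
}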
use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class FlowletProgramRunner method run.
@SuppressWarnings("unchecked")
@Override
public ProgramController run(Program program, ProgramOptions options) {
  BasicFlowletContext flowletContext = null;
  try {
    // Extract and verify parameters
    String flowletName = options.getName();
    int instanceId = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCE_ID, "-1"));
    Preconditions.checkArgument(instanceId >= 0, "Missing instance Id");
    int instanceCount = Integer.parseInt(options.getArguments().getOption(ProgramOptionConstants.INSTANCES, "0"));
    Preconditions.checkArgument(instanceCount > 0, "Invalid or missing instance count");
    RunId runId = ProgramRunners.getRunId(options);
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.FLOW, "Only FLOW process type is supported.");
    String processorName = program.getName();
    Preconditions.checkNotNull(processorName, "Missing processor name.");
    FlowSpecification flowSpec = appSpec.getFlows().get(processorName);
    FlowletDefinition flowletDef = flowSpec.getFlowlets().get(flowletName);
    Preconditions.checkNotNull(flowletDef, "Definition missing for flowlet \"%s\"", flowletName);
    Class<?> clz = Class.forName(flowletDef.getFlowletSpec().getClassName(), true, program.getClassLoader());
    Preconditions.checkArgument(Flowlet.class.isAssignableFrom(clz), "%s is not a Flowlet.", clz);
    // Setup dataset framework context, if required
    ProgramId programId = program.getId();
    FlowletId flowletId = programId.flowlet(flowletName);
    ProgramRunId run = programId.run(runId);
    ProgramContext programContext = new BasicProgramContext(run, flowletId);
    if (dsFramework instanceof ProgramContextAware) {
      ((ProgramContextAware) dsFramework).setContext(programContext);
    }
    Class<? extends Flowlet> flowletClass = (Class<? extends Flowlet>) clz;
    // Creates flowlet context
    flowletContext = new BasicFlowletContext(program, options, flowletId, instanceId, instanceCount,
                                             flowletDef.getDatasets(), flowletDef.getFlowletSpec(),
                                             metricsCollectionService, discoveryServiceClient, txClient,
                                             dsFramework, secureStore, secureStoreManager, messageService, cConf);
    // Creates tx related objects
    DataFabricFacade dataFabricFacade = dataFabricFacadeFactory.create(program, flowletContext.getDatasetCache());
    if (dataFabricFacade instanceof ProgramContextAware) {
      ((ProgramContextAware) dataFabricFacade).setContext(programContext);
    }
    // Creates QueueSpecification
    Table<Node, String, Set<QueueSpecification>> queueSpecs =
      new SimpleQueueSpecificationGenerator(new ApplicationId(program.getNamespaceId(),
                                                              program.getApplicationId())).create(flowSpec);
    Flowlet flowlet = new InstantiatorFactory(false).get(TypeToken.of(flowletClass)).create();
    TypeToken<? extends Flowlet> flowletType = TypeToken.of(flowletClass);
    // Set the context classloader to the cdap classloader. It is needed for the DatumWriterFactory
    // to be able to load cdap classes.
    Thread.currentThread().setContextClassLoader(FlowletProgramRunner.class.getClassLoader());
    // Inject DataSet, OutputEmitter, Metric fields
    ImmutableList.Builder<ProducerSupplier> queueProducerSupplierBuilder = ImmutableList.builder();
    Reflections.visit(flowlet, flowlet.getClass(),
                      new PropertyFieldSetter(flowletDef.getFlowletSpec().getProperties()),
                      new DataSetFieldSetter(flowletContext),
                      new MetricsFieldSetter(flowletContext.getMetrics()),
                      new OutputEmitterFieldSetter(outputEmitterFactory(flowletContext, flowletName,
                                                                        dataFabricFacade,
                                                                        queueProducerSupplierBuilder, queueSpecs)));
    ImmutableList.Builder<ConsumerSupplier<?>> queueConsumerSupplierBuilder = ImmutableList.builder();
    Collection<ProcessSpecification<?>> processSpecs =
      createProcessSpecification(flowletContext, flowletType, processMethodFactory(flowlet),
                                 processSpecificationFactory(flowletContext, dataFabricFacade, queueReaderFactory,
                                                             flowletName, queueSpecs, queueConsumerSupplierBuilder,
                                                             createSchemaCache(program)),
                                 Lists.<ProcessSpecification<?>>newLinkedList());
    List<ConsumerSupplier<?>> consumerSuppliers = queueConsumerSupplierBuilder.build();
    // Create the flowlet driver
    AtomicReference<FlowletProgramController> controllerRef = new AtomicReference<>();
    Service serviceHook = createServiceHook(flowletName, consumerSuppliers, controllerRef);
    FlowletRuntimeService driver = new FlowletRuntimeService(flowlet, flowletContext, processSpecs,
                                                             createCallback(flowlet, flowletDef.getFlowletSpec()),
                                                             dataFabricFacade, serviceHook);
    FlowletProgramController controller =
      new FlowletProgramController(program.getId(), flowletName, flowletContext, driver,
                                   queueProducerSupplierBuilder.build(), consumerSuppliers);
    controllerRef.set(controller);
    LOG.info("Starting flowlet: {}", flowletContext);
    driver.start();
    LOG.info("Flowlet started: {}", flowletContext);
    return controller;
  } catch (Exception e) {
    // Clean up the flowlet context if anything failed during its creation
    if (flowletContext != null) {
      flowletContext.close();
    }
    throw Throwables.propagate(e);
  }
}
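The run method applies the same ProgramContextAware wiring to both the dataset framework and the data fabric facade; distilled, the repeated pattern is the sketch below, where setContextIfAware is a hypothetical helper name, not CDAP API.

// Hypothetical helper capturing the repeated instanceof-and-set pattern above
private static void setContextIfAware(Object component, ProgramContext programContext) {
  if (component instanceof ProgramContextAware) {
    ((ProgramContextAware) component).setContext(programContext);
  }
}
// Usage, mirroring the two call sites in run():
//   setContextIfAware(dsFramework, programContext);
//   setContextIfAware(dataFabricFacade, programContext);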
use of co.cask.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class BasicLineageWriterTest method testWrites.
@Test
public void testWrites() throws Exception {
  Injector injector = getInjector();
  MetadataStore metadataStore = injector.getInstance(MetadataStore.class);
  LineageStore lineageStore = injector.getInstance(LineageStore.class);
  LineageWriter lineageWriter = new BasicLineageWriter(lineageStore);
  // Define entities
  ProgramId program = new ProgramId(NamespaceId.DEFAULT.getNamespace(), "app", ProgramType.FLOW, "flow");
  StreamId stream = new StreamId(NamespaceId.DEFAULT.getNamespace(), "stream");
  ProgramRunId run1 = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(),
                                       program.getEntityName(), RunIds.generate(10000).getId());
  ProgramRunId run2 = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(),
                                       program.getEntityName(), RunIds.generate(20000).getId());
  // Tag the stream
  metadataStore.addTags(MetadataScope.USER, stream, "stag1", "stag2");
  // Write access for run1
  lineageWriter.addAccess(run1, stream, AccessType.READ);
  Assert.assertEquals(ImmutableSet.of(program, stream), lineageStore.getEntitiesForRun(run1));
  // Record the time to verify duplicate writes.
  long beforeSecondTag = System.currentTimeMillis();
  // Wait for the next millisecond, since access time is stored in milliseconds.
  TimeUnit.MILLISECONDS.sleep(1);
  // Add another tag to the stream
  metadataStore.addTags(MetadataScope.USER, stream, "stag3");
  // Write access for run1 again
  lineageWriter.addAccess(run1, stream, AccessType.READ);
  // The write should be a no-op, and the access time for run1 should not be updated
  Assert.assertTrue(lineageStore.getAccessTimesForRun(run1).get(0) < beforeSecondTag);
  // However, writing access for another run is allowed
  lineageWriter.addAccess(run2, stream, AccessType.READ);
  // Assert that a new access time is written
  Assert.assertTrue(lineageStore.getAccessTimesForRun(run2).get(0) >= beforeSecondTag);
}
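The verbose five-argument constructions of run1 and run2 can be written more compactly with the ProgramId.run(...) helper used in the other examples on this page:

// Equivalent, shorter construction of the two run ids above
ProgramRunId run1 = program.run(RunIds.generate(10000).getId());
ProgramRunId run2 = program.run(RunIds.generate(20000).getId());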