Example 1 with RunId

Use of org.apache.twill.api.RunId in project cdap by caskdata.

From class WorkflowStatsSLAHttpHandlerTest, method testCompare:

@Test
public void testCompare() throws Exception {
    deploy(WorkflowApp.class);
    String workflowName = "FunWorkflow";
    String mapreduceName = "ClassicWordCount";
    String sparkName = "SparkWorkflowTest";
    WorkflowId workflowProgram = WORKFLOW_APP.workflow(workflowName);
    ProgramId mapreduceProgram = WORKFLOW_APP.mr(mapreduceName);
    ProgramId sparkProgram = WORKFLOW_APP.spark(sparkName);
    List<RunId> workflowRunIdList = setupRuns(workflowProgram, mapreduceProgram, sparkProgram, store, 2);
    RunId workflowRun1 = workflowRunIdList.get(0);
    RunId workflowRun2 = workflowRunIdList.get(1);
    String request = String.format("%s/namespaces/%s/apps/%s/workflows/%s/runs/%s/compare?other-run-id=%s", Constants.Gateway.API_VERSION_3, Id.Namespace.DEFAULT.getId(), WorkflowApp.class.getSimpleName(), workflowProgram.getProgram(), workflowRun1.getId(), workflowRun2.getId());
    HttpResponse response = doGet(request);
    Collection<WorkflowStatsComparison.ProgramNodes> workflowStatistics = readResponse(response, new TypeToken<Collection<WorkflowStatsComparison.ProgramNodes>>() {
    }.getType());
    Assert.assertNotNull(workflowStatistics.iterator().next());
    Assert.assertEquals(2, workflowStatistics.size());
    for (WorkflowStatsComparison.ProgramNodes node : workflowStatistics) {
        if (node.getProgramType() == ProgramType.MAPREDUCE) {
            Assert.assertEquals(38L, (long) node.getWorkflowProgramDetailsList().get(0).getMetrics().get(TaskCounter.MAP_INPUT_RECORDS.name()));
        }
    }
}
Also used: WorkflowApp(co.cask.cdap.WorkflowApp) WorkflowStatsComparison(co.cask.cdap.proto.WorkflowStatsComparison) TypeToken(com.google.gson.reflect.TypeToken) HttpResponse(org.apache.http.HttpResponse) WorkflowId(co.cask.cdap.proto.id.WorkflowId) ProgramId(co.cask.cdap.proto.id.ProgramId) RunId(org.apache.twill.api.RunId) Test(org.junit.Test)
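
The compare endpoint above only needs the textual form of each run id, obtained via RunId.getId(). A minimal sketch of producing two distinct, time-ordered run ids for such a request, assuming the time-based RunIds.generate(long) factory shown in Example 5 (the setupRuns helper used by the test is not shown on this page):

// Hedged sketch: two time-ordered run ids and the string form interpolated into the URL.
// RunIds.generate(long) and RunId.getId() are taken from the other examples on this page.
RunId run1 = RunIds.generate(System.currentTimeMillis());
RunId run2 = RunIds.generate(System.currentTimeMillis() + 1000);
String comparePath = String.format("runs/%s/compare?other-run-id=%s", run1.getId(), run2.getId());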

Example 2 with RunId

Use of org.apache.twill.api.RunId in project cdap by caskdata.

From class MapReduceProgramRunner, method run:

@Override
public ProgramController run(final Program program, ProgramOptions options) {
    // Extract and verify parameters
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.MAPREDUCE, "Only MAPREDUCE process type is supported.");
    MapReduceSpecification spec = appSpec.getMapReduce().get(program.getName());
    Preconditions.checkNotNull(spec, "Missing MapReduceSpecification for %s", program.getName());
    Arguments arguments = options.getArguments();
    RunId runId = ProgramRunners.getRunId(options);
    WorkflowProgramInfo workflowInfo = WorkflowProgramInfo.create(arguments);
    DatasetFramework programDatasetFramework = workflowInfo == null ? datasetFramework : NameMappedDatasetFramework.createFromWorkflowProgramInfo(datasetFramework, workflowInfo, appSpec);
    // Setup dataset framework context, if required
    if (programDatasetFramework instanceof ProgramContextAware) {
        ProgramId programId = program.getId();
        ((ProgramContextAware) programDatasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    MapReduce mapReduce;
    try {
        mapReduce = new InstantiatorFactory(false).get(TypeToken.of(program.<MapReduce>getMainClass())).create();
    } catch (Exception e) {
        LOG.error("Failed to instantiate MapReduce class for {}", spec.getClassName(), e);
        throw Throwables.propagate(e);
    }
    // List of all Closeable resources that need to be cleaned up
    List<Closeable> closeables = new ArrayList<>();
    try {
        PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
        if (pluginInstantiator != null) {
            closeables.add(pluginInstantiator);
        }
        final BasicMapReduceContext context = new BasicMapReduceContext(program, options, cConf, spec, workflowInfo, discoveryServiceClient, metricsCollectionService, txSystemClient, programDatasetFramework, streamAdmin, getPluginArchive(options), pluginInstantiator, secureStore, secureStoreManager, messagingService);
        Reflections.visit(mapReduce, mapReduce.getClass(), new PropertyFieldSetter(context.getSpecification().getProperties()), new MetricsFieldSetter(context.getMetrics()), new DataSetFieldSetter(context));
        // note: this sets logging context on the thread level
        LoggingContextAccessor.setLoggingContext(context.getLoggingContext());
        // Set the job queue to hConf if it is provided
        Configuration hConf = new Configuration(this.hConf);
        String schedulerQueue = options.getArguments().getOption(Constants.AppFabric.APP_SCHEDULER_QUEUE);
        if (schedulerQueue != null && !schedulerQueue.isEmpty()) {
            hConf.set(JobContext.QUEUE_NAME, schedulerQueue);
        }
        Service mapReduceRuntimeService = new MapReduceRuntimeService(injector, cConf, hConf, mapReduce, spec, context, program.getJarLocation(), locationFactory, streamAdmin, txSystemClient, authorizationEnforcer, authenticationContext);
        mapReduceRuntimeService.addListener(createRuntimeServiceListener(program.getId(), runId, closeables, arguments, options.getUserArguments()), Threads.SAME_THREAD_EXECUTOR);
        final ProgramController controller = new MapReduceProgramController(mapReduceRuntimeService, context);
        LOG.debug("Starting MapReduce Job: {}", context);
        // In local mode, or when Hadoop security is enabled, the service can be started directly.
        // Otherwise, start it as the configured HDFS user; without this, the YARN user would
        // be running the job, but the data directory will be owned by cdap.
        if (MapReduceTaskContextProvider.isLocal(hConf) || UserGroupInformation.isSecurityEnabled()) {
            mapReduceRuntimeService.start();
        } else {
            ProgramRunners.startAsUser(cConf.get(Constants.CFG_HDFS_USER), mapReduceRuntimeService);
        }
        return controller;
    } catch (Exception e) {
        closeAllQuietly(closeables);
        throw Throwables.propagate(e);
    }
}
Also used: ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) MapReduce(co.cask.cdap.api.mapreduce.MapReduce) NameMappedDatasetFramework(co.cask.cdap.internal.app.runtime.workflow.NameMappedDatasetFramework) DatasetFramework(co.cask.cdap.data2.dataset2.DatasetFramework) InstantiatorFactory(co.cask.cdap.common.lang.InstantiatorFactory) MetricsFieldSetter(co.cask.cdap.internal.app.runtime.MetricsFieldSetter) ProgramType(co.cask.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) ProgramController(co.cask.cdap.app.runtime.ProgramController) MapReduceSpecification(co.cask.cdap.api.mapreduce.MapReduceSpecification) Arguments(co.cask.cdap.app.runtime.Arguments) MessagingService(co.cask.cdap.messaging.MessagingService) MetricsCollectionService(co.cask.cdap.api.metrics.MetricsCollectionService) Service(com.google.common.util.concurrent.Service) ProgramId(co.cask.cdap.proto.id.ProgramId) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) DataSetFieldSetter(co.cask.cdap.internal.app.runtime.DataSetFieldSetter) PropertyFieldSetter(co.cask.cdap.common.lang.PropertyFieldSetter) WorkflowProgramInfo(co.cask.cdap.internal.app.runtime.workflow.WorkflowProgramInfo) PluginInstantiator(co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)

Example 3 with RunId

Use of org.apache.twill.api.RunId in project cdap by caskdata.

From class LineageAdmin, method getMetadataForRun:

/**
   * @return metadata associated with a run
   */
public Set<MetadataRecord> getMetadataForRun(ProgramRunId run) throws NotFoundException {
    entityExistenceVerifier.ensureExists(run);
    Set<NamespacedEntityId> runEntities = new HashSet<>(lineageStoreReader.getEntitiesForRun(run));
    // No entities associated with the run, but run exists.
    if (runEntities.isEmpty()) {
        return ImmutableSet.of();
    }
    RunId runId = RunIds.fromString(run.getRun());
    // The entities returned by lineageStore do not contain the application
    ApplicationId application = run.getParent().getParent();
    runEntities.add(application);
    return metadataStore.getSnapshotBeforeTime(MetadataScope.USER, runEntities, RunIds.getTime(runId, TimeUnit.MILLISECONDS));
}
Also used: NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)
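
The key detail in this example is that these run ids are time-based: RunIds.fromString(..) re-parses the textual id stored with the run, and RunIds.getTime(..) recovers the embedded start time, which getMetadataForRun uses to pick the metadata snapshot. A minimal round-trip sketch, assuming only the RunIds helper methods already visible on this page:

// Hedged sketch of the round trip used above (assumed imports: RunId, RunIds, TimeUnit).
RunId original = RunIds.generate(System.currentTimeMillis());     // embeds a timestamp
RunId parsed = RunIds.fromString(original.getId());               // parse the textual id back
long startMillis = RunIds.getTime(parsed, TimeUnit.MILLISECONDS); // recover the start time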

Example 4 with RunId

Use of org.apache.twill.api.RunId in project cdap by caskdata.

From class AbstractProgramRuntimeServiceTest, method testUpdateDeadLock:

@Test(timeout = 5000L)
public void testUpdateDeadLock() {
    // This test covers the deadlock scenario reported in CDAP-3716.
    // Create a service to simulate an existing running app.
    Service service = new TestService();
    ProgramId programId = NamespaceId.DEFAULT.app("dummyApp").program(ProgramType.WORKER, "dummy");
    RunId runId = RunIds.generate();
    ProgramRuntimeService.RuntimeInfo extraInfo = createRuntimeInfo(service, programId, runId);
    service.startAndWait();
    ProgramRunnerFactory runnerFactory = createProgramRunnerFactory();
    TestProgramRuntimeService runtimeService = new TestProgramRuntimeService(CConfiguration.create(), runnerFactory, null, extraInfo);
    runtimeService.startAndWait();
    // Without the fix for CDAP-3716, this lookup would deadlock
    Assert.assertNotNull(runtimeService.lookup(programId, runId));
    service.stopAndWait();
    runtimeService.stopAndWait();
}
Also used: AbstractExecutionThreadService(com.google.common.util.concurrent.AbstractExecutionThreadService) AbstractIdleService(com.google.common.util.concurrent.AbstractIdleService) Service(com.google.common.util.concurrent.Service) ProgramId(co.cask.cdap.proto.id.ProgramId) RunId(org.apache.twill.api.RunId) Test(org.junit.Test)
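
In tests like this, a fresh RunId is generated per simulated run and paired with a ProgramId; the fully qualified ProgramRunId used elsewhere on this page (Examples 3 and 5) is derived from the two. A small sketch combining only calls that appear in these examples:

// Hedged sketch: generate a run id and derive the ProgramRunId used for lookups and lineage records.
ProgramId programId = NamespaceId.DEFAULT.app("dummyApp").program(ProgramType.WORKER, "dummy");
RunId runId = RunIds.generate();
ProgramRunId programRunId = programId.run(runId.getId());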

Example 5 with RunId

Use of org.apache.twill.api.RunId in project cdap by caskdata.

From class LineageDatasetTest, method testMultipleRelations:

@Test
public void testMultipleRelations() throws Exception {
    final LineageDataset lineageDataset = getLineageDataset("testMultipleRelations");
    Assert.assertNotNull(lineageDataset);
    TransactionExecutor txnl = dsFrameworkUtil.newInMemoryTransactionExecutor((TransactionAware) lineageDataset);
    final RunId runId1 = RunIds.generate(10000);
    final RunId runId2 = RunIds.generate(20000);
    final RunId runId3 = RunIds.generate(30000);
    final RunId runId4 = RunIds.generate(40000);
    final DatasetId datasetInstance1 = NamespaceId.DEFAULT.dataset("dataset1");
    final DatasetId datasetInstance2 = NamespaceId.DEFAULT.dataset("dataset2");
    final StreamId stream1 = NamespaceId.DEFAULT.stream("stream1");
    final StreamId stream2 = NamespaceId.DEFAULT.stream("stream2");
    final ProgramId program1 = NamespaceId.DEFAULT.app("app1").flow("flow1");
    final FlowletId flowlet1 = program1.flowlet("flowlet1");
    final ProgramId program2 = NamespaceId.DEFAULT.app("app2").worker("worker2");
    final ProgramId program3 = NamespaceId.DEFAULT.app("app3").service("service3");
    final ProgramRunId run11 = program1.run(runId1.getId());
    final ProgramRunId run22 = program2.run(runId2.getId());
    final ProgramRunId run23 = program2.run(runId3.getId());
    final ProgramRunId run34 = program3.run(runId4.getId());
    final long now = System.currentTimeMillis();
    //noinspection UnnecessaryLocalVariable
    final long run11Data1AccessTime = now;
    final long run22Data2AccessTime = now + 1;
    final long run22Stream1AccessTime = now + 2;
    final long run23Stream2AccessTime = now + 1;
    final long run23Data2AccessTime = now + 3;
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            lineageDataset.addAccess(run11, datasetInstance1, AccessType.READ, run11Data1AccessTime, flowlet1);
            lineageDataset.addAccess(run22, datasetInstance2, AccessType.WRITE, run22Data2AccessTime);
            lineageDataset.addAccess(run22, stream1, AccessType.READ, run22Stream1AccessTime);
            lineageDataset.addAccess(run23, stream2, AccessType.READ, run23Stream2AccessTime);
            lineageDataset.addAccess(run23, datasetInstance2, AccessType.WRITE, run23Data2AccessTime);
            lineageDataset.addAccess(run34, datasetInstance2, AccessType.READ_WRITE, System.currentTimeMillis());
            lineageDataset.addAccess(run34, stream2, AccessType.UNKNOWN, System.currentTimeMillis());
        }
    });
    txnl.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance1, program1, AccessType.READ, runId1, ImmutableSet.of(flowlet1))), lineageDataset.getRelations(datasetInstance1, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3), new Relation(datasetInstance2, program3, AccessType.READ_WRITE, runId4)), lineageDataset.getRelations(datasetInstance2, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(stream1, program2, AccessType.READ, runId2)), lineageDataset.getRelations(stream1, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(stream2, program2, AccessType.READ, runId3), new Relation(stream2, program3, AccessType.UNKNOWN, runId4)), lineageDataset.getRelations(stream2, 0, 100000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(stream1, program2, AccessType.READ, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3), new Relation(stream2, program2, AccessType.READ, runId3)), lineageDataset.getRelations(program2, 0, 100000, Predicates.<Relation>alwaysTrue()));
            // Reduced time range
            Assert.assertEquals(ImmutableSet.of(new Relation(datasetInstance2, program2, AccessType.WRITE, runId2), new Relation(datasetInstance2, program2, AccessType.WRITE, runId3)), lineageDataset.getRelations(datasetInstance2, 0, 35000, Predicates.<Relation>alwaysTrue()));
            Assert.assertEquals(toSet(program1, datasetInstance1), lineageDataset.getEntitiesForRun(run11));
            Assert.assertEquals(ImmutableList.of(run11Data1AccessTime), lineageDataset.getAccessTimesForRun(run11));
            Assert.assertEquals(toSet(program2, datasetInstance2, stream1), lineageDataset.getEntitiesForRun(run22));
            Assert.assertEquals(ImmutableList.of(run22Data2AccessTime, run22Stream1AccessTime), lineageDataset.getAccessTimesForRun(run22));
            Assert.assertEquals(toSet(program2, datasetInstance2, stream2), lineageDataset.getEntitiesForRun(run23));
            Assert.assertEquals(ImmutableList.of(run23Data2AccessTime, run23Stream2AccessTime), lineageDataset.getAccessTimesForRun(run23));
            Assert.assertEquals(toSet(program3, datasetInstance2, stream2), lineageDataset.getEntitiesForRun(run34));
        }
    });
}
Also used: StreamId(co.cask.cdap.proto.id.StreamId) TransactionExecutor(org.apache.tephra.TransactionExecutor) ProgramId(co.cask.cdap.proto.id.ProgramId) DatasetId(co.cask.cdap.proto.id.DatasetId) FlowletId(co.cask.cdap.proto.id.FlowletId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) RunId(org.apache.twill.api.RunId) Test(org.junit.Test)

Aggregations

RunId (org.apache.twill.api.RunId): 49
ProgramId (co.cask.cdap.proto.id.ProgramId): 35
ProgramRunId (co.cask.cdap.proto.id.ProgramRunId): 21
Test (org.junit.Test): 19
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 13
ProgramType (co.cask.cdap.proto.ProgramType): 12
ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification): 10
DatasetId (co.cask.cdap.proto.id.DatasetId): 9
ProgramContextAware (co.cask.cdap.data.ProgramContextAware): 6
BasicProgramContext (co.cask.cdap.internal.app.runtime.BasicProgramContext): 6
NamespaceId (co.cask.cdap.proto.id.NamespaceId): 6
Service (com.google.common.util.concurrent.Service): 6
HashSet (java.util.HashSet): 6
MetricsCollectionService (co.cask.cdap.api.metrics.MetricsCollectionService): 5
ProgramController (co.cask.cdap.app.runtime.ProgramController): 5
MessagingService (co.cask.cdap.messaging.MessagingService): 5
NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId): 5
StreamId (co.cask.cdap.proto.id.StreamId): 5
ArrayList (java.util.ArrayList): 5
Map (java.util.Map): 5