Search in sources :

Example 11 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class WorkflowProgramRunner method run.

/**
 * Starts a run of the given workflow program and returns the controller for that run.
 *
 * <p>The method validates the program (it must be of type {@code WORKFLOW} and have a
 * {@link WorkflowSpecification} registered under its name), builds a {@code WorkflowDriver},
 * wraps it in a {@code WorkflowProgramController}, registers a listener that records every
 * state change of the run in {@code runtimeStore}, and finally starts the driver.
 *
 * @param program the workflow program to run
 * @param options the program options; the run id and the Twill run id are read from them
 * @return the controller of the started workflow run
 * @throws NullPointerException if the application specification, the program type or the
 *     workflow specification is missing
 * @throws IllegalArgumentException if the program is not of type {@code WORKFLOW}
 */
@Override
public ProgramController run(final Program program, final ProgramOptions options) {
    // Extract and verify options
    ApplicationSpecification appSpec = program.getApplicationSpecification();
    Preconditions.checkNotNull(appSpec, "Missing application specification.");
    ProgramType processorType = program.getType();
    Preconditions.checkNotNull(processorType, "Missing processor type.");
    Preconditions.checkArgument(processorType == ProgramType.WORKFLOW, "Only WORKFLOW process type is supported.");
    WorkflowSpecification workflowSpec = appSpec.getWorkflows().get(program.getName());
    Preconditions.checkNotNull(workflowSpec, "Missing WorkflowSpecification for %s", program.getName());
    final RunId runId = ProgramRunners.getRunId(options);
    // Setup dataset framework context, if required
    if (datasetFramework instanceof ProgramContextAware) {
        ProgramId programId = program.getId();
        ((ProgramContextAware) datasetFramework).setContext(new BasicProgramContext(programId.run(runId)));
    }
    // List of all Closeable resources that need to be cleaned up.
    // NOTE(review): within this method they are only closed in the error() callback and in the
    // catch block below; completed()/killed() do not close them here - confirm that they are
    // released elsewhere on those paths.
    final List<Closeable> closeables = new ArrayList<>();
    try {
        // createPluginInstantiator may return null, in which case there is nothing to close.
        PluginInstantiator pluginInstantiator = createPluginInstantiator(options, program.getClassLoader());
        if (pluginInstantiator != null) {
            closeables.add(pluginInstantiator);
        }
        WorkflowDriver driver = new WorkflowDriver(program, options, hostname, workflowSpec, programRunnerFactory, metricsCollectionService, datasetFramework, discoveryServiceClient, txClient, runtimeStore, cConf, pluginInstantiator, secureStore, secureStoreManager, messagingService);
        // Controller needs to be created before starting the driver so that the state change of the driver
        // service can be fully captured by the controller.
        final ProgramController controller = new WorkflowProgramController(program, driver, serviceAnnouncer, runId);
        final String twillRunId = options.getArguments().getOption(ProgramOptionConstants.TWILL_RUN_ID);
        // Every callback below persists the new run state through runtimeStore. Each store call is
        // wrapped in Retries.supplyWithRetries with a fixDelay retry strategy of
        // Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS seconds.
        controller.addListener(new AbstractListener() {

            @Override
            public void init(ProgramController.State state, @Nullable Throwable cause) {
                // Get start time from RunId
                long startTimeInSeconds = RunIds.getTime(controller.getRunId(), TimeUnit.SECONDS);
                if (startTimeInSeconds == -1) {
                    // If RunId is not time-based, use current time as start time
                    startTimeInSeconds = TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis());
                }
                // Copy into a final variable so that the anonymous Supplier can capture it.
                final long finalStartTimeInSeconds = startTimeInSeconds;
                Retries.supplyWithRetries(new Supplier<Void>() {

                    @Override
                    public Void get() {
                        runtimeStore.setStart(program.getId(), runId.getId(), finalStartTimeInSeconds, twillRunId, options.getUserArguments().asMap(), options.getArguments().asMap());
                        return null;
                    }
                }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
                // This can happen if there is a delay in calling the init listener
                if (state == ProgramController.State.COMPLETED) {
                    completed();
                }
                // This can happen if there is a delay in calling the init listener
                if (state == ProgramController.State.ERROR) {
                    error(controller.getFailureCause());
                }
            }

            @Override
            public void completed() {
                LOG.debug("Program {} with run id {} completed successfully.", program.getId(), runId.getId());
                // Record the stop time (now, in seconds) together with the COMPLETED run status.
                Retries.supplyWithRetries(new Supplier<Void>() {

                    @Override
                    public Void get() {
                        runtimeStore.setStop(program.getId(), runId.getId(), TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()), ProgramController.State.COMPLETED.getRunStatus());
                        return null;
                    }
                }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
            }

            @Override
            public void killed() {
                LOG.debug("Program {} with run id {} killed.", program.getId(), runId.getId());
                // Record the stop time (now, in seconds) together with the KILLED run status.
                Retries.supplyWithRetries(new Supplier<Void>() {

                    @Override
                    public Void get() {
                        runtimeStore.setStop(program.getId(), runId.getId(), TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()), ProgramController.State.KILLED.getRunStatus());
                        return null;
                    }
                }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
            }

            @Override
            public void suspended() {
                LOG.debug("Suspending Program {} with run id {}.", program.getId(), runId.getId());
                // Mark the run as suspended in the runtime store.
                Retries.supplyWithRetries(new Supplier<Void>() {

                    @Override
                    public Void get() {
                        runtimeStore.setSuspend(program.getId(), runId.getId());
                        return null;
                    }
                }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
            }

            @Override
            public void resuming() {
                LOG.debug("Resuming Program {} {}.", program.getId(), runId.getId());
                // Mark the run as resumed in the runtime store.
                Retries.supplyWithRetries(new Supplier<Void>() {

                    @Override
                    public Void get() {
                        runtimeStore.setResume(program.getId(), runId.getId());
                        return null;
                    }
                }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
            }

            @Override
            public void error(final Throwable cause) {
                LOG.info("Program {} with run id {} stopped because of error {}.", program.getId(), runId.getId(), cause);
                // Release the collected resources before persisting the failure.
                closeAllQuietly(closeables);
                // Record the stop time (now, in seconds), the ERROR run status and the failure cause.
                Retries.supplyWithRetries(new Supplier<Void>() {

                    @Override
                    public Void get() {
                        runtimeStore.setStop(program.getId(), runId.getId(), TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis()), ProgramController.State.ERROR.getRunStatus(), new BasicThrowable(cause));
                        return null;
                    }
                }, RetryStrategies.fixDelay(Constants.Retry.RUN_RECORD_UPDATE_RETRY_DELAY_SECS, TimeUnit.SECONDS));
            }
        }, Threads.SAME_THREAD_EXECUTOR);
        // Start the driver only after the listener is registered (see the comment on the controller above).
        driver.start();
        return controller;
    } catch (Exception e) {
        // Setup or start failed: release whatever was collected so far and rethrow.
        closeAllQuietly(closeables);
        throw Throwables.propagate(e);
    }
}
Also used : ApplicationSpecification(co.cask.cdap.api.app.ApplicationSpecification) ProgramController(co.cask.cdap.app.runtime.ProgramController) Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) ProgramId(co.cask.cdap.proto.id.ProgramId) BasicProgramContext(co.cask.cdap.internal.app.runtime.BasicProgramContext) WorkflowSpecification(co.cask.cdap.api.workflow.WorkflowSpecification) BasicThrowable(co.cask.cdap.proto.BasicThrowable) PluginInstantiator(co.cask.cdap.internal.app.runtime.plugin.PluginInstantiator) AbstractListener(co.cask.cdap.internal.app.runtime.AbstractListener) Supplier(com.google.common.base.Supplier) ProgramType(co.cask.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId) BasicThrowable(co.cask.cdap.proto.BasicThrowable) ProgramContextAware(co.cask.cdap.data.ProgramContextAware)

Example 12 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class WebappProgramRunner method run.

/**
 * Starts a Netty HTTP service that serves the webapp contained in the program jar, announces
 * it under the program's service name, registers it for every serving host name found in the
 * jar, and returns a controller for the running webapp.
 *
 * <p>If anything fails after the HTTP service has been started (announcing, reading the
 * serving host names, registering, creating the controller), the registrations made so far are
 * cancelled and the HTTP service is stopped before the failure is propagated, so that a failed
 * start does not leave a listening server or stale discovery entries behind.
 *
 * @param program the webapp program to run; must be of type {@code WEBAPP}
 * @param options the program options; the run id is read from them
 * @return the controller of the started webapp
 * @throws RuntimeException wrapping (via {@code Throwables.propagate}) any failure raised
 *     while validating the program or starting and registering the service
 */
@Override
public ProgramController run(Program program, ProgramOptions options) {
    // Both are tracked outside the try block so that the catch block can undo a partial start.
    final List<Cancellable> cancellables = Lists.newArrayList();
    NettyHttpService startedService = null;
    try {
        ProgramType processorType = program.getType();
        Preconditions.checkNotNull(processorType, "Missing processor type");
        Preconditions.checkArgument(processorType == ProgramType.WEBAPP, "Only WEBAPP process type is supported");
        LOG.info("Initializing Webapp for app {} with jar {}", program.getApplicationId(), program.getJarLocation().getName());
        String serviceName = getServiceName(program.getId());
        Preconditions.checkNotNull(serviceName, "Cannot determine service name for program %s", program.getName());
        LOG.info("Got service name {}", serviceName);
        // Start netty server
        // TODO: add metrics reporting
        JarHttpHandler jarHttpHandler = webappHttpHandlerFactory.createHandler(program.getJarLocation());
        NettyHttpService.Builder builder = new CommonNettyHttpServiceBuilder(cConf, program.getId().toString());
        builder.addHttpHandlers(ImmutableSet.of(jarHttpHandler));
        builder.setUrlRewriter(new WebappURLRewriter(jarHttpHandler));
        builder.setHost(hostname.getCanonicalHostName());
        NettyHttpService httpService = builder.build();
        httpService.startAndWait();
        // Only remember the service once it has started successfully; a service that failed to
        // start must not be stopped again during cleanup.
        startedService = httpService;
        final InetSocketAddress address = httpService.getBindAddress();
        RunId runId = ProgramRunners.getRunId(options);
        // Register service, and the serving host names.
        LOG.info("Webapp {} running on address {} registering as {}", program.getApplicationId(), address, serviceName);
        cancellables.add(serviceAnnouncer.announce(serviceName, address.getPort()));
        for (String hname : getServingHostNames(Locations.newInputSupplier(program.getJarLocation()))) {
            final String sname = ProgramType.WEBAPP.name().toLowerCase() + "/" + hname;
            LOG.info("Webapp {} running on address {} registering as {}", program.getApplicationId(), address, sname);
            cancellables.add(discoveryService.register(ResolvingDiscoverable.of(new Discoverable(sname, address))));
        }
        // The controller takes over ownership: cancelling it removes every registration made above.
        return new WebappProgramController(program.getId(), runId, httpService, new Cancellable() {

            @Override
            public void cancel() {
                for (Cancellable cancellable : cancellables) {
                    cancellable.cancel();
                }
            }
        });
    } catch (Exception e) {
        // Undo a partial start. Cleanup failures are logged, never thrown, so that the original
        // failure is the one reported to the caller.
        for (Cancellable cancellable : cancellables) {
            try {
                cancellable.cancel();
            } catch (RuntimeException cleanupFailure) {
                LOG.warn("Failed to cancel a service registration while cleaning up a failed webapp start", cleanupFailure);
            }
        }
        if (startedService != null) {
            try {
                startedService.stopAndWait();
            } catch (RuntimeException cleanupFailure) {
                LOG.warn("Failed to stop the HTTP service while cleaning up a failed webapp start", cleanupFailure);
            }
        }
        throw Throwables.propagate(e);
    }
}
Also used : Discoverable(org.apache.twill.discovery.Discoverable) ResolvingDiscoverable(co.cask.cdap.common.discovery.ResolvingDiscoverable) CommonNettyHttpServiceBuilder(co.cask.cdap.common.http.CommonNettyHttpServiceBuilder) InetSocketAddress(java.net.InetSocketAddress) Cancellable(org.apache.twill.common.Cancellable) NettyHttpService(co.cask.http.NettyHttpService) ProgramType(co.cask.cdap.proto.ProgramType) RunId(org.apache.twill.api.RunId)

Example 13 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class ProgramLifecycleService method validateAndCorrectRunningRunRecords.

/**
 * Corrects run records of the given program type that are stored with RUNNING status although
 * the {@link ProgramRuntimeService} has no runtime info for them.
 *
 * <p>A record is corrected only if a program id of the given type can be resolved for it and
 * {@code shouldCorrectForWorkflowChildren} allows it (programs launched by a workflow that is
 * still running are left alone). Every record is evaluated exactly once and the result is
 * materialised before any correction is applied, so the logged count matches the records that
 * are actually processed and the store is not queried repeatedly for the same record.
 *
 * @param programType The type of program the run records need to validate and update.
 * @param processedInvalidRunRecordIds the {@link Set} of processed invalid run record ids;
 *     the id of every record corrected by this call is added to it.
 */
@VisibleForTesting
void validateAndCorrectRunningRunRecords(final ProgramType programType, final Set<String> processedInvalidRunRecordIds) {
    final Map<RunId, RuntimeInfo> runIdToRuntimeInfo = runtimeService.list(programType);
    LOG.trace("Start getting run records not actually running ...");
    Collection<RunRecordMeta> notActuallyRunning = store.getRuns(ProgramRunStatus.RUNNING, new com.google.common.base.Predicate<RunRecordMeta>() {

        @Override
        public boolean apply(RunRecordMeta input) {
            String runId = input.getPid();
            // Check if it is not actually running.
            return !runIdToRuntimeInfo.containsKey(RunIds.fromString(runId));
        }
    }).values();
    LOG.trace("End getting {} run records not actually running.", notActuallyRunning.size());
    Map<String, ProgramId> runIdToProgramId = new HashMap<>();
    LOG.trace("Start getting invalid run records  ...");
    // Evaluate both checks once per record and keep the result in a real list. A lazily filtered
    // view (Collections2.filter) would re-run the checks - including their store lookups - on
    // every isEmpty()/size()/iteration.
    // NOTE(review): all records are now checked before any id is added to
    // processedInvalidRunRecordIds by the correction loop below; confirm that
    // shouldCorrectForWorkflowChildren does not rely on ids added within the same call.
    Collection<RunRecordMeta> invalidRunRecords = new java.util.ArrayList<>();
    for (RunRecordMeta candidate : notActuallyRunning) {
        String runId = candidate.getPid();
        // check for program Id for the run record, if null then it is invalid program type.
        ProgramId targetProgramId = retrieveProgramIdForRunRecord(programType, runId);
        if (targetProgramId == null) {
            continue;
        }
        runIdToProgramId.put(runId, targetProgramId);
        // don't correct run records for programs running inside a workflow
        // for instance, a MapReduce running in a Workflow will not be contained in the runtime info in this class
        if (!shouldCorrectForWorkflowChildren(candidate, processedInvalidRunRecordIds)) {
            LOG.trace("Will not correct invalid run record {} since it's parent workflow still running.", candidate);
            continue;
        }
        invalidRunRecords.add(candidate);
    }
    LOG.trace("End getting invalid run records.");
    if (!invalidRunRecords.isEmpty()) {
        LOG.warn("Found {} RunRecords with RUNNING status and the program not actually running for program type {}", invalidRunRecords.size(), programType.getPrettyName());
    } else {
        LOG.trace("No RunRecords found with RUNNING status and the program not actually running for program type {}", programType.getPrettyName());
    }
    // Now lets correct the invalid RunRecords
    for (RunRecordMeta invalidRunRecordMeta : invalidRunRecords) {
        String runId = invalidRunRecordMeta.getPid();
        ProgramId targetProgramId = runIdToProgramId.get(runId);
        // Switch the run status from the ALIVE state's status to the ERROR state's status; the id is
        // only remembered as processed when the store reports that the update was applied.
        boolean updated = store.compareAndSetStatus(targetProgramId, runId, ProgramController.State.ALIVE.getRunStatus(), ProgramController.State.ERROR.getRunStatus());
        if (updated) {
            LOG.warn("Fixed RunRecord {} for program {} with RUNNING status because the program was not " + "actually running", runId, targetProgramId);
            processedInvalidRunRecordIds.add(runId);
        }
    }
}
Also used : RuntimeInfo(co.cask.cdap.app.runtime.ProgramRuntimeService.RuntimeInfo) HashMap(java.util.HashMap) RunRecordMeta(co.cask.cdap.internal.app.store.RunRecordMeta) ProgramId(co.cask.cdap.proto.id.ProgramId) Predicate(co.cask.cdap.api.Predicate) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 14 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class AppMetadataStoreTest method testScanRunningInRangeWithBatch.

/**
 * Records 100 started-and-stopped runs whose run ids are generated from the times
 * 10000, 20000, ..., 1000000 (milliseconds) and checks, through {@code runScan}, a full scan
 * as well as several time windows against the matching sub-set of expected start times.
 */
@Test
public void testScanRunningInRangeWithBatch() throws Exception {
    DatasetId tableId = NamespaceId.DEFAULT.dataset("testScanRunningInRange");
    datasetFramework.addInstance(Table.class.getName(), tableId, DatasetProperties.EMPTY);
    Table backingTable = datasetFramework.getDataset(tableId, ImmutableMap.<String, String>of(), null);
    Assert.assertNotNull(backingTable);
    final AppMetadataStore metadataStore = new AppMetadataStore(backingTable, cConf, new AtomicBoolean(false));
    TransactionExecutor txExecutor = txExecutorFactory.createExecutor(Collections.singleton((TransactionAware) metadataStore));

    // Add some run records, cycling through every program type and every run status.
    TreeSet<Long> expected = new TreeSet<>();
    ProgramType[] programTypes = ProgramType.values();
    ProgramRunStatus[] runStatuses = ProgramRunStatus.values();
    for (int i = 0; i < 100; ++i) {
        final ProgramId program = NamespaceId.DEFAULT.app("app" + i).program(programTypes[i % programTypes.length], "program" + i);
        final RunId runId = RunIds.generate((i + 1) * 10000);
        expected.add(RunIds.getTime(runId, TimeUnit.MILLISECONDS));
        final ProgramRunStatus stopStatus = runStatuses[i % runStatuses.length];
        // Start the program and stop it, both at the time encoded in the run id.
        txExecutor.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                long runTimeSecs = RunIds.getTime(runId, TimeUnit.SECONDS);
                metadataStore.recordProgramStart(program, runId.getId(), runTimeSecs, null, null, null);
                metadataStore.recordProgramStop(program, runId.getId(), runTimeSecs, stopStatus, null);
            }
        });
    }

    // Run full scan
    runScan(txExecutor, metadataStore, expected, 0, Long.MAX_VALUE);

    // In all windows below, TreeSet and metadataStore both have start time inclusive and end time exclusive.
    final long step = 10000L;
    long[][] windowsMillis = {
        // Scans with a time limit
        {30 * step, 90 * step},
        {90 * step, 101 * step},
        // After range
        {101 * step, 200 * step},
        // Identical start and end time
        {31 * step, 31 * step},
        // One unit difference between start and end time
        {30 * step, 31 * step},
        // Before range
        {1000L, 10000L}
    };
    for (long[] window : windowsMillis) {
        long fromMillis = window[0];
        long toMillis = window[1];
        runScan(txExecutor, metadataStore, expected.subSet(fromMillis, toMillis), TimeUnit.MILLISECONDS.toSeconds(fromMillis), TimeUnit.MILLISECONDS.toSeconds(toMillis));
    }
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) TransactionExecutor(org.apache.tephra.TransactionExecutor) ProgramId(co.cask.cdap.proto.id.ProgramId) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetId(co.cask.cdap.proto.id.DatasetId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransactionAware(org.apache.tephra.TransactionAware) TreeSet(java.util.TreeSet) ApplicationId(co.cask.cdap.proto.id.ApplicationId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Test(org.junit.Test)

Example 15 with RunId

use of org.apache.twill.api.RunId in project cdap by caskdata.

the class AppMetadataStoreTest method testgetRuns.

/**
 * Records 100 started-and-stopped runs of one program and checks that
 * {@code AppMetadataStore.getRuns} returns exactly the requested runs, both for the full set
 * of program run ids and for the set containing every other one.
 */
@Test
public void testgetRuns() throws Exception {
    DatasetId tableId = NamespaceId.DEFAULT.dataset("testgetRuns");
    datasetFramework.addInstance(Table.class.getName(), tableId, DatasetProperties.EMPTY);
    Table backingTable = datasetFramework.getDataset(tableId, ImmutableMap.<String, String>of(), null);
    Assert.assertNotNull(backingTable);
    final AppMetadataStore metadataStore = new AppMetadataStore(backingTable, cConf, new AtomicBoolean(false));
    TransactionExecutor txExecutor = txExecutorFactory.createExecutor(Collections.singleton((TransactionAware) metadataStore));

    // Expected run ids (as strings) and the program run ids used to query them; the "Half"
    // collections hold every other run (even loop indices).
    final Set<String> expected = new TreeSet<>();
    final Set<String> expectedHalf = new TreeSet<>();
    final Set<ProgramRunId> programRunIdSet = new HashSet<>();
    final Set<ProgramRunId> programRunIdSetHalf = new HashSet<>();

    // Every run belongs to the same program.
    final ProgramId program = NamespaceId.DEFAULT.app("app").program(ProgramType.FLOW, "program");
    final ProgramRunStatus[] runStatuses = ProgramRunStatus.values();
    for (int i = 0; i < 100; ++i) {
        final RunId runId = RunIds.generate((i + 1) * 10000);
        String runIdString = runId.toString();
        ProgramRunId programRunId = new ProgramRunId(program.getNamespace(), program.getApplication(), program.getType(), program.getProgram(), runIdString);
        expected.add(runIdString);
        programRunIdSet.add(programRunId);
        // Add every other runId / programRunId
        boolean everyOther = (i % 2) == 0;
        if (everyOther) {
            expectedHalf.add(runIdString);
            programRunIdSetHalf.add(programRunId);
        }
        final ProgramRunStatus stopStatus = runStatuses[i % runStatuses.length];
        txExecutor.execute(new TransactionExecutor.Subroutine() {

            @Override
            public void apply() throws Exception {
                // Start the program and stop it
                long runTimeSecs = RunIds.getTime(runId, TimeUnit.SECONDS);
                metadataStore.recordProgramStart(program, runId.getId(), runTimeSecs, null, null, null);
                metadataStore.recordProgramStop(program, runId.getId(), runTimeSecs, stopStatus, null);
            }
        });
    }

    txExecutor.execute(new TransactionExecutor.Subroutine() {

        @Override
        public void apply() throws Exception {
            // Full set: the pids of the returned records must be exactly the generated run ids.
            Set<String> actual = new TreeSet<>();
            for (RunRecordMeta record : metadataStore.getRuns(programRunIdSet).values()) {
                actual.add(record.getPid());
            }
            Assert.assertEquals(expected, actual);

            // Half set: only the runs that were asked for may come back.
            Set<String> actualHalf = new TreeSet<>();
            for (RunRecordMeta record : metadataStore.getRuns(programRunIdSetHalf).values()) {
                actualHalf.add(record.getPid());
            }
            Assert.assertEquals(expectedHalf, actualHalf);
        }
    });
}
Also used : Table(co.cask.cdap.api.dataset.table.Table) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) Set(java.util.Set) TransactionExecutor(org.apache.tephra.TransactionExecutor) ProgramId(co.cask.cdap.proto.id.ProgramId) TransactionFailureException(org.apache.tephra.TransactionFailureException) DatasetId(co.cask.cdap.proto.id.DatasetId) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransactionAware(org.apache.tephra.TransactionAware) TreeSet(java.util.TreeSet) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

RunId (org.apache.twill.api.RunId)49 ProgramId (co.cask.cdap.proto.id.ProgramId)35 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)21 Test (org.junit.Test)19 ApplicationId (co.cask.cdap.proto.id.ApplicationId)13 ProgramType (co.cask.cdap.proto.ProgramType)12 ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification)10 DatasetId (co.cask.cdap.proto.id.DatasetId)9 ProgramContextAware (co.cask.cdap.data.ProgramContextAware)6 BasicProgramContext (co.cask.cdap.internal.app.runtime.BasicProgramContext)6 NamespaceId (co.cask.cdap.proto.id.NamespaceId)6 Service (com.google.common.util.concurrent.Service)6 HashSet (java.util.HashSet)6 MetricsCollectionService (co.cask.cdap.api.metrics.MetricsCollectionService)5 ProgramController (co.cask.cdap.app.runtime.ProgramController)5 MessagingService (co.cask.cdap.messaging.MessagingService)5 NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)5 StreamId (co.cask.cdap.proto.id.StreamId)5 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5