Search in sources :

Example 1 with NoOpProgramStateWriter

use of co.cask.cdap.app.runtime.NoOpProgramStateWriter in project cdap by caskdata.

the class RunRecordCorrectorServiceTest method testFixProgram.

/**
 * Exercises {@code RunRecordCorrectorService.fixRunRecords()} against a store populated with
 * run records in non-terminal states, using a runtime service that reports only an explicit
 * set of runs as live.
 *
 * <p>First pass expectations: runs that are not in the live set and are not tied to a live
 * workflow are expected to end up {@code FAILED}; the suspended run and the workflow run, which
 * are in the live set, keep their state; the MapReduce run launched by the live workflow is
 * expected to stay {@code STARTING}.
 *
 * <p>Second pass expectations: once the workflow is removed from the live set and recorded as
 * {@code COMPLETED}, the MapReduce run it launched is expected to become {@code FAILED}.
 *
 * @throws Exception if any store operation or state validation fails
 */
@Test
public void testFixProgram() throws Exception {
    // Monotonically increasing id used as the source id of every store update.
    final AtomicInteger sourceId = new AtomicInteger(0);
    Map<ProgramRunId, ProgramRunStatus> expectedStates = new HashMap<>();
    // Write 10 service runs left in STARTING state and
    // 10 "worker" runs (registered as service programs in namespace "ns") left in RUNNING state.
    for (int i = 0; i < 10; i++) {
        ProgramRunId serviceId = NamespaceId.DEFAULT.app("test").service("service" + i).run(RunIds.generate());
        store.setProvisioning(serviceId, RunIds.getTime(serviceId.getRun(), TimeUnit.SECONDS), Collections.emptyMap(), Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
        store.setProvisioned(serviceId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
        store.setStart(serviceId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
        expectedStates.put(serviceId, ProgramRunStatus.FAILED);
        ProgramRunId workerId = new NamespaceId("ns").app("test").service("worker" + i).run(RunIds.generate());
        // Derive the provisioning time from the worker's own run id (previously used serviceId's run id).
        store.setProvisioning(workerId, RunIds.getTime(workerId.getRun(), TimeUnit.SECONDS), Collections.emptyMap(), Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
        store.setProvisioned(workerId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
        store.setStart(workerId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
        store.setRunning(workerId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
        expectedStates.put(workerId, ProgramRunStatus.FAILED);
    }
    // Write a "flow" run (registered as a service program) that ends in SUSPENDED state
    ProgramRunId flowId = new NamespaceId("ns").app("test").service("flow").run(RunIds.generate());
    store.setProvisioning(flowId, RunIds.getTime(flowId.getRun(), TimeUnit.SECONDS), Collections.emptyMap(), Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setProvisioned(flowId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(flowId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setRunning(flowId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setSuspend(flowId, Bytes.toBytes(sourceId.getAndIncrement()));
    expectedStates.put(flowId, ProgramRunStatus.SUSPENDED);
    // Write two MR in starting state. One with workflow information, one without.
    ProgramRunId mrId = NamespaceId.DEFAULT.app("app").mr("mr").run(RunIds.generate());
    store.setProvisioning(mrId, RunIds.getTime(mrId.getRun(), TimeUnit.SECONDS), Collections.emptyMap(), Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setProvisioned(mrId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(mrId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    expectedStates.put(mrId, ProgramRunStatus.FAILED);
    ProgramRunId workflowId = NamespaceId.DEFAULT.app("app").workflow("workflow").run(RunIds.generate());
    ProgramRunId mrInWorkflowId = workflowId.getParent().getParent().mr("mrInWorkflow").run(RunIds.generate());
    // System arguments that tie the MR run to the workflow run that launched it.
    Map<String, String> workflowSystemArgs = ImmutableMap.of(ProgramOptionConstants.WORKFLOW_NAME, workflowId.getProgram(), ProgramOptionConstants.WORKFLOW_RUN_ID, workflowId.getRun(), ProgramOptionConstants.WORKFLOW_NODE_ID, "mr");
    store.setProvisioning(mrInWorkflowId, RunIds.getTime(mrInWorkflowId.getRun(), TimeUnit.SECONDS), Collections.emptyMap(), workflowSystemArgs, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setProvisioned(mrInWorkflowId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(mrInWorkflowId, null, workflowSystemArgs, Bytes.toBytes(sourceId.getAndIncrement()));
    // The MR launched by the workflow is expected to stay in STARTING while the workflow is live.
    // (Previously this expectation was stored under workflowId and then overwritten below,
    // so the MR-in-workflow run was never validated in the first pass.)
    expectedStates.put(mrInWorkflowId, ProgramRunStatus.STARTING);
    // Write the workflow in RUNNING state.
    store.setProvisioning(workflowId, RunIds.getTime(workflowId.getRun(), TimeUnit.SECONDS), Collections.emptyMap(), Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setProvisioned(workflowId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(workflowId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setRunning(workflowId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
    expectedStates.put(workflowId, ProgramRunStatus.RUNNING);
    // Use a ProgramRuntimeService that only reports running state based on a set of known ids
    final Map<ProgramId, RunId> runningSet = new HashMap<>();
    ProgramRuntimeService programRuntimeService = new AbstractProgramRuntimeService(cConf, null, null, null) {

        @Override
        public ProgramLiveInfo getLiveInfo(ProgramId programId) {
            return new NotRunningProgramLiveInfo(programId);
        }

        @Override
        public Map<RunId, RuntimeInfo> list(ProgramId program) {
            // Only programs registered in runningSet are reported as having a live run.
            RunId runId = runningSet.get(program);
            if (runId != null) {
                RuntimeInfo runtimeInfo = new SimpleRuntimeInfo(null, program);
                return Collections.singletonMap(runId, runtimeInfo);
            }
            return Collections.emptyMap();
        }
    };
    // Have both flow and workflow running
    runningSet.put(flowId.getParent(), RunIds.fromString(flowId.getRun()));
    runningSet.put(workflowId.getParent(), RunIds.fromString(workflowId.getRun()));
    // State writer that records an error directly in the store as a FAILED stop.
    ProgramStateWriter programStateWriter = new NoOpProgramStateWriter() {

        @Override
        public void error(ProgramRunId programRunId, Throwable failureCause) {
            store.setStop(programRunId, System.currentTimeMillis(), ProgramRunStatus.FAILED, new BasicThrowable(failureCause), Bytes.toBytes(sourceId.getAndIncrement()));
        }
    };
    // Create a run record fixer.
    // Set the start buffer time to -1 so that it fixes right away.
    // Also use a small tx batch size to validate the batching logic.
    RunRecordCorrectorService fixer = new RunRecordCorrectorService(cConf, store, programStateWriter, programRuntimeService, namespaceAdmin, datasetFramework, -1L, 5) {
    };
    fixer.fixRunRecords();
    // Validates all expected states
    for (Map.Entry<ProgramRunId, ProgramRunStatus> entry : expectedStates.entrySet()) {
        validateExpectedState(entry.getKey(), entry.getValue());
    }
    // Remove the workflow from the running set and mark it as completed
    runningSet.remove(workflowId.getParent());
    store.setStop(workflowId, System.currentTimeMillis(), ProgramRunStatus.COMPLETED, Bytes.toBytes(sourceId.getAndIncrement()));
    fixer.fixRunRecords();
    // The workflow should stay COMPLETED and the MR in the workflow should now be changed to FAILED
    expectedStates.put(workflowId, ProgramRunStatus.COMPLETED);
    expectedStates.put(mrInWorkflowId, ProgramRunStatus.FAILED);
    // Validates all expected states again
    for (Map.Entry<ProgramRunId, ProgramRunStatus> entry : expectedStates.entrySet()) {
        validateExpectedState(entry.getKey(), entry.getValue());
    }
}
Also used : NoOpProgramStateWriter(co.cask.cdap.app.runtime.NoOpProgramStateWriter) SimpleRuntimeInfo(co.cask.cdap.internal.app.runtime.service.SimpleRuntimeInfo) HashMap(java.util.HashMap) ProgramId(co.cask.cdap.proto.id.ProgramId) AbstractProgramRuntimeService(co.cask.cdap.app.runtime.AbstractProgramRuntimeService) SimpleRuntimeInfo(co.cask.cdap.internal.app.runtime.service.SimpleRuntimeInfo) NotRunningProgramLiveInfo(co.cask.cdap.proto.NotRunningProgramLiveInfo) ProgramRunStatus(co.cask.cdap.proto.ProgramRunStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ProgramStateWriter(co.cask.cdap.app.runtime.ProgramStateWriter) NoOpProgramStateWriter(co.cask.cdap.app.runtime.NoOpProgramStateWriter) BasicThrowable(co.cask.cdap.proto.BasicThrowable) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) NamespaceId(co.cask.cdap.proto.id.NamespaceId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) BasicThrowable(co.cask.cdap.proto.BasicThrowable) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) AbstractProgramRuntimeService(co.cask.cdap.app.runtime.AbstractProgramRuntimeService) ProgramRuntimeService(co.cask.cdap.app.runtime.ProgramRuntimeService) Test(org.junit.Test)

Example 2 with NoOpProgramStateWriter

use of co.cask.cdap.app.runtime.NoOpProgramStateWriter in project cdap by caskdata.

the class TwillAppLifecycleEventHandler method initialize.

/**
 * Initializes this handler from the Twill event handler context.
 *
 * <p>Records the Twill run id and the program run id (deserialized from the
 * {@code "programRunId"} entry of the handler specification configs), then chooses the
 * {@link ProgramStateWriter}: when both configuration files exist under
 * {@code resources.jar/resources/}, a Guice injector is built from them, the ZooKeeper client
 * service is started, and a {@link MessagingProgramStateWriter} binding is used; otherwise a
 * {@link NoOpProgramStateWriter} is installed and a warning is logged.
 *
 * @param context the event handler context supplied to this handler
 */
@Override
public void initialize(EventHandlerContext context) {
    super.initialize(context);
    this.runningPublished = new AtomicBoolean();
    this.twillRunId = context.getRunId();
    String serializedProgramRunId = context.getSpecification().getConfigs().get("programRunId");
    this.programRunId = GSON.fromJson(serializedProgramRunId, ProgramRunId.class);

    // Locate the CDAP and Hadoop configuration files inside the resources jar
    File cConfFile = new File("resources.jar/resources/" + CDAP_CONF_FILE_NAME);
    File hConfFile = new File("resources.jar/resources/" + HADOOP_CONF_FILE_NAME);
    if (!cConfFile.exists() || !hConfFile.exists()) {
        // Without both configuration files there is nothing to build the injector from
        LOG.warn("{} and {} were not found in the resources.jar. Not recording program states", CDAP_CONF_FILE_NAME, HADOOP_CONF_FILE_NAME);
        this.programStateWriter = new NoOpProgramStateWriter();
        return;
    }

    // Start from cleared configurations so only the files' contents are loaded
    CConfiguration cConf = CConfiguration.create();
    cConf.clear();
    Configuration hConf = new Configuration();
    hConf.clear();
    try {
        cConf.addResource(cConfFile.toURI().toURL());
        hConf.addResource(hConfFile.toURI().toURL());

        // Module that selects the messaging-based implementation of the program state writer
        AbstractModule stateWriterModule = new AbstractModule() {

            @Override
            protected void configure() {
                bind(ProgramStateWriter.class).to(MessagingProgramStateWriter.class);
            }
        };
        Injector injector = Guice.createInjector(
            new ConfigModule(cConf, hConf),
            new ZKClientModule(),
            new KafkaClientModule(),
            new DiscoveryRuntimeModule().getDistributedModules(),
            new MessagingClientModule(),
            stateWriterModule);

        // The ZooKeeper client is started before the state writer is obtained
        zkClientService = injector.getInstance(ZKClientService.class);
        zkClientService.startAndWait();
        this.programStateWriter = injector.getInstance(ProgramStateWriter.class);
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : MessagingClientModule(co.cask.cdap.messaging.guice.MessagingClientModule) NoOpProgramStateWriter(co.cask.cdap.app.runtime.NoOpProgramStateWriter) CConfiguration(co.cask.cdap.common.conf.CConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ConfigModule(co.cask.cdap.common.guice.ConfigModule) MessagingProgramStateWriter(co.cask.cdap.internal.app.program.MessagingProgramStateWriter) CConfiguration(co.cask.cdap.common.conf.CConfiguration) AbstractModule(com.google.inject.AbstractModule) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ZKClientModule(co.cask.cdap.common.guice.ZKClientModule) ZKClientService(org.apache.twill.zookeeper.ZKClientService) MessagingProgramStateWriter(co.cask.cdap.internal.app.program.MessagingProgramStateWriter) ProgramStateWriter(co.cask.cdap.app.runtime.ProgramStateWriter) NoOpProgramStateWriter(co.cask.cdap.app.runtime.NoOpProgramStateWriter) Injector(com.google.inject.Injector) KafkaClientModule(co.cask.cdap.common.guice.KafkaClientModule) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) File(java.io.File) DiscoveryRuntimeModule(co.cask.cdap.common.guice.DiscoveryRuntimeModule)

Aggregations

NoOpProgramStateWriter (co.cask.cdap.app.runtime.NoOpProgramStateWriter)2 ProgramStateWriter (co.cask.cdap.app.runtime.ProgramStateWriter)2 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)2 AbstractProgramRuntimeService (co.cask.cdap.app.runtime.AbstractProgramRuntimeService)1 ProgramRuntimeService (co.cask.cdap.app.runtime.ProgramRuntimeService)1 CConfiguration (co.cask.cdap.common.conf.CConfiguration)1 ConfigModule (co.cask.cdap.common.guice.ConfigModule)1 DiscoveryRuntimeModule (co.cask.cdap.common.guice.DiscoveryRuntimeModule)1 KafkaClientModule (co.cask.cdap.common.guice.KafkaClientModule)1 ZKClientModule (co.cask.cdap.common.guice.ZKClientModule)1 MessagingProgramStateWriter (co.cask.cdap.internal.app.program.MessagingProgramStateWriter)1 SimpleRuntimeInfo (co.cask.cdap.internal.app.runtime.service.SimpleRuntimeInfo)1 MessagingClientModule (co.cask.cdap.messaging.guice.MessagingClientModule)1 BasicThrowable (co.cask.cdap.proto.BasicThrowable)1 NotRunningProgramLiveInfo (co.cask.cdap.proto.NotRunningProgramLiveInfo)1 ProgramRunStatus (co.cask.cdap.proto.ProgramRunStatus)1 NamespaceId (co.cask.cdap.proto.id.NamespaceId)1 ProgramId (co.cask.cdap.proto.id.ProgramId)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 AbstractModule (com.google.inject.AbstractModule)1