Search in sources :

Example 1 with NoOpProgramStateWriter

use of io.cdap.cdap.app.runtime.NoOpProgramStateWriter in project cdap by caskdata.

the class RunRecordCorrectorServiceTest method testFixProgram.

@Test
public void testFixProgram() throws Exception {
    final AtomicInteger sourceId = new AtomicInteger(0);
    // Write 10 services with starting state
    // Write 10 workers with running state
    Map<ProgramRunId, ProgramRunStatus> expectedStates = new HashMap<>();
    ArtifactId artifactId = NamespaceId.DEFAULT.artifact("testArtifact", "1.0").toApiArtifactId();
    for (int i = 0; i < 10; i++) {
        ProgramRunId serviceId = NamespaceId.DEFAULT.app("test").service("service" + i).run(randomRunId());
        store.setProvisioning(serviceId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
        store.setProvisioned(serviceId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
        store.setStart(serviceId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
        expectedStates.put(serviceId, ProgramRunStatus.FAILED);
        ProgramRunId workerId = new NamespaceId("ns").app("test").worker("worker" + i).run(randomRunId());
        store.setProvisioning(workerId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
        store.setProvisioned(workerId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
        store.setStart(workerId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
        store.setRunning(workerId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
        expectedStates.put(workerId, ProgramRunStatus.FAILED);
    }
    // Write a service with suspend state
    ProgramRunId flowId = new NamespaceId("ns").app("test").service("flow").run(randomRunId());
    store.setProvisioning(flowId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
    store.setProvisioned(flowId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(flowId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setRunning(flowId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setSuspend(flowId, Bytes.toBytes(sourceId.getAndIncrement()), -1);
    expectedStates.put(flowId, ProgramRunStatus.SUSPENDED);
    // Write two MR in starting state. One with workflow information, one without.
    ProgramRunId mrId = NamespaceId.DEFAULT.app("app").mr("mr").run(randomRunId());
    store.setProvisioning(mrId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
    store.setProvisioned(mrId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(mrId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    expectedStates.put(mrId, ProgramRunStatus.FAILED);
    ProgramRunId workflowId = NamespaceId.DEFAULT.app("app").workflow("workflow").run(randomRunId());
    ProgramRunId mrInWorkflowId = workflowId.getParent().getParent().mr("mrInWorkflow").run(randomRunId());
    store.setProvisioning(mrInWorkflowId, Collections.emptyMap(), ImmutableMap.of(ProgramOptionConstants.WORKFLOW_NAME, workflowId.getProgram(), ProgramOptionConstants.WORKFLOW_RUN_ID, workflowId.getRun(), ProgramOptionConstants.WORKFLOW_NODE_ID, "mr", SystemArguments.PROFILE_NAME, ProfileId.NATIVE.getScopedName()), Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
    store.setProvisioned(mrInWorkflowId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(mrInWorkflowId, null, ImmutableMap.of(ProgramOptionConstants.WORKFLOW_NAME, workflowId.getProgram(), ProgramOptionConstants.WORKFLOW_RUN_ID, workflowId.getRun(), ProgramOptionConstants.WORKFLOW_NODE_ID, "mr"), Bytes.toBytes(sourceId.getAndIncrement()));
    expectedStates.put(workflowId, ProgramRunStatus.STARTING);
    // Write the workflow in RUNNING state.
    store.setProvisioning(workflowId, Collections.emptyMap(), SINGLETON_PROFILE_MAP, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
    store.setProvisioned(workflowId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(workflowId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setRunning(workflowId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
    expectedStates.put(workflowId, ProgramRunStatus.RUNNING);
    // Use a ProgramRuntimeService that only reports running state based on a set of know ids
    final Map<ProgramId, RunId> runningSet = new HashMap<>();
    ProgramRuntimeService programRuntimeService = new AbstractProgramRuntimeService(cConf, null, null, new NoOpProgramStateWriter(), null) {

        @Override
        public ProgramLiveInfo getLiveInfo(ProgramId programId) {
            return new NotRunningProgramLiveInfo(programId);
        }

        @Override
        public Map<RunId, RuntimeInfo> list(ProgramId program) {
            RunId runId = runningSet.get(program);
            if (runId != null) {
                RuntimeInfo runtimeInfo = new SimpleRuntimeInfo(null, program);
                return Collections.singletonMap(runId, runtimeInfo);
            }
            return Collections.emptyMap();
        }
    };
    // Have both flow and workflow running
    runningSet.put(flowId.getParent(), RunIds.fromString(flowId.getRun()));
    runningSet.put(workflowId.getParent(), RunIds.fromString(workflowId.getRun()));
    ProgramStateWriter programStateWriter = new NoOpProgramStateWriter() {

        @Override
        public void error(ProgramRunId programRunId, Throwable failureCause) {
            store.setStop(programRunId, System.currentTimeMillis(), ProgramRunStatus.FAILED, new BasicThrowable(failureCause), Bytes.toBytes(sourceId.getAndIncrement()));
        }
    };
    // Create a run record fixer.
    // Set the start buffer time to -1 so that it fixes right away.
    // Also use a small tx batch size to validate the batching logic.
    RunRecordCorrectorService fixer = new RunRecordCorrectorService(cConf, store, programStateWriter, programRuntimeService, namespaceAdmin, datasetFramework, -1L, 5) {
    };
    fixer.fixRunRecords();
    // Validates all expected states
    for (Map.Entry<ProgramRunId, ProgramRunStatus> entry : expectedStates.entrySet()) {
        validateExpectedState(entry.getKey(), entry.getValue());
    }
    // Remove the workflow from the running set and mark it as completed
    runningSet.remove(workflowId.getParent());
    store.setStop(workflowId, System.currentTimeMillis(), ProgramRunStatus.COMPLETED, Bytes.toBytes(sourceId.getAndIncrement()));
    fixer.fixRunRecords();
    // Both the workflow and the MR in workflow should be changed to failed state
    expectedStates.put(workflowId, ProgramRunStatus.COMPLETED);
    expectedStates.put(mrInWorkflowId, ProgramRunStatus.FAILED);
    // Validates all expected states again
    for (Map.Entry<ProgramRunId, ProgramRunStatus> entry : expectedStates.entrySet()) {
        validateExpectedState(entry.getKey(), entry.getValue());
    }
}
Also used : NoOpProgramStateWriter(io.cdap.cdap.app.runtime.NoOpProgramStateWriter) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) SimpleRuntimeInfo(io.cdap.cdap.internal.app.runtime.service.SimpleRuntimeInfo) HashMap(java.util.HashMap) ProgramId(io.cdap.cdap.proto.id.ProgramId) AbstractProgramRuntimeService(io.cdap.cdap.app.runtime.AbstractProgramRuntimeService) SimpleRuntimeInfo(io.cdap.cdap.internal.app.runtime.service.SimpleRuntimeInfo) NotRunningProgramLiveInfo(io.cdap.cdap.proto.NotRunningProgramLiveInfo) ProgramRunStatus(io.cdap.cdap.proto.ProgramRunStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpProgramStateWriter(io.cdap.cdap.app.runtime.NoOpProgramStateWriter) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) BasicThrowable(io.cdap.cdap.proto.BasicThrowable) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) RunId(org.apache.twill.api.RunId) BasicThrowable(io.cdap.cdap.proto.BasicThrowable) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) AbstractProgramRuntimeService(io.cdap.cdap.app.runtime.AbstractProgramRuntimeService) ProgramRuntimeService(io.cdap.cdap.app.runtime.ProgramRuntimeService) Test(org.junit.Test)

Example 2 with NoOpProgramStateWriter

use of io.cdap.cdap.app.runtime.NoOpProgramStateWriter in project cdap by caskdata.

the class RunRecordCorrectorServiceTest method testFixUnderlyingProgramInWorkflow.

@Test
public void testFixUnderlyingProgramInWorkflow() throws Exception {
    AtomicInteger sourceId = new AtomicInteger(0);
    ArtifactId artifactId = NamespaceId.DEFAULT.artifact("testArtifact", "1.0").toApiArtifactId();
    // set up a workflow
    Map<String, String> wfSystemArg = ImmutableMap.of(ProgramOptionConstants.CLUSTER_MODE, ClusterMode.ISOLATED.name(), SystemArguments.PROFILE_NAME, ProfileId.NATIVE.getScopedName());
    ProgramRunId wfId = NamespaceId.DEFAULT.app("test").workflow("testWF").run(randomRunId());
    store.setProvisioning(wfId, Collections.emptyMap(), wfSystemArg, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
    store.setProvisioned(wfId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(wfId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setRunning(wfId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
    // set up a spark program inside workflow, and in ISOLATED mode
    Map<String, String> spSystemArgs = ImmutableMap.of(ProgramOptionConstants.CLUSTER_MODE, ClusterMode.ISOLATED.name(), SystemArguments.PROFILE_NAME, ProfileId.NATIVE.getScopedName(), ProgramOptionConstants.WORKFLOW_NAME, wfId.getProgram(), ProgramOptionConstants.WORKFLOW_RUN_ID, wfId.getRun(), ProgramOptionConstants.WORKFLOW_NODE_ID, "spark");
    ProgramRunId spId = NamespaceId.DEFAULT.app("test").spark("phase-1").run(randomRunId());
    store.setProvisioning(spId, Collections.emptyMap(), spSystemArgs, Bytes.toBytes(sourceId.getAndIncrement()), artifactId);
    store.setProvisioned(spId, 0, Bytes.toBytes(sourceId.getAndIncrement()));
    store.setStart(spId, null, Collections.emptyMap(), Bytes.toBytes(sourceId.getAndIncrement()));
    store.setRunning(spId, System.currentTimeMillis(), null, Bytes.toBytes(sourceId.getAndIncrement()));
    // mark the workflow as finished
    store.setStop(wfId, System.currentTimeMillis(), ProgramRunStatus.COMPLETED, Bytes.toBytes(sourceId.getAndIncrement()));
    ProgramStateWriter programStateWriter = new NoOpProgramStateWriter() {

        @Override
        public void error(ProgramRunId programRunId, Throwable failureCause) {
            store.setStop(programRunId, System.currentTimeMillis(), ProgramRunStatus.FAILED, new BasicThrowable(failureCause), Bytes.toBytes(sourceId.getAndIncrement()));
        }
    };
    ProgramRuntimeService noOpRuntimeSerivce = new AbstractProgramRuntimeService(cConf, null, null, new NoOpProgramStateWriter(), null) {

        @Override
        public ProgramLiveInfo getLiveInfo(ProgramId programId) {
            return new NotRunningProgramLiveInfo(programId);
        }

        @Override
        public Map<RunId, RuntimeInfo> list(ProgramId program) {
            return Collections.emptyMap();
        }
    };
    // Create a run record fixer.
    // Set the start buffer time to -1 so that it fixes right away.
    RunRecordCorrectorService fixer = new RunRecordCorrectorService(cConf, store, programStateWriter, noOpRuntimeSerivce, namespaceAdmin, datasetFramework, -1L, 5) {
    };
    fixer.fixRunRecords();
    // check the record is fixed for spark program
    Assert.assertEquals(ProgramRunStatus.FAILED, store.getRun(spId).getStatus());
}
Also used : NoOpProgramStateWriter(io.cdap.cdap.app.runtime.NoOpProgramStateWriter) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) SimpleRuntimeInfo(io.cdap.cdap.internal.app.runtime.service.SimpleRuntimeInfo) ProgramId(io.cdap.cdap.proto.id.ProgramId) AbstractProgramRuntimeService(io.cdap.cdap.app.runtime.AbstractProgramRuntimeService) NotRunningProgramLiveInfo(io.cdap.cdap.proto.NotRunningProgramLiveInfo) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NoOpProgramStateWriter(io.cdap.cdap.app.runtime.NoOpProgramStateWriter) ProgramStateWriter(io.cdap.cdap.app.runtime.ProgramStateWriter) BasicThrowable(io.cdap.cdap.proto.BasicThrowable) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) BasicThrowable(io.cdap.cdap.proto.BasicThrowable) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) RunId(org.apache.twill.api.RunId) AbstractProgramRuntimeService(io.cdap.cdap.app.runtime.AbstractProgramRuntimeService) ProgramRuntimeService(io.cdap.cdap.app.runtime.ProgramRuntimeService) Test(org.junit.Test)

Example 3 with NoOpProgramStateWriter

use of io.cdap.cdap.app.runtime.NoOpProgramStateWriter in project cdap by caskdata.

the class ProgramStateWriterWithHeartBeatTest method testHeartBeatThread.

@Test
public void testHeartBeatThread() throws InterruptedException, ExecutionException, TimeoutException {
    // configure program state writer to emit heart beat every second
    ProgramStatePublisher programStatePublisher = new MockProgramStatePublisher();
    NoOpProgramStateWriter programStateWriter = new NoOpProgramStateWriter();
    // mock program configurations
    ProgramId programId = NamespaceId.DEFAULT.app("someapp").program(ProgramType.SERVICE, "s");
    Map<String, String> systemArguments = new HashMap<>();
    systemArguments.put(ProgramOptionConstants.SKIP_PROVISIONING, Boolean.TRUE.toString());
    ProgramOptions programOptions = new SimpleProgramOptions(programId, new BasicArguments(systemArguments), new BasicArguments());
    ProgramRunId runId = programId.run(RunIds.generate());
    ArtifactId artifactId = NamespaceId.DEFAULT.artifact("testArtifact", "1.0").toApiArtifactId();
    ProgramStateWriterWithHeartBeat programStateWriterWithHeartBeat = new ProgramStateWriterWithHeartBeat(runId, programStateWriter, 1, programStatePublisher);
    ApplicationSpecification appSpec = new DefaultApplicationSpecification("name", "1.0.0", ProjectInfo.getVersion().toString(), "desc", null, artifactId, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap());
    ProgramDescriptor programDescriptor = new ProgramDescriptor(programId, appSpec);
    // start the program and ensure heart beat is 0 before we call running
    programStateWriterWithHeartBeat.start(programOptions, null, programDescriptor);
    Assert.assertEquals(0, ((MockProgramStatePublisher) programStatePublisher).getHeartBeatCount());
    programStateWriterWithHeartBeat.running(null);
    // on running, we start receiving heart beat messages, verify if we heart beat count goes to 2.
    Tasks.waitFor(true, () -> ((MockProgramStatePublisher) programStatePublisher).getHeartBeatCount() > 1, 10, TimeUnit.SECONDS, "Didn't receive expected heartbeat after 10 seconds");
    // make sure suspending program suspended the heartbeat thread
    programStateWriterWithHeartBeat.suspend();
    Tasks.waitFor(false, () -> programStateWriterWithHeartBeat.isHeartBeatThreadAlive(), 5, TimeUnit.SECONDS, "Heartbeat thread did not stop after 5 seconds");
    long heartBeatAfterSuspend = ((MockProgramStatePublisher) programStatePublisher).getHeartBeatCount();
    // resume the program and make sure that the heart beat messages goes up after resuming program
    programStateWriterWithHeartBeat.resume();
    long expected = heartBeatAfterSuspend + 1;
    Tasks.waitFor(true, () -> ((MockProgramStatePublisher) programStatePublisher).getHeartBeatCount() > expected, 10, TimeUnit.SECONDS, "Didn't receive expected heartbeat after 10 seconds after resuming program");
    // kill the program and make sure the heart beat thread also gets stopped
    programStateWriterWithHeartBeat.killed();
    Tasks.waitFor(false, () -> programStateWriterWithHeartBeat.isHeartBeatThreadAlive(), 5, TimeUnit.SECONDS, "Heartbeat thread did not stop after 5 seconds");
}
Also used : NoOpProgramStateWriter(io.cdap.cdap.app.runtime.NoOpProgramStateWriter) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) HashMap(java.util.HashMap) ProgramId(io.cdap.cdap.proto.id.ProgramId) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) ProgramOptions(io.cdap.cdap.app.runtime.ProgramOptions) SimpleProgramOptions(io.cdap.cdap.internal.app.runtime.SimpleProgramOptions) BasicArguments(io.cdap.cdap.internal.app.runtime.BasicArguments) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) DefaultApplicationSpecification(io.cdap.cdap.internal.app.DefaultApplicationSpecification) ProgramDescriptor(io.cdap.cdap.app.program.ProgramDescriptor) Test(org.junit.Test)

Aggregations

ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)3 NoOpProgramStateWriter (io.cdap.cdap.app.runtime.NoOpProgramStateWriter)3 ProgramId (io.cdap.cdap.proto.id.ProgramId)3 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)3 Test (org.junit.Test)3 AbstractProgramRuntimeService (io.cdap.cdap.app.runtime.AbstractProgramRuntimeService)2 ProgramRuntimeService (io.cdap.cdap.app.runtime.ProgramRuntimeService)2 ProgramStateWriter (io.cdap.cdap.app.runtime.ProgramStateWriter)2 SimpleRuntimeInfo (io.cdap.cdap.internal.app.runtime.service.SimpleRuntimeInfo)2 BasicThrowable (io.cdap.cdap.proto.BasicThrowable)2 NotRunningProgramLiveInfo (io.cdap.cdap.proto.NotRunningProgramLiveInfo)2 HashMap (java.util.HashMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 RunId (org.apache.twill.api.RunId)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)1 ProgramDescriptor (io.cdap.cdap.app.program.ProgramDescriptor)1 ProgramOptions (io.cdap.cdap.app.runtime.ProgramOptions)1 DefaultApplicationSpecification (io.cdap.cdap.internal.app.DefaultApplicationSpecification)1 BasicArguments (io.cdap.cdap.internal.app.runtime.BasicArguments)1