use of io.cdap.cdap.proto.ProgramRunCluster in project cdap by caskdata.
the class AppMetadataStore method recordProgramProvisioned.
/**
 * Marks a program run as having finished provisioning the compute resources for the run. The update is
 * ignored when the stored run record already carries a higher source id.
 *
 * @param programRunId the program run being updated
 * @param numNodes how many cluster nodes were provisioned
 * @param sourceId unique id identifying the source of the program run status, for example the message id of
 *                 the program run status notification in TMS. Source ids must grow with the recording time of
 *                 the status, so that an attempt to persist a status older than the stored one is ignored
 * @return the {@link RunRecordDetail} that was written, or {@code null} when the update was ignored
 */
@Nullable
public RunRecordDetail recordProgramProvisioned(ProgramRunId programRunId, int numNodes, byte[] sourceId) throws IOException {
  final ProgramRunClusterStatus targetStatus = ProgramRunClusterStatus.PROVISIONED;

  RunRecordDetail current = getRun(programRunId);
  if (current == null) {
    LOG.warn("Ignoring unexpected request to transition program run {} from non-existent state to cluster state {}.",
             programRunId, targetStatus);
    return null;
  }

  boolean accepted = isValid(current, current.getStatus(), targetStatus, sourceId);
  if (!accepted) {
    return null;
  }

  // Replace the stored record: remove it first, then write the updated copy under the active-run key.
  delete(current);
  RunRecordDetail updated = RunRecordDetail.builder(current)
    .setCluster(new ProgramRunCluster(targetStatus, null, numNodes))
    .setSourceId(sourceId)
    .build();
  List<Field<?>> rowKey = getProgramRunInvertedTimeKey(TYPE_RUN_RECORD_ACTIVE, programRunId, current.getStartTs());
  writeToRunRecordTableWithPrimaryKeys(rowKey, updated);

  LOG.trace("Recorded {} for program {}", targetStatus, programRunId);
  return updated;
}
use of io.cdap.cdap.proto.ProgramRunCluster in project cdap by caskdata.
the class AppMetadataStore method recordProgramOrphaned.
/**
 * Record that the program run has been orphaned. If the current status has a higher source id,
 * this call will be ignored.
 *
 * @param programRunId program run
 * @param endTs timestamp in seconds for when the cluster was orphaned
 * @param sourceId unique id representing the source of program run status, such as the message id of the program
 *                 run status notification in TMS. The source id must increase as the recording time of the program
 *                 run status increases, so that the attempt to persist program run status older than the existing
 *                 program run status will be ignored
 * @return {@link RunRecordDetail} that was persisted, or {@code null} if the update was ignored.
 * @throws IOException propagated from the run record store operations invoked by this method
 */
@Nullable
public RunRecordDetail recordProgramOrphaned(ProgramRunId programRunId, long endTs, byte[] sourceId) throws IOException {
  RunRecordDetail existing = getRun(programRunId);
  if (existing == null) {
    // Log the cluster state this call is trying to reach, i.e. ORPHANED.
    LOG.debug("Ignoring unexpected transition of program run {} to cluster state {} with no existing run record.",
              programRunId, ProgramRunClusterStatus.ORPHANED);
    return null;
  }
  if (!isValid(existing, existing.getStatus(), ProgramRunClusterStatus.ORPHANED, sourceId)) {
    return null;
  }
  // Replace the stored record with one carrying the ORPHANED cluster state, written under the completed-run key.
  delete(existing);
  List<Field<?>> key = getProgramRunInvertedTimeKey(TYPE_RUN_RECORD_COMPLETED, programRunId, existing.getStartTs());
  // The node count is carried over from the cluster information of the existing record.
  ProgramRunCluster cluster = new ProgramRunCluster(ProgramRunClusterStatus.ORPHANED, endTs,
                                                    existing.getCluster().getNumNodes());
  RunRecordDetail meta = RunRecordDetail.builder(existing).setCluster(cluster).setSourceId(sourceId).build();
  writeToRunRecordTableWithPrimaryKeys(key, meta);
  LOG.trace("Recorded {} for program {}", ProgramRunClusterStatus.ORPHANED, programRunId);
  return meta;
}
use of io.cdap.cdap.proto.ProgramRunCluster in project cdap by caskdata.
the class AppMetadataStore method recordProgramDeprovisioned.
/**
 * Marks a program run as having deprovisioned the compute resources for the run. The update is ignored
 * when the stored run record already carries a higher source id.
 *
 * @param programRunId the program run being updated
 * @param endTs timestamp in seconds at which the cluster was deprovisioned; {@code null} if the program is
 *              run as part of a workflow
 * @param sourceId unique id identifying the source of the program run status, for example the message id of
 *                 the program run status notification in TMS. Source ids must grow with the recording time of
 *                 the status, so that an attempt to persist a status older than the stored one is ignored
 * @return the {@link RunRecordDetail} that was written, or {@code null} when the update was ignored
 */
@Nullable
public RunRecordDetail recordProgramDeprovisioned(ProgramRunId programRunId, @Nullable Long endTs, byte[] sourceId) throws IOException {
  final ProgramRunClusterStatus targetStatus = ProgramRunClusterStatus.DEPROVISIONED;

  RunRecordDetail current = getRun(programRunId);
  if (current == null) {
    LOG.debug("Ignoring unexpected transition of program run {} to cluster state {} with no existing run record.",
              programRunId, targetStatus);
    return null;
  }

  boolean accepted = isValid(current, current.getStatus(), targetStatus, sourceId);
  if (!accepted) {
    return null;
  }

  // Replace the stored record: remove it first, then write the updated copy under the completed-run key.
  delete(current);
  List<Field<?>> rowKey = getProgramRunInvertedTimeKey(TYPE_RUN_RECORD_COMPLETED, programRunId, current.getStartTs());
  // The node count is taken over from the cluster information of the record being replaced.
  ProgramRunCluster deprovisioned = new ProgramRunCluster(targetStatus, endTs, current.getCluster().getNumNodes());
  RunRecordDetail updated = RunRecordDetail.builder(current)
    .setCluster(deprovisioned)
    .setSourceId(sourceId)
    .build();
  writeToRunRecordTableWithPrimaryKeys(rowKey, updated);

  LOG.trace("Recorded {} for program {}", targetStatus, programRunId);
  return updated;
}
use of io.cdap.cdap.proto.ProgramRunCluster in project cdap by caskdata.
the class MockLogReader method generateLogs.
/**
 * Generates the mock log events used for testing and, when a program id is given, a matching run record.
 * A single call adds {@link #MAX} events:
 * <ul>
 *   <li>Events with index below 20 carry no {@link ApplicationLoggingContext#TAG_RUN_ID} tag; the run id is
 *       generated at index 20.</li>
 *   <li>From index 20 on, every even-indexed event is tagged with the run id, until a stop time is recorded.</li>
 *   <li>A stop time is recorded at index 60, unless the run status is {@code RUNNING} or {@code SUSPENDED};
 *       events from that index on are not tagged with the run id.</li>
 *   <li>Even-indexed events have level {@link Level#ERROR}, odd-indexed ones {@link Level#WARN}.</li>
 *   <li>The MDC property ".origin" cycles through "plugin", "program" and "system".</li>
 *   <li>Even-indexed events carry the MDC property "MDC:eventType" with value "lifecycle".</li>
 *   <li>The event at index 30 additionally gets caller data with a single stack trace element.</li>
 * </ul>
 */
private void generateLogs(LoggingContext loggingContext, ProgramId programId, ProgramRunStatus runStatus) throws InterruptedException {
  // Values cycled through for the ".origin" MDC property
  String[] originValues = { "plugin", "program", "system" };
  String entityName = LoggingContextHelper.getEntityId(loggingContext).getValue();
  StackTraceElement nativeFrame = new StackTraceElement("io.cdap.Test", "testMethod", null, -2);
  // A stop time is only recorded for runs that are neither running nor suspended
  boolean recordsStop = runStatus != ProgramRunStatus.RUNNING && runStatus != ProgramRunStatus.SUSPENDED;

  RunId runId = null;
  Long stopTs = null;
  for (int idx = 0; idx < MAX; ++idx) {
    boolean evenIndex = idx % 2 == 0;

    // The run id comes into existence at event 20
    if (idx == 20) {
      runId = RunIds.generate(TimeUnit.SECONDS.toMillis(getMockTimeSecs(idx)));
    }
    // The stop time is captured at event 60; the run is still recorded for the remaining events
    if (idx == 60 && recordsStop) {
      stopTs = getMockTimeSecs(idx);
    }

    Level level;
    if (evenIndex) {
      level = Level.ERROR;
    } else {
      level = Level.WARN;
    }
    ch.qos.logback.classic.Logger rootLogger =
      (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME);
    LoggingEvent event = new LoggingEvent("io.cdap.Test", rootLogger, level, entityName + "<img>-" + idx, null, null);
    event.setTimeStamp(TimeUnit.SECONDS.toMillis(getMockTimeSecs(idx)));
    if (idx == 30) {
      event.setCallerData(new StackTraceElement[] { nativeFrame });
    }

    // Start from the system tags of the logging context, then layer the per-event tags on top
    Map<String, String> mdcTags =
      Maps.newHashMap(Maps.transformValues(loggingContext.getSystemTagsMap(), TAG_TO_STRING_FUNCTION));
    if (evenIndex && runId != null && stopTs == null) {
      mdcTags.put(ApplicationLoggingContext.TAG_RUN_ID, runId.getId());
    }
    // The ".origin" value is selected by (idx % 3)
    mdcTags.put(".origin", originValues[idx % 3]);
    if (evenIndex) {
      mdcTags.put("MDC:eventType", "lifecycle");
    }
    event.setMDCPropertyMap(mdcTags);

    logEvents.add(new LogEvent(event, new LogOffset(idx, idx)));
  }

  long startTs = RunIds.getTime(runId, TimeUnit.SECONDS);
  if (programId == null) {
    return;
  }

  // noinspection ConstantConditions
  String pid = runId.getId();
  ProgramRunCluster provisionedCluster = new ProgramRunCluster(ProgramRunClusterStatus.PROVISIONED, null, null);
  RunRecord record = RunRecord.builder()
    .setRunId(pid)
    .setStartTime(startTs)
    .setRunTime(startTs + 1)
    .setStopTime(stopTs)
    .setStatus(runStatus)
    .setCluster(provisionedCluster)
    .build();
  runRecordMap.put(programId, record);

  setStartAndRunning(programId.run(pid));
  if (stopTs != null) {
    store.setStop(programId.run(pid), stopTs, runStatus, AppFabricTestHelper.createSourceId(++sourceId));
  }
}
use of io.cdap.cdap.proto.ProgramRunCluster in project cdap by cdapio.
the class DefaultStoreTest method testLogProgramRunHistory.
/**
 * Exercises run-record history queries of the store for workflow runs in different states.
 *
 * The test records failed, completed, suspended and still-running runs of workflow "wf1", plus runs of a second
 * workflow and of a program id under a different account. It then verifies that {@code store.getRuns} filters
 * by status and time range and that {@code store.getRun} returns records equal to the ones found through
 * {@code getRuns}. The second half records runs that fail or suspend before running, and a run that is killed
 * while suspended, and compares each stored record with an explicitly built expected {@link RunRecordDetail}.
 *
 * Statement order matters: every status change consumes the next value of {@code ++sourceId}.
 */
@Test
public void testLogProgramRunHistory() {
// Properties expected on a run record that was started without runtime arguments
Map<String, String> noRuntimeArgsProps = ImmutableMap.of("runtimeArgs", GSON.toJson(ImmutableMap.<String, String>of()));
// record finished Workflow
ProgramId programId = new ProgramId("account1", "application1", ProgramType.WORKFLOW, "wf1");
long now = System.currentTimeMillis();
long startTimeSecs = TimeUnit.MILLISECONDS.toSeconds(now);
// run1 is generated 20 seconds in the past and is stopped in the ERROR state
RunId run1 = RunIds.generate(now - 20000);
ArtifactId artifactId = programId.getNamespaceId().artifact("testArtifact", "1.0").toApiArtifactId();
setStartAndRunning(programId.run(run1.getId()), artifactId);
store.setStop(programId.run(run1.getId()), startTimeSecs - 10, ProgramController.State.ERROR.getRunStatus(), AppFabricTestHelper.createSourceId(++sourceId));
// record another finished Workflow
RunId run2 = RunIds.generate(now - 10000);
setStartAndRunning(programId.run(run2.getId()), artifactId);
store.setStop(programId.run(run2.getId()), startTimeSecs - 5, ProgramController.State.COMPLETED.getRunStatus(), AppFabricTestHelper.createSourceId(++sourceId));
// record a suspended Workflow
RunId run21 = RunIds.generate(now - 7500);
setStartAndRunning(programId.run(run21.getId()), artifactId);
store.setSuspend(programId.run(run21.getId()), AppFabricTestHelper.createSourceId(++sourceId), -1);
// record not finished Workflow
RunId run3 = RunIds.generate(now);
setStartAndRunning(programId.run(run3.getId()), artifactId);
// For a RunRecordDetail that has not yet been completed, getStopTs should return null
RunRecordDetail runRecord = store.getRun(programId.run(run3.getId()));
Assert.assertNotNull(runRecord);
Assert.assertNull(runRecord.getStopTs());
// record run of different program
ProgramId programId2 = new ProgramId("account1", "application1", ProgramType.WORKFLOW, "wf2");
RunId run4 = RunIds.generate(now - 5000);
setStartAndRunning(programId2.run(run4.getId()), artifactId);
store.setStop(programId2.run(run4.getId()), startTimeSecs - 4, ProgramController.State.COMPLETED.getRunStatus(), AppFabricTestHelper.createSourceId(++sourceId));
// record for different account
setStartAndRunning(new ProgramId("account2", "application1", ProgramType.WORKFLOW, "wf1").run(run3.getId()), artifactId);
// A dedicated "get" method on the DefaultStore interface would be a better fit for this lookup, but there is
// none, so the history is fetched through getRuns
Map<ProgramRunId, RunRecordDetail> successHistorymap = store.getRuns(programId, ProgramRunStatus.COMPLETED, 0, Long.MAX_VALUE, Integer.MAX_VALUE);
Map<ProgramRunId, RunRecordDetail> failureHistorymap = store.getRuns(programId, ProgramRunStatus.FAILED, startTimeSecs - 20, startTimeSecs - 10, Integer.MAX_VALUE);
// The bounded time range above must yield the same failed runs as an unbounded one
Assert.assertEquals(failureHistorymap, store.getRuns(programId, ProgramRunStatus.FAILED, 0, Long.MAX_VALUE, Integer.MAX_VALUE));
Map<ProgramRunId, RunRecordDetail> suspendedHistorymap = store.getRuns(programId, ProgramRunStatus.SUSPENDED, startTimeSecs - 20, startTimeSecs, Integer.MAX_VALUE);
// only finished + succeeded runs should be returned
Assert.assertEquals(1, successHistorymap.size());
// only finished + failed runs should be returned
Assert.assertEquals(1, failureHistorymap.size());
// only suspended runs should be returned
Assert.assertEquals(1, suspendedHistorymap.size());
// records should be sorted by start time latest to earliest
// The single completed run is expected to be run2 (generated at now - 10000 ms, stopped at startTimeSecs - 5)
RunRecordDetail run = successHistorymap.values().iterator().next();
Assert.assertEquals(startTimeSecs - 10, run.getStartTs());
Assert.assertEquals(Long.valueOf(startTimeSecs - 5), run.getStopTs());
Assert.assertEquals(ProgramController.State.COMPLETED.getRunStatus(), run.getStatus());
// The single failed run is expected to be run1 (generated at now - 20000 ms, stopped at startTimeSecs - 10)
run = failureHistorymap.values().iterator().next();
Assert.assertEquals(startTimeSecs - 20, run.getStartTs());
Assert.assertEquals(Long.valueOf(startTimeSecs - 10), run.getStopTs());
Assert.assertEquals(ProgramController.State.ERROR.getRunStatus(), run.getStatus());
// The single suspended run must be run21
run = suspendedHistorymap.values().iterator().next();
Assert.assertEquals(run21.getId(), run.getPid());
Assert.assertEquals(ProgramController.State.SUSPENDED.getRunStatus(), run.getStatus());
// Assert all history: four runs have been recorded for programId so far (run1, run2, run21 and run3)
Map<ProgramRunId, RunRecordDetail> allHistorymap = store.getRuns(programId, ProgramRunStatus.ALL, startTimeSecs - 20, startTimeSecs + 1, Integer.MAX_VALUE);
Assert.assertEquals(allHistorymap.toString(), 4, allHistorymap.size());
// Assert running programs
Map<ProgramRunId, RunRecordDetail> runningHistorymap = store.getRuns(programId, ProgramRunStatus.RUNNING, startTimeSecs, startTimeSecs + 1, 100);
Assert.assertEquals(1, runningHistorymap.size());
Assert.assertEquals(runningHistorymap, store.getRuns(programId, ProgramRunStatus.RUNNING, 0, Long.MAX_VALUE, 100));
// Get a run record for running program
RunRecordDetail expectedRunning = runningHistorymap.values().iterator().next();
Assert.assertNotNull(expectedRunning);
RunRecordDetail actualRunning = store.getRun(programId.run(expectedRunning.getPid()));
Assert.assertEquals(expectedRunning, actualRunning);
// Get a run record for completed run
RunRecordDetail expectedCompleted = successHistorymap.values().iterator().next();
Assert.assertNotNull(expectedCompleted);
RunRecordDetail actualCompleted = store.getRun(programId.run(expectedCompleted.getPid()));
Assert.assertEquals(expectedCompleted, actualCompleted);
// Get a run record for suspended run
RunRecordDetail expectedSuspended = suspendedHistorymap.values().iterator().next();
Assert.assertNotNull(expectedSuspended);
RunRecordDetail actualSuspended = store.getRun(programId.run(expectedSuspended.getPid()));
Assert.assertEquals(expectedSuspended, actualSuspended);
// Cluster value used in the expected run records built below
ProgramRunCluster emptyCluster = new ProgramRunCluster(ProgramRunClusterStatus.PROVISIONED, null, 0);
// Record workflow that starts but encounters error before it runs
RunId run7 = RunIds.generate(now);
Map<String, String> emptyArgs = ImmutableMap.of();
setStart(programId.run(run7.getId()), emptyArgs, emptyArgs, artifactId);
store.setStop(programId.run(run7.getId()), startTimeSecs + 1, ProgramController.State.ERROR.getRunStatus(), AppFabricTestHelper.createSourceId(++sourceId));
// The expected source id is the one consumed by the setStop call above (sourceId is not incremented again)
RunRecordDetail expectedRunRecord7 = RunRecordDetail.builder().setProgramRunId(programId.run(run7)).setStartTime(startTimeSecs).setStopTime(startTimeSecs + 1).setStatus(ProgramRunStatus.FAILED).setProperties(noRuntimeArgsProps).setCluster(emptyCluster).setArtifactId(artifactId).setSourceId(AppFabricTestHelper.createSourceId(sourceId)).build();
RunRecordDetail actualRecord7 = store.getRun(programId.run(run7.getId()));
Assert.assertEquals(expectedRunRecord7, actualRecord7);
// Record workflow that starts and suspends before it runs
RunId run8 = RunIds.generate(now);
setStart(programId.run(run8.getId()), emptyArgs, emptyArgs, artifactId);
store.setSuspend(programId.run(run8.getId()), AppFabricTestHelper.createSourceId(++sourceId), -1);
RunRecordDetail expectedRunRecord8 = RunRecordDetail.builder().setProgramRunId(programId.run(run8)).setStartTime(startTimeSecs).setStatus(ProgramRunStatus.SUSPENDED).setProperties(noRuntimeArgsProps).setCluster(emptyCluster).setArtifactId(artifactId).setSourceId(AppFabricTestHelper.createSourceId(sourceId)).build();
RunRecordDetail actualRecord8 = store.getRun(programId.run(run8.getId()));
Assert.assertEquals(expectedRunRecord8, actualRecord8);
// Record workflow that is killed while suspended
RunId run9 = RunIds.generate(now);
setStartAndRunning(programId.run(run9.getId()), artifactId);
store.setSuspend(programId.run(run9.getId()), AppFabricTestHelper.createSourceId(++sourceId), -1);
store.setStop(programId.run(run9.getId()), startTimeSecs + 5, ProgramRunStatus.KILLED, AppFabricTestHelper.createSourceId(++sourceId));
RunRecordDetail expectedRunRecord9 = RunRecordDetail.builder().setProgramRunId(programId.run(run9)).setStartTime(startTimeSecs).setRunTime(startTimeSecs + 1).setStopTime(startTimeSecs + 5).setStatus(ProgramRunStatus.KILLED).setProperties(noRuntimeArgsProps).setCluster(emptyCluster).setArtifactId(artifactId).setSourceId(AppFabricTestHelper.createSourceId(sourceId)).build();
RunRecordDetail actualRecord9 = store.getRun(programId.run(run9.getId()));
Assert.assertEquals(expectedRunRecord9, actualRecord9);
// Non-existent run record should give null
Assert.assertNull(store.getRun(programId.run(UUID.randomUUID().toString())));
// Searching for history in wrong time range should give us no results
Assert.assertTrue(store.getRuns(programId, ProgramRunStatus.COMPLETED, startTimeSecs - 5000, startTimeSecs - 2000, Integer.MAX_VALUE).isEmpty());
Assert.assertTrue(store.getRuns(programId, ProgramRunStatus.ALL, startTimeSecs - 5000, startTimeSecs - 2000, Integer.MAX_VALUE).isEmpty());
}
Aggregations