use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class AppMetadataStore method delete.
/**
 * Deletes the row of the given run record from the run records table.
 *
 * <p>The row key is built by {@code getProgramRunInvertedTimeKey} from the record type that
 * {@code STATUS_TYPE_MAP} associates with the record's status, the record's program run id,
 * and the record's start timestamp.
 *
 * @param record the run record whose row is to be removed
 * @throws IOException declared by this method; raised by the table operations it calls
 */
private void delete(RunRecordDetail record) throws IOException {
  List<Field<?>> rowKey = getProgramRunInvertedTimeKey(
      STATUS_TYPE_MAP.get(record.getStatus()),
      record.getProgramRunId(),
      record.getStartTs());
  getRunRecordsTable().delete(rowKey);
}
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class AppMetadataStore method addWorkflowNodeState.
/**
 * Stores the node state of a program run that was started by a workflow, and records the
 * node id to program run id mapping in the properties of the parent workflow's run record.
 *
 * <p>The workflow node id, workflow name and workflow run id are read from {@code systemArgs}.
 * Nothing is written when the active run record of the workflow cannot be found, or when a
 * node state already exists for a different program run and {@code status} is not
 * {@link ProgramRunStatus#STARTING}.
 *
 * @param programRunId the program run whose state is being recorded
 * @param systemArgs system arguments of the program run; read for the workflow node id,
 *     workflow name and workflow run id
 * @param status the program run status, converted to a node status before it is stored
 * @param failureCause the failure cause to store with the node state, may be {@code null}
 * @param sourceId the source id to set on the updated workflow run record
 * @throws IOException declared by this method; raised by the table operations it calls
 */
private void addWorkflowNodeState(ProgramRunId programRunId, Map<String, String> systemArgs, ProgramRunStatus status, @Nullable BasicThrowable failureCause, byte[] sourceId) throws IOException {
  String nodeId = systemArgs.get(ProgramOptionConstants.WORKFLOW_NODE_ID);
  String workflowName = systemArgs.get(ProgramOptionConstants.WORKFLOW_NAME);
  String workflowRun = systemArgs.get(ProgramOptionConstants.WORKFLOW_RUN_ID);
  ProgramRunId workflowRunId = programRunId.getParent().getParent().workflow(workflowName).run(workflowRun);

  // Look up the active run record of the workflow that started this program.
  List<Field<?>> workflowRecordKey = getProgramRunInvertedTimeKey(
      TYPE_RUN_RECORD_ACTIVE, workflowRunId, RunIds.getTime(workflowRun, TimeUnit.SECONDS));
  RunRecordDetail workflowRecord = getRunRecordsTable().read(workflowRecordKey)
      .map(AppMetadataStore::deserializeRunRecordMeta)
      .orElse(null);
  if (workflowRecord == null) {
    // The workflow is gone, so there is nothing to update.
    return;
  }

  List<Field<?>> nodeStateKey = getWorkflowPrimaryKeys(workflowRunId, nodeId);
  WorkflowNodeStateDetail existingState = getWorkflowNodeStateTable().read(nodeStateKey)
      .map(row -> row.getString(StoreDefinition.AppMetadataStore.NODE_STATE_DATA))
      .map(json -> GSON.fromJson(json, WorkflowNodeStateDetail.class))
      .orElse(null);

  // The update proceeds only when
  // - the status is STARTING, or
  // - no node state exists yet, or
  // - the program runId is the same as the existing workflow node state.
  boolean ownedByOtherRun = status != ProgramRunStatus.STARTING
      && existingState != null
      && !programRunId.getRun().equals(existingState.getRunId());
  if (ownedByOtherRun) {
    return;
  }

  WorkflowNodeStateDetail updatedState = new WorkflowNodeStateDetail(
      nodeId, ProgramRunStatus.toNodeStatus(status), programRunId.getRun(), failureCause);
  writeToStructuredTableWithPrimaryKeys(
      nodeStateKey, updatedState, getWorkflowNodeStateTable(), StoreDefinition.AppMetadataStore.NODE_STATE_DATA);

  // Add the node id -> program run id entry to the parent workflow run record's properties.
  Map<String, String> updatedProperties = new HashMap<>(workflowRecord.getProperties());
  updatedProperties.put(nodeId, programRunId.getRun());
  RunRecordDetail updatedRecord = RunRecordDetail.builder(workflowRecord)
      .setProperties(updatedProperties)
      .setSourceId(sourceId)
      .build();
  writeToRunRecordTableWithPrimaryKeys(workflowRecordKey, updatedRecord);
}
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class AppMetadataStore method getCompletedRuns.
/**
 * Reads the completed run records for the given program runs with a single multi-read.
 *
 * <p>For every run id a key is built from the completed-record program prefix, the inverted
 * start time (in seconds, derived from the run id itself) and the run id string.
 *
 * @param programRunIds the program runs to look up
 * @return a {@link LinkedHashMap} from program run id to its run record, in the order the
 *     rows come back from the multi-read
 * @throws IOException declared by this method; raised by the table operations it calls
 * @throws IllegalStateException if two returned records share the same program run id
 */
private Map<ProgramRunId, RunRecordDetail> getCompletedRuns(Set<ProgramRunId> programRunIds) throws IOException {
  List<List<Field<?>>> rowKeys = new ArrayList<>();
  for (ProgramRunId runId : programRunIds) {
    String run = runId.getRun();
    List<Field<?>> rowKey = getRunRecordProgramPrefix(TYPE_RUN_RECORD_COMPLETED, runId.getParent());
    // The start time is encoded in the run id.
    long startSecs = RunIds.getTime(RunIds.fromString(run), TimeUnit.SECONDS);
    rowKey.add(Fields.longField(StoreDefinition.AppMetadataStore.RUN_START_TIME, getInvertedTsKeyPart(startSecs)));
    rowKey.add(Fields.stringField(StoreDefinition.AppMetadataStore.RUN_FIELD, runId.getRun()));
    rowKeys.add(rowKey);
  }
  return getRunRecordsTable().multiRead(rowKeys).stream()
      .map(AppMetadataStore::deserializeRunRecordMeta)
      .collect(Collectors.toMap(
          RunRecordDetail::getProgramRunId,
          detail -> detail,
          (first, second) -> {
            throw new IllegalStateException("Duplicate run record for " + first.getProgramRunId());
          },
          LinkedHashMap::new));
}
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class RunRecordCorrectorService method doFixRunRecords.
/**
 * Corrects run records that are stored in a not-stopped state although the program is not
 * actually running.
 *
 * <p>For each status in {@code NOT_STOPPED_STATUSES}, matching records are fetched from the store
 * in batches of at most {@code txBatchSize}. Every returned run is reported as errored through
 * the {@code programStateWriter} and added to the result set. The filter is created from that
 * same result set, and fetching for a status stops once a batch comes back empty.
 *
 * @return the set of fixed {@link ProgramRunId}.
 */
private Set<ProgramRunId> doFixRunRecords() {
  LOG.trace("Start getting run records not actually running ...");
  // Collect run records in STARTING, RUNNING and SUSPENDED states that are actually not running.
  // Work in micro batches of transactions to avoid tx timeout.
  Set<ProgramRunId> fixedPrograms = new HashSet<>();
  Predicate<RunRecordDetail> notRunningFilter = createFilter(fixedPrograms);
  for (ProgramRunStatus status : NOT_STOPPED_STATUSES) {
    Map<ProgramRunId, RunRecordDetail> batch;
    do {
      // Runs are not guaranteed to come back in order of start time, so the entire time range is
      // scanned on every pass. This should not perform worse than a narrower time range, because
      // the time range is only used as a read-time filter.
      batch = store.getRuns(status, 0L, Long.MAX_VALUE, txBatchSize, notRunningFilter);
      LOG.trace("{} run records in {} state but are not actually running", batch.size(), status);
      for (RunRecordDetail runRecord : batch.values()) {
        ProgramRunId runId = runRecord.getProgramRunId();
        String msg = String.format("Fixed RunRecord for program run %s in %s state because it is actually not running", runId, runRecord.getStatus());
        programStateWriter.error(runId, new ProgramRunAbortedException(msg));
        fixedPrograms.add(runId);
        LOG.warn(msg);
      }
    } while (!batch.isEmpty());
  }
  if (!fixedPrograms.isEmpty()) {
    LOG.warn("Fixed {} RunRecords with status in {}, but the programs are not actually running", fixedPrograms.size(), NOT_STOPPED_STATUSES);
  } else {
    LOG.trace("No RunRecord found with status in {}, but the program are not actually running", NOT_STOPPED_STATUSES);
  }
  return fixedPrograms;
}
use of io.cdap.cdap.proto.id.ProgramRunId in project cdap by caskdata.
the class DirectRuntimeRequestValidatorTest method testFetcher.
/**
 * Checks that the validator returns the run status supplied by the run record fetcher, and
 * that a second lookup for the same run still returns that status after the fetcher's record
 * has been set to {@code null}.
 */
@Test
public void testFetcher() throws BadRequestException {
  ArtifactId artifactId = new ArtifactId("test", new ArtifactVersion("1.0"), ArtifactScope.USER);
  ProgramRunId programRunId = NamespaceId.DEFAULT.app("app").spark("spark").run(RunIds.generate());
  ProgramRunStatus expectedStatus = ProgramRunStatus.RUNNING;
  RunRecordDetail runRecord = RunRecordDetail.builder()
      .setProgramRunId(programRunId)
      .setStartTime(System.currentTimeMillis())
      .setArtifactId(artifactId)
      .setStatus(expectedStatus)
      .setSystemArgs(ImmutableMap.of(SystemArguments.PROFILE_NAME, "default", SystemArguments.PROFILE_PROVISIONER, "native"))
      .setProfileId(NamespaceId.DEFAULT.profile("native"))
      .setSourceId(new byte[MessageId.RAW_ID_SIZE])
      .build();
  MockProgramRunRecordFetcher fetcher = new MockProgramRunRecordFetcher().setRunRecord(runRecord);
  RuntimeRequestValidator validator = new DirectRuntimeRequestValidator(cConf, txRunner, fetcher, accessEnforcer, authenticationContext);

  // First lookup: expected to go through the run record fetcher.
  ProgramRunInfo firstInfo = validator.getProgramRunStatus(
      programRunId, new DefaultHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, "/"));
  Assert.assertEquals(expectedStatus, firstInfo.getProgramRunStatus());

  // Second lookup: expected to be served from the runtime store, so what the fetcher returns
  // should no longer matter.
  fetcher.setRunRecord(null);
  ProgramRunInfo secondInfo = validator.getProgramRunStatus(
      programRunId, new DefaultHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, "/"));
  Assert.assertEquals(expectedStatus, secondInfo.getProgramRunStatus());
}
Aggregations