Search in sources :

Example 26 with ProgramId

use of io.cdap.cdap.proto.id.ProgramId in project cdap by caskdata.

the class LocalPreferencesFetcherInternal method get.

/**
 * Looks up the preferences of the supplied entity through the local {@link PreferencesService}.
 *
 * @param entityId the entity whose preferences are requested; its entity type must be
 *                 INSTANCE, NAMESPACE, APPLICATION or PROGRAM
 * @param resolved when {@code true} the service's resolved-preferences lookup is used,
 *                 otherwise its plain preferences lookup
 * @return the preferences detail returned by the service for that entity
 * @throws UnsupportedOperationException if the entity type is none of the supported types
 */
public PreferencesDetail get(EntityId entityId, boolean resolved) {
    final PreferencesService prefs = preferencesService;
    switch (entityId.getEntityType()) {
        case INSTANCE:
            if (resolved) {
                return prefs.getResolvedPreferences();
            }
            return prefs.getPreferences();
        case NAMESPACE: {
            NamespaceId namespace = (NamespaceId) entityId;
            if (resolved) {
                return prefs.getResolvedPreferences(namespace);
            }
            return prefs.getPreferences(namespace);
        }
        case APPLICATION: {
            ApplicationId application = (ApplicationId) entityId;
            if (resolved) {
                return prefs.getResolvedPreferences(application);
            }
            return prefs.getPreferences(application);
        }
        case PROGRAM: {
            ProgramId program = (ProgramId) entityId;
            if (resolved) {
                return prefs.getResolvedPreferences(program);
            }
            return prefs.getPreferences(program);
        }
        default:
            // Every supported type returned above; anything else is rejected.
            throw new UnsupportedOperationException(String.format("Preferences cannot be used on this entity type: %s", entityId.getEntityType()));
    }
}
Also used : PreferencesDetail(io.cdap.cdap.proto.PreferencesDetail) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ProgramId(io.cdap.cdap.proto.id.ProgramId) PreferencesService(io.cdap.cdap.config.PreferencesService)

Example 27 with ProgramId

use of io.cdap.cdap.proto.id.ProgramId in project cdap by caskdata.

the class LineageAdmin method extractAndAddInnerPrograms.

/**
 * Registers the programs that ran inside a workflow run.
 *
 * <p>Every property of the workflow run record whose key is a node id of the workflow
 * specification is treated as a (node id, inner run id) pair. For each such pair the inner
 * program id is built from the node's program type and the node id, the inner program run is
 * mapped to the workflow run in {@code programWorkflowMap}, and the inner program is added to
 * {@code toVisitPrograms}. Properties whose key is not a node id are ignored.
 *
 * @param toVisitPrograms    receives the inner programs that were found
 * @param programWorkflowMap receives the inner program run to workflow run mapping
 * @param appSpecs           application specifications keyed by application id
 * @param programRunId       the workflow run being expanded
 * @param wfRunRecord        run record of that workflow run, whose properties are scanned
 */
private void extractAndAddInnerPrograms(Set<ProgramId> toVisitPrograms, Map<ProgramRunId, ProgramRunId> programWorkflowMap, Map<ApplicationId, ApplicationSpecification> appSpecs, ProgramRunId programRunId, RunRecordDetail wfRunRecord) {
    ApplicationId workflowApp = programRunId.getParent().getParent();
    ApplicationSpecification appSpec = appSpecs.get(workflowApp);
    WorkflowSpecification spec = appSpec.getWorkflows().get(programRunId.getProgram());
    Map<String, WorkflowNode> nodesById = spec.getNodeIdMap();
    wfRunRecord.getProperties().forEach((nodeId, innerRunId) -> {
        // Skip properties that do not name a node of this workflow.
        if (!nodesById.containsKey(nodeId)) {
            return;
        }
        WorkflowActionNode actionNode = (WorkflowActionNode) nodesById.get(nodeId);
        String typeName = actionNode.getProgram().getProgramType().name();
        ProgramId innerProgram = workflowApp.program(ProgramType.valueOf(typeName), nodeId);
        programWorkflowMap.put(innerProgram.run(innerRunId), programRunId);
        toVisitPrograms.add(innerProgram);
    });
}
Also used : WorkflowActionNode(io.cdap.cdap.api.workflow.WorkflowActionNode) WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) ProgramType(io.cdap.cdap.proto.ProgramType) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ProgramId(io.cdap.cdap.proto.id.ProgramId) WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode)

Example 28 with ProgramId

use of io.cdap.cdap.proto.id.ProgramId in project cdap by caskdata.

the class LineageAdmin method doComputeLineage.

/**
 * Computes the lineage of a dataset by alternately visiting datasets and programs, starting
 * from {@code sourceData}, for at most {@code levels} iterations.
 *
 * <p>The millisecond time range is converted to seconds and passed to
 * {@code store.getRunningInRange}; the scan range derived from the returned run ids (start, end
 * and filter) is then used for every relation lookup.
 *
 * @param sourceData  the dataset the traversal starts from
 * @param startMillis start of the time range, in milliseconds
 * @param endMillis   end of the time range, in milliseconds
 * @param levels      number of dataset/program iterations to perform
 * @param rollup      optional roll-up setting; workflow roll-up is enabled when it is non-null
 *                    and contains "workflow"
 * @return a {@link Lineage} built from all relations collected during the traversal
 */
private Lineage doComputeLineage(DatasetId sourceData, long startMillis, long endMillis, int levels, @Nullable String rollup) {
    LOG.trace("Computing lineage for data {}, startMillis {}, endMillis {}, levels {}", sourceData, startMillis, endMillis, levels);
    boolean rollUpWorkflow = rollup != null && rollup.contains("workflow");
    // Convert start time and end time period into scan keys in terms of program start times.
    Set<RunId> runningInRange = store.getRunningInRange(TimeUnit.MILLISECONDS.toSeconds(startMillis), TimeUnit.MILLISECONDS.toSeconds(endMillis));
    LOG.trace("Got {} rundIds in time range ({}, {})", runningInRange.size(), startMillis, endMillis);
    ScanRangeWithFilter scanRange = getScanRange(runningInRange);
    LOG.trace("Using scan start = {}, scan end = {}", scanRange.getStart(), scanRange.getEnd());
    // relations collected over all levels; this is what the returned lineage is built from
    Multimap<RelationKey, Relation> relations = HashMultimap.create();
    // the visited sets persist across levels, the toVisit sets are refilled on every level
    Set<DatasetId> visitedDatasets = new HashSet<>();
    Set<DatasetId> toVisitDatasets = new HashSet<>();
    Set<ProgramId> visitedPrograms = new HashSet<>();
    Set<ProgramId> toVisitPrograms = new HashSet<>();
    // this map is to map the inner program run id to the workflow run id, this is needed to collapse the inner
    // program and local datasets
    Map<ProgramRunId, ProgramRunId> programWorkflowMap = new HashMap<>();
    // seed the traversal with the source dataset
    toVisitDatasets.add(sourceData);
    for (int i = 0; i < levels; ++i) {
        LOG.trace("Level {}", i);
        // programs to visit on this level are gathered from the datasets processed below
        toVisitPrograms.clear();
        for (DatasetId d : toVisitDatasets) {
            // Set.add returns false for a dataset that was already visited, so each dataset is processed once
            if (visitedDatasets.add(d)) {
                LOG.trace("Visiting dataset {}", d);
                // Fetch related programs, the programs will be the inner programs which access the datasets. For example,
                // mapreduce or spark program in a workflow
                Set<Relation> programRelations = lineageStoreReader.getRelations(d, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
                LOG.trace("Got program relations {}", programRelations);
                // determine if a dataset is local dataset. The local dataset always ends with the workflow run id
                if (rollUpWorkflow) {
                    computeWorkflowInnerPrograms(toVisitPrograms, programWorkflowMap, programRelations);
                }
                // add to the relations, replace the inner program with the workflow using the map, ignore the
                // local datasets relations, the local dataset always ends with the run id of the workflow
                filterAndAddRelations(rollUpWorkflow, relations, programWorkflowMap, programRelations);
                // queue every program that appears in the fetched relations
                toVisitPrograms.addAll(programRelations.stream().map(Relation::getProgram).collect(Collectors.toSet()));
            }
        }
        // datasets for the next level are gathered from the programs processed below
        toVisitDatasets.clear();
        for (ProgramId p : toVisitPrograms) {
            // same once-only guard as for datasets
            if (visitedPrograms.add(p)) {
                LOG.trace("Visiting program {}", p);
                // Fetch related datasets
                Set<Relation> datasetRelations = lineageStoreReader.getRelations(p, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
                LOG.trace("Got data relations {}", datasetRelations);
                Set<DatasetId> localDatasets = filterAndAddRelations(rollUpWorkflow, relations, programWorkflowMap, datasetRelations);
                // queue the datasets of these relations for the next level, skipping those returned as local datasets
                toVisitDatasets.addAll(datasetRelations.stream().map(relation -> (DatasetId) relation.getData()).filter(datasetId -> !localDatasets.contains(datasetId)).collect(Collectors.toSet()));
            }
        }
    }
    // apply COLLAPSE_UNKNOWN_TYPE_FUNCTION to each key's relations and concatenate the results into the lineage
    Lineage lineage = new Lineage(Iterables.concat(Maps.transformValues(relations.asMap(), COLLAPSE_UNKNOWN_TYPE_FUNCTION::apply).values()));
    LOG.trace("Got lineage {}", lineage);
    return lineage;
}
Also used : DefaultLineageStoreReader(io.cdap.cdap.data2.metadata.lineage.DefaultLineageStoreReader) RunRecordDetail(io.cdap.cdap.internal.app.store.RunRecordDetail) Iterables(com.google.common.collect.Iterables) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) WorkflowSpecification(io.cdap.cdap.api.workflow.WorkflowSpecification) Inject(com.google.inject.Inject) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Collections2(com.google.common.collect.Collections2) Multimap(com.google.common.collect.Multimap) ProgramType(io.cdap.cdap.proto.ProgramType) Function(java.util.function.Function) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) HashSet(java.util.HashSet) WorkflowNode(io.cdap.cdap.api.workflow.WorkflowNode) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) HashMultimap(com.google.common.collect.HashMultimap) DatasetId(io.cdap.cdap.proto.id.DatasetId) Map(java.util.Map) RunId(org.apache.twill.api.RunId) WorkflowActionNode(io.cdap.cdap.api.workflow.WorkflowActionNode) AccessType(io.cdap.cdap.data2.metadata.lineage.AccessType) Nullable(javax.annotation.Nullable) Logger(org.slf4j.Logger) RunIds(io.cdap.cdap.common.app.RunIds) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) Predicate(java.util.function.Predicate) Collection(java.util.Collection) ApplicationSpecification(io.cdap.cdap.api.app.ApplicationSpecification) ProgramId(io.cdap.cdap.proto.id.ProgramId) Set(java.util.Set) Maps(com.google.common.collect.Maps) Collectors(java.util.stream.Collectors) Store(io.cdap.cdap.app.store.Store) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LineageStoreReader(io.cdap.cdap.data2.metadata.lineage.LineageStoreReader) ProgramOptionConstants(io.cdap.cdap.internal.app.runtime.ProgramOptionConstants) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashMap(java.util.HashMap) Lineage(io.cdap.cdap.data2.metadata.lineage.Lineage) 
ProgramId(io.cdap.cdap.proto.id.ProgramId) DatasetId(io.cdap.cdap.proto.id.DatasetId) Relation(io.cdap.cdap.data2.metadata.lineage.Relation) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) ProgramRunId(io.cdap.cdap.proto.id.ProgramRunId) RunId(org.apache.twill.api.RunId) HashSet(java.util.HashSet)

Example 29 with ProgramId

use of io.cdap.cdap.proto.id.ProgramId in project cdap by caskdata.

the class RemotePreferencesFetcherInternal method getPreferencesURI.

/**
 * Builds the relative URI used to fetch the preferences of the supplied entity.
 *
 * @param entityId the entity whose preferences are requested; its entity type must be
 *                 INSTANCE, NAMESPACE, APPLICATION or PROGRAM
 * @param resolved when {@code true}, {@code ?resolved=true} is appended to the URI
 * @return the preferences URI for the entity
 * @throws UnsupportedOperationException if the entity type is none of the supported types
 */
private String getPreferencesURI(EntityId entityId, boolean resolved) {
    // Query suffix shared by every supported entity type.
    final String query = resolved ? "?resolved=true" : "";
    switch (entityId.getEntityType()) {
        case INSTANCE:
            return "preferences" + query;
        case NAMESPACE: {
            NamespaceId namespace = (NamespaceId) entityId;
            return String.format("namespaces/%s/preferences", namespace.getNamespace()) + query;
        }
        case APPLICATION: {
            ApplicationId application = (ApplicationId) entityId;
            return String.format("namespaces/%s/apps/%s/preferences", application.getNamespace(), application.getApplication()) + query;
        }
        case PROGRAM: {
            ProgramId program = (ProgramId) entityId;
            return String.format("namespaces/%s/apps/%s/%s/%s/preferences", program.getNamespace(), program.getApplication(), program.getType().getCategoryName(), program.getProgram()) + query;
        }
        default:
            throw new UnsupportedOperationException(String.format("Preferences cannot be used on this entity type: %s", entityId.getEntityType()));
    }
}
Also used : NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ProgramId(io.cdap.cdap.proto.id.ProgramId)

Example 30 with ProgramId

use of io.cdap.cdap.proto.id.ProgramId in project cdap by caskdata.

the class WorkflowStatsSLAHttpHandlerTest method testStatistics.

/**
 * Exercises the workflow statistics endpoint: records ten workflow runs (one of them an
 * outlier), checks the percentile information and per-node run count, checks that invalid
 * time-range/percentile requests are rejected, and checks the response after the app is deleted.
 */
@Test
public void testStatistics() throws Exception {
    deploy(WorkflowApp.class, 200);
    String workflowName = "FunWorkflow";
    String mapreduceName = "ClassicWordCount";
    String sparkName = "SparkWorkflowTest";
    ProgramId workflowProgram = WORKFLOW_APP.workflow(workflowName);
    ProgramId mapreduceProgram = WORKFLOW_APP.mr(mapreduceName);
    ProgramId sparkProgram = WORKFLOW_APP.spark(sparkName);
    ArtifactId artifactId = WORKFLOW_APP.getNamespaceId().artifact("testArtifact", "1.0").toApiArtifactId();
    long startTime = System.currentTimeMillis();
    long currentTimeMillis = startTime;
    String outlierRunId = null;
    // record 10 workflow runs; each has a MapReduce run, only the first 5 also have a Spark run
    for (int i = 0; i < 10; i++) {
        // workflow runs every 5 minutes
        currentTimeMillis = startTime + (i * TimeUnit.MINUTES.toMillis(5));
        RunId workflowRunId = RunIds.generate(currentTimeMillis);
        setStartAndRunning(workflowProgram, workflowRunId.getId(), artifactId);
        // MR job starts 2 seconds after workflow started
        RunId mapreduceRunid = RunIds.generate(currentTimeMillis + TimeUnit.SECONDS.toMillis(2));
        // system arguments tie the inner program run to its workflow node, workflow name and workflow run
        Map<String, String> systemArgs = ImmutableMap.of(ProgramOptionConstants.WORKFLOW_NODE_ID, mapreduceName, ProgramOptionConstants.WORKFLOW_NAME, workflowName, ProgramOptionConstants.WORKFLOW_RUN_ID, workflowRunId.getId());
        setStartAndRunning(mapreduceProgram, mapreduceRunid.getId(), ImmutableMap.of(), systemArgs, artifactId);
        store.setStop(mapreduceProgram.run(mapreduceRunid), // map-reduce job ran for 17 seconds
        TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis) + 19, ProgramRunStatus.COMPLETED, AppFabricTestHelper.createSourceId(++sourceId));
        // This makes sure that not all runs have Spark programs in them
        if (i < 5) {
            // spark starts 20 seconds after workflow starts
            RunId sparkRunid = RunIds.generate(currentTimeMillis + TimeUnit.SECONDS.toMillis(20));
            systemArgs = ImmutableMap.of(ProgramOptionConstants.WORKFLOW_NODE_ID, sparkProgram.getProgram(), ProgramOptionConstants.WORKFLOW_NAME, workflowName, ProgramOptionConstants.WORKFLOW_RUN_ID, workflowRunId.getId());
            setStartAndRunning(sparkProgram, sparkRunid.getId(), ImmutableMap.of(), systemArgs, artifactId);
            // spark job runs for 38 seconds
            long stopTime = TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis) + 58;
            if (i == 4) {
                // spark job ran for 100 seconds. 62 seconds greater than avg.
                stopTime = TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis) + 120;
            }
            store.setStop(sparkProgram.run(sparkRunid.getId()), stopTime, ProgramRunStatus.COMPLETED, AppFabricTestHelper.createSourceId(++sourceId));
        }
        // workflow ran for 1 minute
        long workflowStopTime = TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis) + 60;
        if (i == 4) {
            // spark job ran longer for this run
            workflowStopTime = TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis) + 122;
            // remember this run: it is the one expected to be reported over the percentile
            outlierRunId = workflowRunId.getId();
        }
        store.setStop(workflowProgram.run(workflowRunId.getId()), workflowStopTime, ProgramRunStatus.COMPLETED, AppFabricTestHelper.createSourceId(++sourceId));
    }
    // query the 99th percentile from the first run's start (in seconds) to 2 minutes after the last run's start
    String request = String.format("%s/namespaces/%s/apps/%s/workflows/%s/statistics?start=%s&end=%s" + "&percentile=%s", Constants.Gateway.API_VERSION_3, Id.Namespace.DEFAULT.getId(), WorkflowApp.class.getSimpleName(), workflowProgram.getProgram(), TimeUnit.MILLISECONDS.toSeconds(startTime), TimeUnit.MILLISECONDS.toSeconds(currentTimeMillis) + TimeUnit.MINUTES.toSeconds(2), "99");
    HttpResponse response = doGet(request);
    WorkflowStatistics workflowStatistics = readResponse(response, new TypeToken<WorkflowStatistics>() {
    }.getType());
    PercentileInformation percentileInformation = workflowStatistics.getPercentileInformationList().get(0);
    // exactly one run, the outlier recorded at i == 4, is expected over the percentile
    Assert.assertEquals(1, percentileInformation.getRunIdsOverPercentile().size());
    Assert.assertEquals(outlierRunId, percentileInformation.getRunIdsOverPercentile().get(0));
    // the Spark node was only started in the first 5 workflow runs
    Assert.assertEquals("5", workflowStatistics.getNodes().get(sparkName).get("runs"));
    // start=now with end=0 is expected to be rejected with 400
    request = String.format("%s/namespaces/%s/apps/%s/workflows/%s/statistics?start=%s&end=%s" + "&percentile=%s&percentile=%s", Constants.Gateway.API_VERSION_3, Id.Namespace.DEFAULT.getId(), WorkflowApp.class.getSimpleName(), workflowProgram.getProgram(), "now", "0", "90", "95");
    response = doGet(request);
    Assert.assertEquals(HttpResponseStatus.BAD_REQUEST.code(), response.getResponseCode());
    // same time range with percentile values 90.0 and 950 is also expected to be rejected with 400
    request = String.format("%s/namespaces/%s/apps/%s/workflows/%s/statistics?start=%s&end=%s" + "&percentile=%s&percentile=%s", Constants.Gateway.API_VERSION_3, Id.Namespace.DEFAULT.getId(), WorkflowApp.class.getSimpleName(), workflowProgram.getProgram(), "now", "0", "90.0", "950");
    response = doGet(request);
    Assert.assertEquals(HttpResponseStatus.BAD_REQUEST.code(), response.getResponseCode());
    // delete the app, then expect an OK response whose body says there are no statistics
    Id.Application appId = new Id.Application(Id.Namespace.DEFAULT, WorkflowApp.class.getSimpleName());
    deleteApp(appId, HttpResponseStatus.OK.code());
    // NOTE(review): unlike the requests above, this one formats the ProgramId object itself rather than
    // workflowProgram.getProgram(), and passes the end time in milliseconds - confirm whether that is intended
    request = String.format("%s/namespaces/%s/apps/%s/workflows/%s/statistics?start=%s&end=%s" + "&percentile=%s", Constants.Gateway.API_VERSION_3, Id.Namespace.DEFAULT.getId(), WorkflowApp.class.getSimpleName(), workflowProgram, 0, System.currentTimeMillis(), "99");
    response = doGet(request);
    Assert.assertEquals(HttpResponseStatus.OK.code(), response.getResponseCode());
    Assert.assertTrue(response.getResponseBodyAsString().startsWith("There are no statistics associated with this workflow : "));
}
Also used : WorkflowApp(io.cdap.cdap.WorkflowApp) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) HttpResponse(io.cdap.common.http.HttpResponse) ProgramId(io.cdap.cdap.proto.id.ProgramId) WorkflowStatistics(io.cdap.cdap.proto.WorkflowStatistics) PercentileInformation(io.cdap.cdap.proto.PercentileInformation) TypeToken(com.google.gson.reflect.TypeToken) WorkflowId(io.cdap.cdap.proto.id.WorkflowId) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) RunId(org.apache.twill.api.RunId) ProgramId(io.cdap.cdap.proto.id.ProgramId) Id(io.cdap.cdap.common.id.Id) ProfileId(io.cdap.cdap.proto.id.ProfileId) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) RunId(org.apache.twill.api.RunId) Test(org.junit.Test)

Aggregations

ProgramId (io.cdap.cdap.proto.id.ProgramId)562 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)277 Test (org.junit.Test)268 ProgramRunId (io.cdap.cdap.proto.id.ProgramRunId)164 NamespaceId (io.cdap.cdap.proto.id.NamespaceId)130 RunId (org.apache.twill.api.RunId)118 ApplicationSpecification (io.cdap.cdap.api.app.ApplicationSpecification)110 ProgramType (io.cdap.cdap.proto.ProgramType)108 HashMap (java.util.HashMap)88 HashSet (java.util.HashSet)78 ArrayList (java.util.ArrayList)76 Id (io.cdap.cdap.common.id.Id)74 IOException (java.io.IOException)74 File (java.io.File)70 RunRecord (io.cdap.cdap.proto.RunRecord)68 Path (javax.ws.rs.Path)68 ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)66 NotFoundException (io.cdap.cdap.common.NotFoundException)66 Map (java.util.Map)64 Set (java.util.Set)62