Search in sources:

Example 6 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class AuditPublishTest method testPublish.

@Test
public void testPublish() throws Exception {
    String namespace = NamespaceId.DEFAULT.getNamespace();
    String application = WordCountApp.class.getSimpleName();

    // The dataset and stream are expected under both METADATA_CHANGE and CREATE.
    EntityId datasetId = Ids.namespace(namespace).dataset("mydataset");
    EntityId streamId = Ids.namespace(namespace).stream("text");

    // Entities for which a METADATA_CHANGE audit message is expected
    Set<EntityId> metadataChanged = ImmutableSet.<EntityId>of(
        Ids.namespace(namespace).artifact(application, "1"),
        Ids.namespace(namespace).app(application),
        Ids.namespace(namespace).app(application).flow(WordCountApp.WordCountFlow.class.getSimpleName()),
        Ids.namespace(namespace).app(application).mr(WordCountApp.VoidMapReduceJob.class.getSimpleName()),
        Ids.namespace(namespace).app(application).service(WordCountApp.WordFrequencyService.class.getSimpleName()),
        datasetId,
        streamId);

    Multimap<AuditType, EntityId> expected = HashMultimap.create();
    expected.putAll(AuditType.METADATA_CHANGE, metadataChanged);
    expected.putAll(AuditType.CREATE, ImmutableSet.<EntityId>of(datasetId, streamId));

    // Deploying the application is what triggers the audit messages under test
    AppFabricTestHelper.deployApplication(Id.Namespace.DEFAULT, WordCountApp.class, null, cConf);

    // Collect what was actually published, grouped by audit type
    List<AuditMessage> published = fetchAuditMessages();
    Multimap<AuditType, EntityId> actual = HashMultimap.create();
    for (AuditMessage audit : published) {
        EntityId id = audit.getEntityId();

        // Messages about entities in the system namespace are not part of this test
        boolean inSystemNamespace = id instanceof NamespacedEntityId
            && ((NamespacedEntityId) id).getNamespace().equals(NamespaceId.SYSTEM.getNamespace());
        if (inSystemNamespace) {
            continue;
        }

        // Artifact versions are dynamic for deploys in test cases, so pin the version to "1"
        if (id.getEntityType() == EntityType.ARTIFACT && id instanceof ArtifactId) {
            ArtifactId deployed = (ArtifactId) id;
            id = Ids.namespace(deployed.getNamespace()).artifact(deployed.getArtifact(), "1");
        }
        actual.put(audit.getType(), id);
    }

    Assert.assertEquals(expected, actual);
}
Also used : NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) EntityId(co.cask.cdap.proto.id.EntityId) AuditMessage(co.cask.cdap.proto.audit.AuditMessage) ArtifactId(co.cask.cdap.proto.id.ArtifactId) AuditType(co.cask.cdap.proto.audit.AuditType) WordCountApp(co.cask.cdap.WordCountApp) Test(org.junit.Test)

Example 7 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class DefaultMetadataStore method search.

/**
 * Runs the search query against every requested scope, merges the hits, orders them according to
 * {@code sortInfo}, and returns the page selected by {@code offset} and {@code limit} together with
 * the system and user metadata of each entity on that page.
 *
 * @param scopes the metadata scopes to search; results and cursors of all scopes are concatenated
 * @param namespaceId the namespace passed through to the per-scope search
 * @param searchQuery the query passed through to the per-scope search
 * @param types the entity types passed through to the per-scope search
 * @param sortInfo how to order the merged results; also echoed in the response
 * @param offset index of the first entity to return; must not be negative
 * @param limit maximum number of entities to return; must not be negative
 * @param numCursors number of cursors requested; passed through and echoed in the response
 * @param cursor cursor passed through to the per-scope search
 * @param showHidden passed through to the per-scope search and echoed in the response
 * @param entityScope passed through to the per-scope search and echoed in the response
 * @return the response for the requested page; its total is the number of distinct entities found
 *         before pagination
 * @throws IllegalArgumentException if {@code offset} or {@code limit} is negative
 * @throws BadRequestException declared by this method; presumably raised by the per-scope search
 */
private MetadataSearchResponse search(Set<MetadataScope> scopes, String namespaceId, String searchQuery, Set<EntityTypeSimpleName> types, SortInfo sortInfo, int offset, int limit, int numCursors, String cursor, boolean showHidden, Set<EntityScope> entityScope) throws BadRequestException {
    if (offset < 0) {
        throw new IllegalArgumentException("offset must not be negative");
    }
    if (limit < 0) {
        throw new IllegalArgumentException("limit must not be negative");
    }
    // Both lists are only appended to and then read sequentially, so ArrayList is sufficient.
    List<MetadataEntry> results = new ArrayList<>();
    List<String> cursors = new ArrayList<>();
    for (MetadataScope scope : scopes) {
        SearchResults searchResults = getSearchResults(scope, namespaceId, searchQuery, types, sortInfo, offset, limit, numCursors, cursor, showHidden, entityScope);
        results.addAll(searchResults.getResults());
        cursors.addAll(searchResults.getCursors());
    }
    // sort if required
    Set<NamespacedEntityId> sortedEntities = getSortedEntities(results, sortInfo);
    int total = sortedEntities.size();
    // pagination is not performed at the dataset level, because:
    // 1. scoring is needed for DEFAULT sort info. So perform it here for now.
    // 2. Even when using custom sorting, we need to remove elements from the beginning to the offset and the cursors
    //    at the end
    // TODO: Figure out how all of this can be done server (HBase) side
    int startIndex = Math.min(offset, total);
    // The end index is exclusive. Add in long arithmetic so that offset + limit cannot overflow,
    // then clamp to the number of available entities (which always fits in an int).
    int endIndex = (int) Math.min((long) total, (long) offset + limit);
    sortedEntities = new LinkedHashSet<>(ImmutableList.copyOf(sortedEntities).subList(startIndex, endIndex));
    // Fetch metadata for entities in the result list
    // Note: since the fetch is happening in a different transaction, the metadata for entities may have been
    // removed. It is okay not to have metadata for some results in case this happens.
    Map<NamespacedEntityId, Metadata> systemMetadata = fetchMetadata(sortedEntities, MetadataScope.SYSTEM);
    Map<NamespacedEntityId, Metadata> userMetadata = fetchMetadata(sortedEntities, MetadataScope.USER);
    return new MetadataSearchResponse(sortInfo.getSortBy() + " " + sortInfo.getSortOrder(), offset, limit, numCursors, total, addMetadataToEntities(sortedEntities, systemMetadata, userMetadata), cursors, showHidden, entityScope);
}
Also used : Metadata(co.cask.cdap.data2.metadata.dataset.Metadata) MetadataSearchResponse(co.cask.cdap.proto.metadata.MetadataSearchResponse) SearchResults(co.cask.cdap.data2.metadata.dataset.SearchResults) LinkedList(java.util.LinkedList) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) MetadataScope(co.cask.cdap.proto.metadata.MetadataScope)

Example 8 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class DefaultMetadataStore method getSortedEntities.

/**
 * Reduces the raw search hits to an ordered set of distinct target entities.
 *
 * For any sort order other than WEIGHTED the order of {@code results} is kept as-is (the backing
 * storage is expected to have returned the entries in the requested order). For WEIGHTED, each
 * entity is scored by the number of entries that target it, and the entities are returned as
 * ordered by {@code SEARCH_RESULT_DESC_SCORE_COMPARATOR}.
 *
 * @param results the metadata entries found by the search
 * @param sortInfo the requested ordering
 * @return the distinct target entities, iterated in result order
 */
private Set<NamespacedEntityId> getSortedEntities(List<MetadataEntry> results, SortInfo sortInfo) {
    boolean weighted = SortInfo.SortOrder.WEIGHTED == sortInfo.getSortOrder();
    if (!weighted) {
        // Keep the storage order; the linked set only removes duplicate targets.
        Set<NamespacedEntityId> inStorageOrder = new LinkedHashSet<>(results.size());
        for (MetadataEntry hit : results) {
            inStorageOrder.add(hit.getTargetId());
        }
        return inStorageOrder;
    }

    // Weighted: the score of an entity is the number of entries that point at it.
    final Map<NamespacedEntityId, Integer> hitCounts = new HashMap<>();
    for (MetadataEntry hit : results) {
        Integer previous = hitCounts.get(hit.getTargetId());
        if (previous == null) {
            hitCounts.put(hit.getTargetId(), 1);
        } else {
            hitCounts.put(hit.getTargetId(), previous + 1);
        }
    }

    // Order the (entity, score) pairs by score, then keep only the entities.
    List<Map.Entry<NamespacedEntityId, Integer>> scored = new ArrayList<>(hitCounts.entrySet());
    Collections.sort(scored, SEARCH_RESULT_DESC_SCORE_COMPARATOR);
    Set<NamespacedEntityId> byScore = new LinkedHashSet<>(scored.size());
    for (Map.Entry<NamespacedEntityId, Integer> pair : scored) {
        byScore.add(pair.getKey());
    }
    return byScore;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) MetadataEntry(co.cask.cdap.data2.metadata.dataset.MetadataEntry) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 9 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class LineageAdmin method getMetadataForRun.

/**
 * Looks up the user-scope metadata snapshot for everything associated with a program run.
 *
 * The entities recorded for the run are read from the lineage store, the run's application is
 * added to them, and the metadata store is asked for their snapshot before the time encoded in
 * the run id.
 *
 * @param run the program run to look up
 * @return metadata associated with the run; empty if no entities are recorded for it
 * @throws NotFoundException declared by this method; presumably raised when the run does not exist
 */
public Set<MetadataRecord> getMetadataForRun(ProgramRunId run) throws NotFoundException {
    entityExistenceVerifier.ensureExists(run);

    // Copy into a mutable set so the application can be added below.
    Set<NamespacedEntityId> entities = new HashSet<>(lineageStoreReader.getEntitiesForRun(run));
    if (entities.isEmpty()) {
        // The run exists, but nothing is associated with it.
        return ImmutableSet.of();
    }

    RunId twillRunId = RunIds.fromString(run.getRun());
    // The lineage store does not report the application itself, so add it explicitly
    // (run -> program -> application).
    entities.add(run.getParent().getParent());
    return metadataStore.getSnapshotBeforeTime(MetadataScope.USER, entities, RunIds.getTime(twillRunId, TimeUnit.MILLISECONDS));
}
Also used : NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 10 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class LineageAdmin method getRollupRelations.

/**
 * Builds a copy of {@code relations} in which every relation that resolves to a workflow run is
 * replaced by a relation attributed to that workflow; all other relations are copied unchanged.
 *
 * @param relations the relations to roll up, grouped by key
 * @param runRecordMap run records passed through to the workflow-run lookup
 * @param workflowIdMap workflow ids passed through to the workflow-run lookup
 * @return a new multimap with the same keys and the rolled-up relations
 * @throws NotFoundException declared by this method; presumably raised by the workflow-run lookup
 */
private Multimap<RelationKey, Relation> getRollupRelations(Multimap<RelationKey, Relation> relations, Map<ProgramRunId, RunRecordMeta> runRecordMap, Map<String, ProgramRunId> workflowIdMap) throws NotFoundException {
    Multimap<RelationKey, Relation> rolledUp = HashMultimap.create();
    for (Map.Entry<RelationKey, Collection<Relation>> group : relations.asMap().entrySet()) {
        RelationKey key = group.getKey();
        for (Relation current : group.getValue()) {
            ProgramRunId workflowRun = getWorkflowProgramRunid(current, runRecordMap, workflowIdMap);
            if (workflowRun == null) {
                // No workflow run was resolved for this relation: keep it as it is.
                rolledUp.put(key, current);
                continue;
            }

            // Re-attribute the data access to the workflow program and the workflow's run.
            ProgramId workflow = new ProgramId(workflowRun.getNamespace(), workflowRun.getApplication(), workflowRun.getType(), workflowRun.getProgram());
            NamespacedEntityId data = current.getData();
            // Anything that is not a dataset is treated as a stream.
            Relation replacement = (data instanceof DatasetId)
                ? new Relation((DatasetId) data, workflow, current.getAccess(), RunIds.fromString(workflowRun.getRun()))
                : new Relation((StreamId) data, workflow, current.getAccess(), RunIds.fromString(workflowRun.getRun()));
            rolledUp.put(key, replacement);
        }
    }
    return rolledUp;
}
Also used : Relation(co.cask.cdap.data2.metadata.lineage.Relation) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) StreamId(co.cask.cdap.proto.id.StreamId) Collection(java.util.Collection) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramId(co.cask.cdap.proto.id.ProgramId) HashMap(java.util.HashMap) Map(java.util.Map) DatasetId(co.cask.cdap.proto.id.DatasetId)

Aggregations

NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)23 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)9 ProgramId (co.cask.cdap.proto.id.ProgramId)6 HashMap (java.util.HashMap)6 StreamId (co.cask.cdap.proto.id.StreamId)5 HashSet (java.util.HashSet)5 RunId (org.apache.twill.api.RunId)5 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4 Row (co.cask.cdap.api.dataset.table.Row)3 Scanner (co.cask.cdap.api.dataset.table.Scanner)3 Metadata (co.cask.cdap.data2.metadata.dataset.Metadata)3 AuditMessage (co.cask.cdap.proto.audit.AuditMessage)3 DatasetId (co.cask.cdap.proto.id.DatasetId)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 Collection (java.util.Collection)3 LinkedHashSet (java.util.LinkedHashSet)3 Test (org.junit.Test)3 MethodArgument (co.cask.cdap.common.internal.remote.MethodArgument)2 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)2