Search in sources :

Example 11 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class MetadataHttpHandlerTestRun method testSearchMetadata.

@Test
public void testSearchMetadata() throws Exception {
    appClient.deploy(NamespaceId.DEFAULT, createAppJarFile(AllProgramsApp.class));
    Map<NamespacedEntityId, Metadata> expectedUserMetadata = new HashMap<>();
    // Add metadata to app
    Map<String, String> props = ImmutableMap.of("key1", "value1");
    Set<String> tags = ImmutableSet.of("tag1", "tag2");
    ApplicationId appId = NamespaceId.DEFAULT.app(AllProgramsApp.NAME);
    addProperties(appId, props);
    addTags(appId, tags);
    expectedUserMetadata.put(appId, new Metadata(props, tags));
    // Add metadata to stream
    props = ImmutableMap.of("key10", "value10", "key11", "value11");
    tags = ImmutableSet.of("tag11");
    StreamId streamId = NamespaceId.DEFAULT.stream(AllProgramsApp.STREAM_NAME);
    addProperties(streamId, props);
    addTags(streamId, tags);
    expectedUserMetadata.put(streamId, new Metadata(props, tags));
    Set<MetadataSearchResultRecord> results = super.searchMetadata(NamespaceId.DEFAULT, "value*", ImmutableSet.<EntityTypeSimpleName>of());
    // Verify results
    Assert.assertEquals(expectedUserMetadata.keySet(), getEntities(results));
    for (MetadataSearchResultRecord result : results) {
        // User metadata has to match exactly since we know what we have set
        Assert.assertEquals(expectedUserMetadata.get(result.getEntityId()), result.getMetadata().get(MetadataScope.USER));
        // Make sure system metadata is returned, we cannot check for exact match since we haven't set it
        Metadata systemMetadata = result.getMetadata().get(MetadataScope.SYSTEM);
        Assert.assertNotNull(systemMetadata);
        Assert.assertFalse(systemMetadata.getProperties().isEmpty());
        Assert.assertFalse(systemMetadata.getTags().isEmpty());
    }
}
Also used : NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) StreamId(co.cask.cdap.proto.id.StreamId) HashMap(java.util.HashMap) MetadataSearchResultRecord(co.cask.cdap.proto.metadata.MetadataSearchResultRecord) Metadata(co.cask.cdap.proto.metadata.Metadata) AllProgramsApp(co.cask.cdap.client.app.AllProgramsApp) ApplicationId(co.cask.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 12 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class StreamAdminTest method testAuditPublish.

@Test
public void testAuditPublish() throws Exception {
    grantAndAssertSuccess(FOO_NAMESPACE, USER, EnumSet.allOf(Action.class));
    // clear existing all messages
    getInMemoryAuditPublisher().popMessages();
    final List<AuditMessage> expectedMessages = new ArrayList<>();
    StreamAdmin streamAdmin = getStreamAdmin();
    StreamId stream1 = FOO_NAMESPACE.stream("stream1");
    streamAdmin.create(stream1);
    expectedMessages.add(new AuditMessage(0, stream1, "", AuditType.CREATE, AuditPayload.EMPTY_PAYLOAD));
    StreamId stream2 = FOO_NAMESPACE.stream("stream2");
    streamAdmin.create(stream2);
    expectedMessages.add(new AuditMessage(0, stream2, "", AuditType.CREATE, AuditPayload.EMPTY_PAYLOAD));
    streamAdmin.truncate(stream1);
    expectedMessages.add(new AuditMessage(0, stream1, "", AuditType.TRUNCATE, AuditPayload.EMPTY_PAYLOAD));
    streamAdmin.updateConfig(stream1, new StreamProperties(100L, new FormatSpecification("f", null), 100));
    expectedMessages.add(new AuditMessage(0, stream1, "", AuditType.UPDATE, AuditPayload.EMPTY_PAYLOAD));
    ProgramRunId run = new ProgramId("ns1", "app", ProgramType.FLOW, "flw").run(RunIds.generate().getId());
    streamAdmin.addAccess(run, stream1, AccessType.READ);
    expectedMessages.add(new AuditMessage(0, stream1, "", AuditType.ACCESS, new AccessPayload(co.cask.cdap.proto.audit.payload.access.AccessType.READ, run)));
    streamAdmin.drop(stream1);
    expectedMessages.add(new AuditMessage(0, stream1, "", AuditType.DELETE, AuditPayload.EMPTY_PAYLOAD));
    streamAdmin.dropAllInNamespace(FOO_NAMESPACE);
    expectedMessages.add(new AuditMessage(0, stream2, "", AuditType.DELETE, AuditPayload.EMPTY_PAYLOAD));
    // Ignore audit messages for system namespace (creation of system datasets, etc)
    final String systemNs = NamespaceId.SYSTEM.getNamespace();
    final Iterable<AuditMessage> actualMessages = Iterables.filter(getInMemoryAuditPublisher().popMessages(), new Predicate<AuditMessage>() {

        @Override
        public boolean apply(AuditMessage input) {
            return !(input.getEntityId() instanceof NamespacedEntityId && ((NamespacedEntityId) input.getEntityId()).getNamespace().equals(systemNs));
        }
    });
    Assert.assertEquals(expectedMessages, Lists.newArrayList(actualMessages));
}
Also used : Action(co.cask.cdap.proto.security.Action) AuditMessage(co.cask.cdap.proto.audit.AuditMessage) StreamId(co.cask.cdap.proto.id.StreamId) ArrayList(java.util.ArrayList) StreamProperties(co.cask.cdap.proto.StreamProperties) FormatSpecification(co.cask.cdap.api.data.format.FormatSpecification) ProgramId(co.cask.cdap.proto.id.ProgramId) AccessPayload(co.cask.cdap.proto.audit.payload.access.AccessPayload) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) Test(org.junit.Test)

Example 13 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class LineageAdmin method doComputeLineage.

private Lineage doComputeLineage(final NamespacedEntityId sourceData, long startMillis, long endMillis, int levels, @Nullable String rollup) throws NotFoundException {
    LOG.trace("Computing lineage for data {}, startMillis {}, endMillis {}, levels {}", sourceData, startMillis, endMillis, levels);
    // Convert start time and end time period into scan keys in terms of program start times.
    Set<RunId> runningInRange = store.getRunningInRange(TimeUnit.MILLISECONDS.toSeconds(startMillis), TimeUnit.MILLISECONDS.toSeconds(endMillis));
    if (LOG.isTraceEnabled()) {
        LOG.trace("Got {} rundIds in time range ({}, {})", runningInRange.size(), startMillis, endMillis);
    }
    ScanRangeWithFilter scanRange = getScanRange(runningInRange);
    LOG.trace("Using scan start = {}, scan end = {}", scanRange.getStart(), scanRange.getEnd());
    Multimap<RelationKey, Relation> relations = HashMultimap.create();
    Set<NamespacedEntityId> visitedDatasets = new HashSet<>();
    Set<NamespacedEntityId> toVisitDatasets = new HashSet<>();
    Set<ProgramId> visitedPrograms = new HashSet<>();
    Set<ProgramId> toVisitPrograms = new HashSet<>();
    toVisitDatasets.add(sourceData);
    for (int i = 0; i < levels; ++i) {
        LOG.trace("Level {}", i);
        toVisitPrograms.clear();
        for (NamespacedEntityId d : toVisitDatasets) {
            if (visitedDatasets.add(d)) {
                LOG.trace("Visiting dataset {}", d);
                // Fetch related programs
                Iterable<Relation> programRelations = getProgramRelations(d, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
                LOG.trace("Got program relations {}", programRelations);
                for (Relation relation : programRelations) {
                    relations.put(new RelationKey(relation), relation);
                }
                Iterables.addAll(toVisitPrograms, Iterables.transform(programRelations, RELATION_TO_PROGRAM_FUNCTION));
            }
        }
        toVisitDatasets.clear();
        for (ProgramId p : toVisitPrograms) {
            if (visitedPrograms.add(p)) {
                LOG.trace("Visiting program {}", p);
                // Fetch related datasets
                Iterable<Relation> datasetRelations = lineageStoreReader.getRelations(p, scanRange.getStart(), scanRange.getEnd(), scanRange.getFilter());
                LOG.trace("Got data relations {}", datasetRelations);
                for (Relation relation : datasetRelations) {
                    relations.put(new RelationKey(relation), relation);
                }
                Iterables.addAll(toVisitDatasets, Iterables.transform(datasetRelations, RELATION_TO_DATA_FUNCTION));
            }
        }
    }
    if (rollup != null && rollup.contains("workflow")) {
        relations = doComputeRollupLineage(relations);
    }
    Lineage lineage = new Lineage(Iterables.concat(Maps.transformValues(relations.asMap(), COLLAPSE_UNKNOWN_TYPE_FUNCTION).values()));
    LOG.trace("Got lineage {}", lineage);
    return lineage;
}
Also used : Lineage(co.cask.cdap.data2.metadata.lineage.Lineage) ProgramId(co.cask.cdap.proto.id.ProgramId) Relation(co.cask.cdap.data2.metadata.lineage.Relation) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) RunId(org.apache.twill.api.RunId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) HashSet(java.util.HashSet)

Example 14 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class LineageWriterDatasetFramework method doWriteLineage.

private void doWriteLineage(DatasetId datasetInstanceId, AccessType accessType) {
    ProgramContext programContext = this.programContext;
    if (programContext != null) {
        ProgramRunId programRunId = programContext.getProgramRunId();
        NamespacedEntityId componentId = programContext.getComponentId();
        try {
            lineageWriter.addAccess(programRunId, datasetInstanceId, accessType, componentId);
        } catch (Throwable t) {
            // Failure to write to lineage shouldn't cause dataset operation failure
            LOG.warn("Failed to write lineage information for dataset {} with access type {} from {},{}", datasetInstanceId, accessType, programRunId, componentId);
            // Log the stacktrace as debug to not polluting the log
            LOG.debug("Cause for lineage writing failure for {} {} {} {}", datasetInstanceId, accessType, programRunId, componentId, t);
        }
    }
}
Also used : NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ProgramRunId(co.cask.cdap.proto.id.ProgramRunId) ProgramContext(co.cask.cdap.data.ProgramContext)

Example 15 with NamespacedEntityId

use of co.cask.cdap.proto.id.NamespacedEntityId in project cdap by caskdata.

the class MetadataDataset method getMetadata.

/**
   * Returns metadata for a given set of entities
   *
   * @param targetIds entities for which metadata is required
   * @return map of entitiyId to set of metadata for that entity
   */
public Set<Metadata> getMetadata(Set<? extends NamespacedEntityId> targetIds) {
    if (targetIds.isEmpty()) {
        return Collections.emptySet();
    }
    List<ImmutablePair<byte[], byte[]>> fuzzyKeys = new ArrayList<>(targetIds.size());
    for (NamespacedEntityId targetId : targetIds) {
        fuzzyKeys.add(getFuzzyKeyFor(targetId));
    }
    // Sort fuzzy keys
    Collections.sort(fuzzyKeys, FUZZY_KEY_COMPARATOR);
    // Scan using fuzzy filter. Scan returns one row per property.
    // Group the rows on namespacedId
    Multimap<NamespacedEntityId, MetadataEntry> metadataMap = HashMultimap.create();
    byte[] start = fuzzyKeys.get(0).getFirst();
    byte[] end = Bytes.stopKeyForPrefix(fuzzyKeys.get(fuzzyKeys.size() - 1).getFirst());
    try (Scanner scan = indexedTable.scan(new Scan(start, end, new FuzzyRowFilter(fuzzyKeys)))) {
        Row next;
        while ((next = scan.next()) != null) {
            MetadataEntry metadataEntry = convertRow(next);
            if (metadataEntry != null) {
                metadataMap.put(metadataEntry.getTargetId(), metadataEntry);
            }
        }
    }
    // Create metadata objects for each entity from grouped rows
    Set<Metadata> metadataSet = new HashSet<>();
    for (Map.Entry<NamespacedEntityId, Collection<MetadataEntry>> entry : metadataMap.asMap().entrySet()) {
        Map<String, String> properties = new HashMap<>();
        Set<String> tags = Collections.emptySet();
        for (MetadataEntry metadataEntry : entry.getValue()) {
            if (TAGS_KEY.equals(metadataEntry.getKey())) {
                tags = splitTags(metadataEntry.getValue());
            } else {
                properties.put(metadataEntry.getKey(), metadataEntry.getValue());
            }
        }
        metadataSet.add(new Metadata(entry.getKey(), properties, tags));
    }
    return metadataSet;
}
Also used : Scanner(co.cask.cdap.api.dataset.table.Scanner) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FuzzyRowFilter(co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter) NamespacedEntityId(co.cask.cdap.proto.id.NamespacedEntityId) ImmutablePair(co.cask.cdap.common.utils.ImmutablePair) Collection(java.util.Collection) Scan(co.cask.cdap.api.dataset.table.Scan) Row(co.cask.cdap.api.dataset.table.Row) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) HashSet(java.util.HashSet)

Aggregations

NamespacedEntityId (co.cask.cdap.proto.id.NamespacedEntityId)23 ProgramRunId (co.cask.cdap.proto.id.ProgramRunId)9 ProgramId (co.cask.cdap.proto.id.ProgramId)6 HashMap (java.util.HashMap)6 StreamId (co.cask.cdap.proto.id.StreamId)5 HashSet (java.util.HashSet)5 RunId (org.apache.twill.api.RunId)5 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4 Row (co.cask.cdap.api.dataset.table.Row)3 Scanner (co.cask.cdap.api.dataset.table.Scanner)3 Metadata (co.cask.cdap.data2.metadata.dataset.Metadata)3 AuditMessage (co.cask.cdap.proto.audit.AuditMessage)3 DatasetId (co.cask.cdap.proto.id.DatasetId)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 Collection (java.util.Collection)3 LinkedHashSet (java.util.LinkedHashSet)3 Test (org.junit.Test)3 MethodArgument (co.cask.cdap.common.internal.remote.MethodArgument)2 MDSKey (co.cask.cdap.data2.dataset2.lib.table.MDSKey)2