Search in sources :

Example 1 with TYPE_NIFI_QUEUE

use of org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE in project nifi by apache.

From the class NiFiAtlasClient, method registerDataSetEntities.

/**
 * Sends the changed DataSet entities of the given NiFiFlow to Atlas.
 *
 * <p>Entities marked as CREATED are created in Atlas, and the GUID assigned by Atlas is written back
 * both to the entity and to the corresponding NiFiFlow map key. Entities marked as UPDATED are sent
 * as updates. DELETED entities are not sent here; they are only excluded from the returned map.</p>
 *
 * @param nifiFlow the flow whose changed DataSet entities are registered
 * @return a map from each changed Atlas type name to its remaining (non-deleted) entities;
 *         a changed type with no remaining entity is mapped to an empty list
 * @throws AtlasServiceException if a create or update request to Atlas fails
 */
private Map<String, List<AtlasEntity>> registerDataSetEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
    final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> entitiesByChange = nifiFlow.getChangedDataSetEntities();

    if (entitiesByChange.containsKey(CREATED)) {
        final List<AtlasEntity> newEntities = entitiesByChange.get(CREATED);
        final EntityMutationResponse createResponse =
                atlasClient.createEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(newEntities));
        logger.debug("Created DataSet entities mutation response={}", createResponse);

        // Maps the temporary GUID held by each entity to the GUID assigned by Atlas.
        final Map<String, String> assignedGuids = createResponse.getGuidAssignments();
        for (AtlasEntity newEntity : newEntities) {
            final String assignedGuid = assignedGuids.get(newEntity.getGuid());
            final String qualifiedName = toStr(newEntity.getAttribute(ATTR_QUALIFIED_NAME));
            if (StringUtils.isEmpty(assignedGuid)) {
                logger.warn("GUID was not assigned for {}::{} for some reason.", newEntity.getTypeName(), qualifiedName);
                continue;
            }

            // Pick the NiFiFlow map that holds entities of this type.
            final Map<AtlasObjectId, AtlasEntity> holder;
            switch (newEntity.getTypeName()) {
                case TYPE_NIFI_INPUT_PORT:
                    holder = nifiFlow.getRootInputPortEntities();
                    break;
                case TYPE_NIFI_OUTPUT_PORT:
                    holder = nifiFlow.getRootOutputPortEntities();
                    break;
                case TYPE_NIFI_QUEUE:
                    holder = nifiFlow.getQueues();
                    break;
                default:
                    throw new RuntimeException(newEntity.getTypeName() + " is not expected.");
            }

            // The existing key has no GUID, so drop it and re-insert the entity under a key that has one.
            findIdByQualifiedName(holder.keySet(), qualifiedName).ifPresent(staleId -> holder.remove(staleId));
            newEntity.setGuid(assignedGuid);
            final Map<String, Object> uniqueAttributes = Collections.singletonMap(ATTR_QUALIFIED_NAME, qualifiedName);
            holder.put(new AtlasObjectId(assignedGuid, newEntity.getTypeName(), uniqueAttributes), newEntity);
        }
    }

    if (entitiesByChange.containsKey(UPDATED)) {
        final List<AtlasEntity> modifiedEntities = entitiesByChange.get(UPDATED);
        final EntityMutationResponse updateResponse =
                atlasClient.updateEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(modifiedEntities));
        logger.debug("Updated DataSet entities mutation response={}", updateResponse);
    }

    // Type names that have at least one entity which is not AS_IS, i.e. created, updated or deleted.
    final Set<String> changedTypeNames = entitiesByChange.entrySet().stream()
            .filter(entry -> entry.getKey() != AS_IS)
            .flatMap(entry -> entry.getValue().stream())
            .map(AtlasEntity::getTypeName)
            .collect(Collectors.toSet());

    // NOTE: Cascading DELETE will be performed when parent NiFiFlow is updated without removed DataSet entities.
    final Map<String, List<AtlasEntity>> remainingEntitiesByType = entitiesByChange.entrySet().stream()
            .filter(entry -> entry.getKey() != DELETED)
            .flatMap(entry -> entry.getValue().stream())
            .filter(entity -> changedTypeNames.contains(entity.getTypeName()))
            .collect(Collectors.groupingBy(AtlasEntity::getTypeName));

    // When every entity of a type has been deleted, the grouping above has no key for that type.
    // A missing key means the corresponding attribute is left untouched, so map such types to an
    // empty list to make the attribute get emptied instead.
    for (String changedTypeName : changedTypeNames) {
        remainingEntitiesByType.putIfAbsent(changedTypeName, Collections.emptyList());
    }
    return remainingEntitiesByType;
}
Also used : AtlasAttributeDef(org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef) ATTR_INPUT_PORTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUT_PORTS) TYPE_NIFI_FLOW(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW) LoggerFactory(org.slf4j.LoggerFactory) ATTR_OUTPUT_PORTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUT_PORTS) ATTR_QUEUES(org.apache.nifi.atlas.NiFiTypes.ATTR_QUEUES) DELETED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.DELETED) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) UPDATED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.UPDATED) Matcher(java.util.regex.Matcher) AtlasErrorCode(org.apache.atlas.AtlasErrorCode) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) AtlasTypesDef(org.apache.atlas.model.typedef.AtlasTypesDef) Map(java.util.Map) TYPE_NIFI_OUTPUT_PORT(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT) CREATED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.CREATED) AtlasUtils.getComponentIdFromQualifiedName(org.apache.nifi.atlas.AtlasUtils.getComponentIdFromQualifiedName) AtlasEntityDef(org.apache.atlas.model.typedef.AtlasEntityDef) MultivaluedMapImpl(com.sun.jersey.core.util.MultivaluedMapImpl) AtlasServiceException(org.apache.atlas.AtlasServiceException) Set(java.util.Set) StringUtils(org.apache.nifi.util.StringUtils) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) ATTR_URL(org.apache.nifi.atlas.NiFiTypes.ATTR_URL) Pattern(java.util.regex.Pattern) UniformInterfaceException(com.sun.jersey.api.client.UniformInterfaceException) AtlasClientV2(org.apache.atlas.AtlasClientV2) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) EntityMutationResponse(org.apache.atlas.model.instance.EntityMutationResponse) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ATTR_OUTPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS) 
TYPE_NIFI_FLOW_PATH(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH) AtlasUtils.findIdByQualifiedName(org.apache.nifi.atlas.AtlasUtils.findIdByQualifiedName) AS_IS(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.AS_IS) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) ATTR_INPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) Logger(org.slf4j.Logger) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) ATTR_FLOW_PATHS(org.apache.nifi.atlas.NiFiTypes.ATTR_FLOW_PATHS) TYPE_NIFI_INPUT_PORT(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT) ATTR_DESCRIPTION(org.apache.nifi.atlas.NiFiTypes.ATTR_DESCRIPTION) ENTITIES(org.apache.nifi.atlas.NiFiTypes.ENTITIES) ATTR_GUID(org.apache.nifi.atlas.NiFiTypes.ATTR_GUID) MultivaluedMap(javax.ws.rs.core.MultivaluedMap) Tuple(org.apache.nifi.util.Tuple) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) SearchFilter(org.apache.atlas.model.SearchFilter) ATTR_TYPENAME(org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME) Collections(java.util.Collections) EntityMutationResponse(org.apache.atlas.model.instance.EntityMutationResponse) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) List(java.util.List) ArrayList(java.util.ArrayList)

Example 2 with TYPE_NIFI_QUEUE

use of org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE in project nifi by apache.

From the class CompleteFlowPathLineage, method createCompleteFlowPath.

/**
 * Builds a FlowPath for the given LineagePath and buffers it together with its DataSet refs.
 * The qualified name of a FlowPath created here carries a hash.
 *
 * <p>Parent LineagePaths are handled first, because their hashes are part of the input used to compute
 * the hash of this path. This keeps two paths apart when they share the same component ids, inputs and
 * outputs but descend from parents with different inputs.</p>
 *
 * <p>Note that the event list returned by {@code lineagePath.getEvents()} is reversed in place.</p>
 *
 * @param nifiFlow A reference to current NiFiFlow
 * @param lineagePath LineagePath from which NiFiFlowPath and DataSet refs are created and added to the {@code createdFlowPaths}.
 * @param createdFlowPaths A list to buffer created NiFiFlowPaths,
 *                         in order to defer sending notification to Kafka until all parent FlowPath get analyzed.
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    final List<ProvenanceEventRecord> events = lineagePath.getEvents();
    Collections.reverse(events);

    final List<String> componentIds = events.stream()
            .map(ProvenanceEventRecord::getComponentId)
            .collect(Collectors.toList());
    final ProvenanceEventRecord firstEvent = events.get(0);
    final String firstComponentId = firstEvent.getComponentId();
    final DataSetRefs refs = lineagePath.getRefs();

    // Parents go first so that their hashes are available below.
    Referenceable parentQueue = null;
    if (!lineagePath.getParents().isEmpty()) {
        // A queue entity connects this lineage path to its parents.
        parentQueue = new Referenceable(TYPE_NIFI_QUEUE);
        // The first event tells why this lineage has parents, e.g. FORK or JOIN.
        parentQueue.set(ATTR_NAME, events.get(0).getEventType().name());
        refs.addInput(parentQueue);
        for (LineagePath parentPath : lineagePath.getParents()) {
            parentPath.getRefs().addOutput(parentQueue);
            createCompleteFlowPath(nifiFlow, parentPath, createdFlowPaths);
        }
    }

    // Create a variant path: hash the component ids, the input/output resource ids and the parent hashes.
    final Stream<String> ioIds = Stream.concat(refs.getInputs().stream(), refs.getOutputs().stream())
            .map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream()
            .map(parentPath -> String.valueOf(parentPath.getLineagePathHash()));
    final CRC32 checksum = new CRC32();
    final String hashSource = Stream.concat(Stream.concat(componentIds.stream(), ioIds), parentHashes)
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));
    checksum.update(hashSource.getBytes(StandardCharsets.UTF_8));
    final long hash = checksum.getValue();
    lineagePath.setLineagePathHash(hash);

    final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);

    // E.g, FF1 and FF2 read from dirA were merged, vs FF3 and FF4 read from dirB were merged then passed here,
    // these two should be different queue. Hence the hash is part of the queue's qualified name.
    if (parentQueue != null) {
        parentQueue.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
    }

    // Collapse consecutive events emitted by the same component so that the path name stays short.
    final List<ProvenanceEventRecord> namedEvents = new ArrayList<>();
    String lastComponentId = null;
    for (ProvenanceEventRecord current : events) {
        final boolean sameAsLast = current.getComponentId().equals(lastComponentId);
        if (!sameAsLast) {
            namedEvents.add(current);
        }
        lastComponentId = current.getComponentId();
    }
    final String pathName = namedEvents.stream()
            .map(named -> nifiFlow.getProcessComponentName(named.getComponentId(), named::getComponentType))
            .collect(Collectors.joining(", "));
    flowPath.setName(pathName);

    // Use the group of the static path that starts at the same component, or the root group if there is none.
    final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
    if (staticFlowPath != null) {
        flowPath.setGroupId(staticFlowPath.getGroupId());
    } else {
        flowPath.setGroupId(nifiFlow.getRootProcessGroupId());
    }

    // Only buffer the result here; notifications are sent after the entire lineagePath analysis has finished.
    createdFlowPaths.add(new Tuple<>(flowPath, refs));
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) HashMap(java.util.HashMap) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) ArrayList(java.util.ArrayList) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) NiFiFlow(org.apache.nifi.atlas.NiFiFlow) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) AtlasUtils.toTypedQualifiedName(org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) AtlasUtils.toQualifiedName(org.apache.nifi.atlas.AtlasUtils.toQualifiedName) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Tuple(org.apache.nifi.util.Tuple) DROP(org.apache.nifi.provenance.ProvenanceEventType.DROP) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) CRC32(java.util.zip.CRC32) Referenceable(org.apache.atlas.typesystem.Referenceable) Collections(java.util.Collections) LineageNodeType(org.apache.nifi.provenance.lineage.LineageNodeType) CRC32(java.util.zip.CRC32) ArrayList(java.util.ArrayList) Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) Stream(java.util.stream.Stream)

Aggregations

ArrayList (java.util.ArrayList)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Objects (java.util.Objects)2 Collectors (java.util.stream.Collectors)2 AtlasUtils.toStr (org.apache.nifi.atlas.AtlasUtils.toStr)2 ATTR_NAME (org.apache.nifi.atlas.NiFiTypes.ATTR_NAME)2 UniformInterfaceException (com.sun.jersey.api.client.UniformInterfaceException)1 MultivaluedMapImpl (com.sun.jersey.core.util.MultivaluedMapImpl)1 StandardCharsets (java.nio.charset.StandardCharsets)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 Function (java.util.function.Function)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 Stream (java.util.stream.Stream)1 CRC32 (java.util.zip.CRC32)1