Search in sources :

Example 1 with ATTR_QUALIFIED_NAME

use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.

the class NiFiAtlasClient method registerDataSetEntities.

/**
 * Register the changed DataSet entities (root input ports, root output ports and queues) of the specified NiFiFlow.
 *
 * <p>Created entities are sent to Atlas first; every entity that got a GUID assigned is then re-keyed
 * in the matching NiFiFlow entity map under an id carrying that GUID. Updated entities are sent afterwards.</p>
 *
 * @param nifiFlow the flow whose changed DataSet entities are registered
 * @return Map from each changed Atlas type name to its remaining entities (deleted ones excluded);
 *         a changed type with no remaining entity is mapped to an empty list
 * @throws AtlasServiceException declared by the Atlas client create/update calls
 */
private Map<String, List<AtlasEntity>> registerDataSetEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
    final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> changesByType = nifiFlow.getChangedDataSetEntities();

    if (changesByType.containsKey(CREATED)) {
        final List<AtlasEntity> newEntities = changesByType.get(CREATED);
        final EntityMutationResponse createResponse =
                atlasClient.createEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(newEntities));
        logger.debug("Created DataSet entities mutation response={}", createResponse);

        final Map<String, String> assignedGuids = createResponse.getGuidAssignments();
        for (AtlasEntity newEntity : newEntities) {
            // The response maps the GUID the entity was sent with to the GUID assigned by Atlas.
            final String assignedGuid = assignedGuids.get(newEntity.getGuid());
            final String qualifiedName = toStr(newEntity.getAttribute(ATTR_QUALIFIED_NAME));
            if (StringUtils.isEmpty(assignedGuid)) {
                logger.warn("GUID was not assigned for {}::{} for some reason.", newEntity.getTypeName(), qualifiedName);
                continue;
            }

            // Pick the NiFiFlow map that holds entities of this type.
            final Map<AtlasObjectId, AtlasEntity> entitiesOfType;
            switch (newEntity.getTypeName()) {
                case TYPE_NIFI_INPUT_PORT:
                    entitiesOfType = nifiFlow.getRootInputPortEntities();
                    break;
                case TYPE_NIFI_OUTPUT_PORT:
                    entitiesOfType = nifiFlow.getRootOutputPortEntities();
                    break;
                case TYPE_NIFI_QUEUE:
                    entitiesOfType = nifiFlow.getQueues();
                    break;
                default:
                    throw new RuntimeException(newEntity.getTypeName() + " is not expected.");
            }

            // Drop the entry keyed by the id that has no GUID, then store the entity again under an id with the GUID.
            findIdByQualifiedName(entitiesOfType.keySet(), qualifiedName)
                    .ifPresent(staleId -> entitiesOfType.remove(staleId));
            newEntity.setGuid(assignedGuid);
            final Map<String, Object> uniqueAttributes = Collections.singletonMap(ATTR_QUALIFIED_NAME, qualifiedName);
            entitiesOfType.put(new AtlasObjectId(assignedGuid, newEntity.getTypeName(), uniqueAttributes), newEntity);
        }
    }

    if (changesByType.containsKey(UPDATED)) {
        final List<AtlasEntity> modifiedEntities = changesByType.get(UPDATED);
        final EntityMutationResponse updateResponse =
                atlasClient.updateEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(modifiedEntities));
        logger.debug("Updated DataSet entities mutation response={}", updateResponse);
    }

    // Type names having at least one entity whose change type is anything but AS_IS.
    final Set<String> changedTypeNames = changesByType.entrySet().stream()
            .filter(change -> !AS_IS.equals(change.getKey()))
            .flatMap(change -> change.getValue().stream())
            .map(AtlasEntity::getTypeName)
            .collect(Collectors.toSet());

    // NOTE: Cascading DELETE will be performed when parent NiFiFlow is updated without removed DataSet entities.
    final Map<String, List<AtlasEntity>> remainingByType = changesByType.entrySet().stream()
            .filter(change -> !DELETED.equals(change.getKey()))
            .flatMap(change -> change.getValue().stream())
            .filter(remaining -> changedTypeNames.contains(remaining.getTypeName()))
            .collect(Collectors.groupingBy(AtlasEntity::getTypeName));

    // When every entity of a type (e.g. nifi_input_port) is deleted, the grouping above has no key for that type.
    // A type missing from the returned map leaves the corresponding attribute not updated,
    // so map such a type to an empty list in order to empty the attribute.
    for (String changedTypeName : changedTypeNames) {
        remainingByType.computeIfAbsent(changedTypeName, k -> Collections.emptyList());
    }
    return remainingByType;
}
Also used : AtlasAttributeDef(org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef) ATTR_INPUT_PORTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUT_PORTS) TYPE_NIFI_FLOW(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW) LoggerFactory(org.slf4j.LoggerFactory) ATTR_OUTPUT_PORTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUT_PORTS) ATTR_QUEUES(org.apache.nifi.atlas.NiFiTypes.ATTR_QUEUES) DELETED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.DELETED) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) UPDATED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.UPDATED) Matcher(java.util.regex.Matcher) AtlasErrorCode(org.apache.atlas.AtlasErrorCode) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) AtlasTypesDef(org.apache.atlas.model.typedef.AtlasTypesDef) Map(java.util.Map) TYPE_NIFI_OUTPUT_PORT(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT) CREATED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.CREATED) AtlasUtils.getComponentIdFromQualifiedName(org.apache.nifi.atlas.AtlasUtils.getComponentIdFromQualifiedName) AtlasEntityDef(org.apache.atlas.model.typedef.AtlasEntityDef) MultivaluedMapImpl(com.sun.jersey.core.util.MultivaluedMapImpl) AtlasServiceException(org.apache.atlas.AtlasServiceException) Set(java.util.Set) StringUtils(org.apache.nifi.util.StringUtils) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) ATTR_URL(org.apache.nifi.atlas.NiFiTypes.ATTR_URL) Pattern(java.util.regex.Pattern) UniformInterfaceException(com.sun.jersey.api.client.UniformInterfaceException) AtlasClientV2(org.apache.atlas.AtlasClientV2) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) EntityMutationResponse(org.apache.atlas.model.instance.EntityMutationResponse) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ATTR_OUTPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS) 
TYPE_NIFI_FLOW_PATH(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH) AtlasUtils.findIdByQualifiedName(org.apache.nifi.atlas.AtlasUtils.findIdByQualifiedName) AS_IS(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.AS_IS) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) ATTR_INPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) Logger(org.slf4j.Logger) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) ATTR_FLOW_PATHS(org.apache.nifi.atlas.NiFiTypes.ATTR_FLOW_PATHS) TYPE_NIFI_INPUT_PORT(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT) ATTR_DESCRIPTION(org.apache.nifi.atlas.NiFiTypes.ATTR_DESCRIPTION) ENTITIES(org.apache.nifi.atlas.NiFiTypes.ENTITIES) ATTR_GUID(org.apache.nifi.atlas.NiFiTypes.ATTR_GUID) MultivaluedMap(javax.ws.rs.core.MultivaluedMap) Tuple(org.apache.nifi.util.Tuple) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) SearchFilter(org.apache.atlas.model.SearchFilter) ATTR_TYPENAME(org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME) Collections(java.util.Collections) EntityMutationResponse(org.apache.atlas.model.instance.EntityMutationResponse) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) List(java.util.List) ArrayList(java.util.ArrayList)

Example 2 with ATTR_QUALIFIED_NAME

use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.

the class NiFiAtlasClient method registerFlowPathEntities.

/**
 * Register the changed FlowPath entities of the specified NiFiFlow.
 *
 * <p>Created entities are sent to Atlas and get the GUID found in the mutation response's GUID assignments;
 * updated entities are sent as an update. In both cases the entity is stored on the NiFiFlowPath
 * looked up by the component id taken from the entity's qualified name.</p>
 *
 * @param nifiFlow the flow whose changed FlowPath entities are registered
 * @return ids of all FlowPath entities except deleted ones, or {@code null} when
 *         {@code NiFiFlow.EntityChangeType.containsChange} reports no change
 * @throws AtlasServiceException declared by the Atlas client create/update calls
 */
private Set<AtlasObjectId> registerFlowPathEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
    final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> changesByType = nifiFlow.getChangedFlowPathEntities();

    if (changesByType.containsKey(CREATED)) {
        final List<AtlasEntity> newEntities = changesByType.get(CREATED);
        final EntityMutationResponse createResponse =
                atlasClient.createEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(newEntities));
        logger.debug("Created FlowPath entities mutation response={}", createResponse);

        final Map<String, String> assignedGuids = createResponse.getGuidAssignments();
        for (AtlasEntity newEntity : newEntities) {
            // Swap the GUID the entity was sent with for the one found in the GUID assignments.
            final String sentGuid = newEntity.getGuid();
            newEntity.setGuid(assignedGuids.get(sentGuid));

            final String qualifiedName = toStr(newEntity.getAttribute(ATTR_QUALIFIED_NAME));
            final String pathId = getComponentIdFromQualifiedName(qualifiedName);
            final NiFiFlowPath flowPath = nifiFlow.getFlowPaths().get(pathId);
            flowPath.setExEntity(newEntity);
        }
    }

    if (changesByType.containsKey(UPDATED)) {
        final List<AtlasEntity> modifiedEntities = changesByType.get(UPDATED);
        final EntityMutationResponse updateResponse =
                atlasClient.updateEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(modifiedEntities));
        logger.debug("Updated FlowPath entities mutation response={}", updateResponse);

        for (AtlasEntity modifiedEntity : modifiedEntities) {
            final String qualifiedName = toStr(modifiedEntity.getAttribute(ATTR_QUALIFIED_NAME));
            final String pathId = getComponentIdFromQualifiedName(qualifiedName);
            final NiFiFlowPath flowPath = nifiFlow.getFlowPaths().get(pathId);
            flowPath.setExEntity(modifiedEntity);
        }
    }

    // Without any change there is nothing to report back to the caller.
    if (!NiFiFlow.EntityChangeType.containsChange(changesByType.keySet())) {
        return null;
    }

    // Build an id for every entity that is not deleted.
    return changesByType.entrySet().stream()
            .filter(change -> !DELETED.equals(change.getKey()))
            .flatMap(change -> change.getValue().stream())
            .map(pathEntity -> new AtlasObjectId(
                    pathEntity.getGuid(),
                    TYPE_NIFI_FLOW_PATH,
                    Collections.singletonMap(ATTR_QUALIFIED_NAME, pathEntity.getAttribute(ATTR_QUALIFIED_NAME))))
            .collect(Collectors.toSet());
}
Also used : AtlasAttributeDef(org.apache.atlas.model.typedef.AtlasStructDef.AtlasAttributeDef) ATTR_INPUT_PORTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUT_PORTS) TYPE_NIFI_FLOW(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW) LoggerFactory(org.slf4j.LoggerFactory) ATTR_OUTPUT_PORTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUT_PORTS) ATTR_QUEUES(org.apache.nifi.atlas.NiFiTypes.ATTR_QUEUES) DELETED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.DELETED) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) UPDATED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.UPDATED) Matcher(java.util.regex.Matcher) AtlasErrorCode(org.apache.atlas.AtlasErrorCode) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) AtlasTypesDef(org.apache.atlas.model.typedef.AtlasTypesDef) Map(java.util.Map) TYPE_NIFI_OUTPUT_PORT(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_OUTPUT_PORT) CREATED(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.CREATED) AtlasUtils.getComponentIdFromQualifiedName(org.apache.nifi.atlas.AtlasUtils.getComponentIdFromQualifiedName) AtlasEntityDef(org.apache.atlas.model.typedef.AtlasEntityDef) MultivaluedMapImpl(com.sun.jersey.core.util.MultivaluedMapImpl) AtlasServiceException(org.apache.atlas.AtlasServiceException) Set(java.util.Set) StringUtils(org.apache.nifi.util.StringUtils) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) ATTR_URL(org.apache.nifi.atlas.NiFiTypes.ATTR_URL) Pattern(java.util.regex.Pattern) UniformInterfaceException(com.sun.jersey.api.client.UniformInterfaceException) AtlasClientV2(org.apache.atlas.AtlasClientV2) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) EntityMutationResponse(org.apache.atlas.model.instance.EntityMutationResponse) Function(java.util.function.Function) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ATTR_OUTPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS) 
TYPE_NIFI_FLOW_PATH(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH) AtlasUtils.findIdByQualifiedName(org.apache.nifi.atlas.AtlasUtils.findIdByQualifiedName) AS_IS(org.apache.nifi.atlas.NiFiFlow.EntityChangeType.AS_IS) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) ATTR_INPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) Logger(org.slf4j.Logger) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) ATTR_FLOW_PATHS(org.apache.nifi.atlas.NiFiTypes.ATTR_FLOW_PATHS) TYPE_NIFI_INPUT_PORT(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_INPUT_PORT) ATTR_DESCRIPTION(org.apache.nifi.atlas.NiFiTypes.ATTR_DESCRIPTION) ENTITIES(org.apache.nifi.atlas.NiFiTypes.ENTITIES) ATTR_GUID(org.apache.nifi.atlas.NiFiTypes.ATTR_GUID) MultivaluedMap(javax.ws.rs.core.MultivaluedMap) Tuple(org.apache.nifi.util.Tuple) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) SearchFilter(org.apache.atlas.model.SearchFilter) ATTR_TYPENAME(org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME) Collections(java.util.Collections) EntityMutationResponse(org.apache.atlas.model.instance.EntityMutationResponse) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) List(java.util.List) ArrayList(java.util.ArrayList)

Example 3 with ATTR_QUALIFIED_NAME

use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.

the class NiFiAtlasHook method commitMessages.

/**
 * Send the buffered notification messages and clear the buffer.
 *
 * <p>Messages are split into partial updates of nifi_flow_path addressed by qualified name, and all the others.
 * The others are notified first. The partial updates are then consolidated into a single message per
 * qualified name, merging the inputs/outputs of the existing entity with those of every buffered message.
 * Metrics are logged and the buffer is cleared even when notification fails.</p>
 */
public void commitMessages() {
    // Key 'true' holds partial nifi_flow_path updates by qualified name, key 'false' holds everything else.
    final Map<Boolean, List<HookNotificationMessage>> messagesByKind = messages.stream()
            .collect(Collectors.groupingBy(msg ->
                    ENTITY_PARTIAL_UPDATE.equals(msg.getType())
                            && TYPE_NIFI_FLOW_PATH.equals(((EntityPartialUpdateRequest) msg).getTypeName())
                            && ATTR_QUALIFIED_NAME.equals(((EntityPartialUpdateRequest) msg).getAttribute())));
    final List<HookNotificationMessage> otherMessages =
            messagesByKind.computeIfAbsent(false, k -> Collections.emptyList());
    final List<HookNotificationMessage> partialNiFiFlowPathUpdates =
            messagesByKind.computeIfAbsent(true, k -> Collections.emptyList());
    logger.info("Commit messages: {} partialNiFiFlowPathUpdate and {} other messages.", partialNiFiFlowPathUpdates.size(), otherMessages.size());

    final Metrics metrics = new Metrics();
    metrics.totalMessages = messages.size();
    metrics.partialNiFiFlowPathUpdates = partialNiFiFlowPathUpdates.size();
    metrics.otherMessages = otherMessages.size();

    try {
        // Notify other messages first.
        notifyEntities(otherMessages);

        // De-duplicate messages: group the partial updates by the flow path qualified name they target.
        final Map<String, List<EntityPartialUpdateRequest>> updatesByQualifiedName = partialNiFiFlowPathUpdates.stream()
                .map(msg -> (EntityPartialUpdateRequest) msg)
                .collect(Collectors.groupingBy(EntityPartialUpdateRequest::getAttributeValue));

        final List<HookNotificationMessage> deduplicatedMessages = new ArrayList<>();
        for (Map.Entry<String, List<EntityPartialUpdateRequest>> updates : updatesByQualifiedName.entrySet()) {
            final String flowPathQualifiedName = updates.getKey();
            final Map<String, Referenceable> distinctInputs;
            final Map<String, Referenceable> distinctOutputs;
            final String flowPathGuid;
            try {
                // Fetch existing nifi_flow_path and its inputs/outputs.
                metrics.flowPathSearched++;
                final AtlasObjectId flowPathId = new AtlasObjectId(TYPE_NIFI_FLOW_PATH, ATTR_QUALIFIED_NAME, flowPathQualifiedName);
                final AtlasEntity flowPathEntity = atlasClient.searchEntityDef(flowPathId).getEntity();
                flowPathGuid = flowPathEntity.getGuid();
                distinctInputs = toReferenceables(flowPathEntity.getAttribute(ATTR_INPUTS), metrics);
                distinctOutputs = toReferenceables(flowPathEntity.getAttribute(ATTR_OUTPUTS), metrics);
            } catch (AtlasServiceException e) {
                if (ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) {
                    logger.debug("nifi_flow_path was not found for qualifiedName {}", flowPathQualifiedName);
                } else {
                    logger.warn("Failed to retrieve nifi_flow_path with qualifiedName {} due to {}", flowPathQualifiedName, e, e);
                }
                // No consolidated message is produced for a flow path that could not be fetched.
                continue;
            }

            // Merge all inputs and outputs for this nifi_flow_path, keeping the first reference seen per key.
            for (EntityPartialUpdateRequest update : updates.getValue()) {
                fromReferenceable(update.getEntity().get(ATTR_INPUTS), metrics).entrySet().stream()
                        .filter(input -> !distinctInputs.containsKey(input.getKey()))
                        .forEach(input -> distinctInputs.put(input.getKey(), input.getValue()));
                fromReferenceable(update.getEntity().get(ATTR_OUTPUTS), metrics).entrySet().stream()
                        .filter(output -> !distinctOutputs.containsKey(output.getKey()))
                        .forEach(output -> distinctOutputs.put(output.getKey(), output.getValue()));
            }

            // Consolidate messages into one.
            final Referenceable consolidatedFlowPath = new Referenceable(flowPathGuid, TYPE_NIFI_FLOW_PATH, null);
            // NOTE: distinctInputs.values() returns HashMap$Values, which causes following error. To avoid that, wrap with ArrayList:
            // org.json4s.package$MappingException: Can't find ScalaSig for class org.apache.atlas.typesystem.Referenceable
            consolidatedFlowPath.set(ATTR_INPUTS, new ArrayList<>(distinctInputs.values()));
            consolidatedFlowPath.set(ATTR_OUTPUTS, new ArrayList<>(distinctOutputs.values()));
            deduplicatedMessages.add(new EntityPartialUpdateRequest(
                    NIFI_USER, TYPE_NIFI_FLOW_PATH, ATTR_QUALIFIED_NAME, flowPathQualifiedName, consolidatedFlowPath));
        }

        metrics.dedupedPartialNiFiFlowPathUpdates = deduplicatedMessages.size();
        notifyEntities(deduplicatedMessages);
    } finally {
        metrics.log("Committed");
        messages.clear();
    }
}
Also used : ClientResponse(com.sun.jersey.api.client.ClientResponse) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) LineageContext(org.apache.nifi.atlas.provenance.lineage.LineageContext) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ATTR_OUTPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS) EntityPartialUpdateRequest(org.apache.atlas.notification.hook.HookNotification.EntityPartialUpdateRequest) TYPE_NIFI_FLOW_PATH(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_FLOW_PATH) HookNotificationMessage(org.apache.atlas.notification.hook.HookNotification.HookNotificationMessage) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) Map(java.util.Map) Id(org.apache.atlas.typesystem.persistence.Id) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) ATTR_INPUTS(org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS) Logger(org.slf4j.Logger) AtlasUtils.toTypedQualifiedName(org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName) AtlasServiceException(org.apache.atlas.AtlasServiceException) Collectors(java.util.stream.Collectors) ATTR_GUID(org.apache.nifi.atlas.NiFiTypes.ATTR_GUID) Objects(java.util.Objects) List(java.util.List) Tuple(org.apache.nifi.util.Tuple) ENTITY_PARTIAL_UPDATE(org.apache.atlas.notification.hook.HookNotification.HookNotificationType.ENTITY_PARTIAL_UPDATE) Referenceable(org.apache.atlas.typesystem.Referenceable) ATTR_TYPENAME(org.apache.nifi.atlas.NiFiTypes.ATTR_TYPENAME) Collections(java.util.Collections) AtlasHook(org.apache.atlas.hook.AtlasHook) AtlasObjectId(org.apache.atlas.model.instance.AtlasObjectId) EntityPartialUpdateRequest(org.apache.atlas.notification.hook.HookNotification.EntityPartialUpdateRequest) Referenceable(org.apache.atlas.typesystem.Referenceable) AtlasServiceException(org.apache.atlas.AtlasServiceException) AtlasEntity(org.apache.atlas.model.instance.AtlasEntity) 
HookNotificationMessage(org.apache.atlas.notification.hook.HookNotification.HookNotificationMessage) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with ATTR_QUALIFIED_NAME

use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.

the class CompleteFlowPathLineage method createCompleteFlowPath.

/**
 * Build a new FlowPath out of a LineagePath. A FlowPath built here is identified by its first component id
 * together with a hash.
 *
 * <p>Parents are processed before the given path, because the hashes of parent LineagePaths are part of
 * the input from which the child hash is calculated. That way two lineagePaths having identical
 * componentIds/inputs/outputs still get different hashes when their parents differ.</p>
 *
 * <p>Note that the event list returned by {@code lineagePath.getEvents()} is reversed in place.</p>
 *
 * @param nifiFlow A reference to current NiFiFlow
 * @param lineagePath LineagePath from which NiFiFlowPath and DataSet refs are created and added to the {@code createdFlowPaths}.
 * @param createdFlowPaths A list buffering the created NiFiFlowPaths together with their DataSet refs,
 *                         so that notifications can be deferred until every parent FlowPath has been analyzed.
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    final List<ProvenanceEventRecord> events = lineagePath.getEvents();
    Collections.reverse(events);

    final List<String> componentIds = events.stream()
            .map(ProvenanceEventRecord::getComponentId)
            .collect(Collectors.toList());
    final String firstComponentId = events.get(0).getComponentId();
    final DataSetRefs refs = lineagePath.getRefs();

    // Process parents first.
    Referenceable parentQueue = null;
    if (!lineagePath.getParents().isEmpty()) {
        // Add queue between this lineage path and parent.
        parentQueue = new Referenceable(TYPE_NIFI_QUEUE);
        // The first event knows why this lineage has parents, e.g. FORK or JOIN.
        final String firstEventType = events.get(0).getEventType().name();
        parentQueue.set(ATTR_NAME, firstEventType);
        refs.addInput(parentQueue);

        for (LineagePath parentPath : lineagePath.getParents()) {
            parentPath.getRefs().addOutput(parentQueue);
            createCompleteFlowPath(nifiFlow, parentPath, createdFlowPaths);
        }
    }

    // Create a variant path.
    // Calculate a hash from component_ids and input and output resource ids.
    final Stream<String> ioIds = Stream.concat(refs.getInputs().stream(), refs.getOutputs().stream())
            .map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream()
            .map(parentPath -> String.valueOf(parentPath.getLineagePathHash()));
    final String hashSource = Stream.concat(Stream.concat(componentIds.stream(), ioIds), parentHashes)
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));
    final CRC32 checksum = new CRC32();
    checksum.update(hashSource.getBytes(StandardCharsets.UTF_8));
    final long hash = checksum.getValue();
    lineagePath.setLineagePathHash(hash);

    final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);

    // E.g, FF1 and FF2 read from dirA were merged, vs FF3 and FF4 read from dirB were merged then passed here, these two should be different queue.
    // The queue gets its qualified name only now, after the hash above has been calculated.
    if (parentQueue != null) {
        parentQueue.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
    }

    // If the same components emitted multiple provenance events consecutively, merge it to come up with a simpler name.
    final List<ProvenanceEventRecord> eventsForName = new ArrayList<>();
    String lastComponentId = null;
    for (ProvenanceEventRecord event : events) {
        final String componentId = event.getComponentId();
        if (!componentId.equals(lastComponentId)) {
            eventsForName.add(event);
        }
        lastComponentId = componentId;
    }
    final String pathName = eventsForName.stream()
            .map(event -> nifiFlow.getProcessComponentName(event.getComponentId(), event::getComponentType))
            .collect(Collectors.joining(", "));
    flowPath.setName(pathName);

    // Use the group id of the path found for the first component, falling back to the root process group id.
    final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
    if (staticFlowPath != null) {
        flowPath.setGroupId(staticFlowPath.getGroupId());
    } else {
        flowPath.setGroupId(nifiFlow.getRootProcessGroupId());
    }

    // To defer send notification until entire lineagePath analysis gets finished, just add the instance into a buffer.
    createdFlowPaths.add(new Tuple<>(flowPath, refs));
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) HashMap(java.util.HashMap) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) ArrayList(java.util.ArrayList) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) NiFiFlow(org.apache.nifi.atlas.NiFiFlow) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) AtlasUtils.toTypedQualifiedName(org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) AtlasUtils.toQualifiedName(org.apache.nifi.atlas.AtlasUtils.toQualifiedName) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Tuple(org.apache.nifi.util.Tuple) DROP(org.apache.nifi.provenance.ProvenanceEventType.DROP) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) CRC32(java.util.zip.CRC32) Referenceable(org.apache.atlas.typesystem.Referenceable) Collections(java.util.Collections) LineageNodeType(org.apache.nifi.provenance.lineage.LineageNodeType) CRC32(java.util.zip.CRC32) ArrayList(java.util.ArrayList) Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) Stream(java.util.stream.Stream)

Aggregations

ArrayList (java.util.ArrayList)4 Collections (java.util.Collections)4 HashMap (java.util.HashMap)4 List (java.util.List)4 Map (java.util.Map)4 Objects (java.util.Objects)4 Collectors (java.util.stream.Collectors)4 AtlasServiceException (org.apache.atlas.AtlasServiceException)3 AtlasEntity (org.apache.atlas.model.instance.AtlasEntity)3 AtlasObjectId (org.apache.atlas.model.instance.AtlasObjectId)3 AtlasUtils.toStr (org.apache.nifi.atlas.AtlasUtils.toStr)3 ATTR_GUID (org.apache.nifi.atlas.NiFiTypes.ATTR_GUID)3 ATTR_INPUTS (org.apache.nifi.atlas.NiFiTypes.ATTR_INPUTS)3 ATTR_NAME (org.apache.nifi.atlas.NiFiTypes.ATTR_NAME)3 ATTR_OUTPUTS (org.apache.nifi.atlas.NiFiTypes.ATTR_OUTPUTS)3 UniformInterfaceException (com.sun.jersey.api.client.UniformInterfaceException)2 MultivaluedMapImpl (com.sun.jersey.core.util.MultivaluedMapImpl)2 HashSet (java.util.HashSet)2 Set (java.util.Set)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)2