Search in sources :

Example 1 with NiFiFlow

use of org.apache.nifi.atlas.NiFiFlow in project nifi by apache.

The method createCompleteFlowPath of the class CompleteFlowPathLineage.

/**
 * Builds a {@link NiFiFlowPath} whose identity includes a hash, based on the given LineagePath,
 * and appends it (paired with the path's DataSetRefs) to {@code createdFlowPaths}.
 *
 * <p>Parent LineagePaths are handled recursively before the hash of this path is calculated,
 * because each parent's hash is part of the hash input of its child. As a result, two lineage paths
 * having identical component ids, inputs and outputs still produce different FlowPaths
 * when their parents differ.</p>
 *
 * <p>When the path has parents, a queue {@link Referenceable} is created, added as an input of this path
 * and as an output of every parent. Its qualified name is assigned only after the hash is known.</p>
 *
 * @param nifiFlow A reference to current NiFiFlow
 * @param lineagePath LineagePath to convert; its lineage path hash is set by this method
 * @param createdFlowPaths A buffer receiving the created NiFiFlowPaths with their DataSetRefs,
 *                         so that sending notification to Kafka can be deferred until all parent FlowPaths are analyzed
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    final List<ProvenanceEventRecord> events = lineagePath.getEvents();
    // NOTE(review): the list returned by getEvents() is reversed in place here;
    // whether that is the LineagePath's internal list cannot be told from this method - confirm.
    Collections.reverse(events);

    final List<String> componentIds = new ArrayList<>();
    for (ProvenanceEventRecord event : events) {
        componentIds.add(event.getComponentId());
    }

    final ProvenanceEventRecord headEvent = events.get(0);
    final String firstComponentId = headEvent.getComponentId();
    final DataSetRefs dataSetRefs = lineagePath.getRefs();

    // Parents go first, connected to this path through a queue entity.
    final boolean hasParents = !lineagePath.getParents().isEmpty();
    final Referenceable queueBetweenParent = hasParents ? new Referenceable(TYPE_NIFI_QUEUE) : null;
    if (hasParents) {
        // The head event tells why this lineage has parents, e.g. FORK or JOIN.
        queueBetweenParent.set(ATTR_NAME, headEvent.getEventType().name());
        dataSetRefs.addInput(queueBetweenParent);
        for (LineagePath parent : lineagePath.getParents()) {
            parent.getRefs().addOutput(queueBetweenParent);
            createCompleteFlowPath(nifiFlow, parent, createdFlowPaths);
        }
    }

    // The hash input consists of component ids, typed qualified names of inputs/outputs, and parent hashes.
    // The streams are lazy, so parent hashes are read only when the joined string is built below.
    final Stream<String> resourceIds = Stream.concat(dataSetRefs.getInputs().stream(), dataSetRefs.getOutputs().stream())
            .map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream()
            .map(parentPath -> String.valueOf(parentPath.getLineagePathHash()));
    final String hashSource = Stream.concat(Stream.concat(componentIds.stream(), resourceIds), parentHashes)
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));

    final CRC32 crc32 = new CRC32();
    crc32.update(hashSource.getBytes(StandardCharsets.UTF_8));
    final long hash = crc32.getValue();
    lineagePath.setLineagePathHash(hash);

    final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);

    // The queue name carries the hash too: e.g. FF1 and FF2 read from dirA and merged, versus FF3 and FF4
    // read from dirB and merged before arriving here, have to end up as two different queues.
    if (queueBetweenParent != null) {
        queueBetweenParent.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
    }

    // Consecutive events emitted by the same component are collapsed into one entry to keep the name simple.
    final List<ProvenanceEventRecord> namedEvents = new ArrayList<>();
    String lastComponentId = null;
    for (ProvenanceEventRecord event : events) {
        final String componentId = event.getComponentId();
        if (!componentId.equals(lastComponentId)) {
            namedEvents.add(event);
        }
        lastComponentId = componentId;
    }
    final String pathName = namedEvents.stream()
            .map(event -> nifiFlow.getProcessComponentName(event.getComponentId(), event::getComponentType))
            .collect(Collectors.joining(", "));
    flowPath.setName(pathName);

    // Use the group of the path found for the first component, falling back to the root process group.
    final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
    if (staticFlowPath != null) {
        flowPath.setGroupId(staticFlowPath.getGroupId());
    } else {
        flowPath.setGroupId(nifiFlow.getRootProcessGroupId());
    }

    // Only buffer the result; notification is deferred until the entire lineagePath analysis is finished.
    createdFlowPaths.add(new Tuple<>(flowPath, dataSetRefs));
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) HashMap(java.util.HashMap) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) ArrayList(java.util.ArrayList) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) NiFiFlow(org.apache.nifi.atlas.NiFiFlow) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) AtlasUtils.toTypedQualifiedName(org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) AtlasUtils.toQualifiedName(org.apache.nifi.atlas.AtlasUtils.toQualifiedName) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Tuple(org.apache.nifi.util.Tuple) DROP(org.apache.nifi.provenance.ProvenanceEventType.DROP) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) CRC32(java.util.zip.CRC32) Referenceable(org.apache.atlas.typesystem.Referenceable) Collections(java.util.Collections) LineageNodeType(org.apache.nifi.provenance.lineage.LineageNodeType) CRC32(java.util.zip.CRC32) ArrayList(java.util.ArrayList) Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) Stream(java.util.stream.Stream)

Example 2 with NiFiFlow

use of org.apache.nifi.atlas.NiFiFlow in project nifi by apache.

The method createNiFiFlow of the class ReportLineageToAtlas.

/**
 * Prepares a {@link NiFiFlow} describing the root process group and runs the flow analyzer on it.
 *
 * <p>The flow already stored in Atlas is reused when it can be fetched; a NOT_FOUND response
 * leads to a brand new instance instead. Flow name, URL and cluster name are then (re)assigned
 * before the process group and its paths are analyzed.</p>
 *
 * @param context reporting context providing the root process group status and the NiFi URL property
 * @param atlasClient client used to look up an existing NiFiFlow in Atlas
 * @return the analyzed NiFiFlow
 * @throws IllegalArgumentException if the configured NiFi URL cannot be parsed
 * @throws RuntimeException if fetching the existing flow fails for a reason other than NOT_FOUND
 */
private NiFiFlow createNiFiFlow(ReportingContext context, NiFiAtlasClient atlasClient) {
    final ProcessGroupStatus rootGroup = context.getEventAccess().getGroupStatus("root");
    final String flowName = rootGroup.getName();
    final String nifiUrl = context.getProperty(ATLAS_NIFI_URL).evaluateAttributeExpressions().getValue();

    // The cluster name is resolved from the host part of the NiFi URL.
    final String clusterName;
    try {
        clusterName = clusterResolvers.fromHostNames(new URL(nifiUrl).getHost());
    } catch (MalformedURLException e) {
        throw new IllegalArgumentException("Failed to parse NiFi URL, " + e.getMessage(), e);
    }

    // Try to reuse the NiFiFlow already stored in Atlas.
    NiFiFlow nifiFlow = null;
    try {
        nifiFlow = atlasClient.fetchNiFiFlow(rootGroup.getId(), clusterName);
    } catch (AtlasServiceException e) {
        if (!ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) {
            throw new RuntimeException("Failed to fetch existing NiFI flow. " + e, e);
        }
        // Not found is an expected outcome; a new flow is created below.
        getLogger().debug("Existing flow was not found for {}@{}", new Object[] { rootGroup.getId(), clusterName });
    }
    if (nifiFlow == null) {
        nifiFlow = new NiFiFlow(rootGroup.getId());
    }

    nifiFlow.setFlowName(flowName);
    nifiFlow.setUrl(nifiUrl);
    nifiFlow.setClusterName(clusterName);

    final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
    analyzer.analyzeProcessGroup(nifiFlow, rootGroup);
    analyzer.analyzePaths(nifiFlow);
    return nifiFlow;
}
Also used : ProcessGroupStatus(org.apache.nifi.controller.status.ProcessGroupStatus) NiFiFlowAnalyzer(org.apache.nifi.atlas.NiFiFlowAnalyzer) MalformedURLException(java.net.MalformedURLException) AtlasServiceException(org.apache.atlas.AtlasServiceException) NiFiFlow(org.apache.nifi.atlas.NiFiFlow) URL(java.net.URL)

Example 3 with NiFiFlow

use of org.apache.nifi.atlas.NiFiFlow in project nifi by apache.

The method onTrigger of the class ReportLineageToAtlas.

/**
 * Runs one reporting cycle: makes sure the NiFi type definitions exist in Atlas, analyzes the current
 * NiFi flow, registers it (on the node responsible for primary tasks) and consumes provenance events.
 *
 * <p>The cycle is skipped when this node is clustered but its node identifier is empty, or when
 * a non-primary node finds that the type definitions are not registered yet.</p>
 *
 * @param context the reporting context of this invocation
 * @throws RuntimeException if type definition handling or flow registration fails with an AtlasServiceException
 */
@Override
public void onTrigger(ReportingContext context) {
    final String nodeId = context.getClusterNodeIdentifier();
    final boolean clustered = context.isClustered();
    if (clustered && isEmpty(nodeId)) {
        // Clustered, but the ID of this node is not known yet, so it is too early to process anything.
        return;
    }

    // A standalone node, or the primary node of a cluster, takes care of the primary tasks.
    final boolean primaryTaskOwner = !clustered || getNodeTypeProvider().isPrimary();
    final NiFiAtlasClient atlasClient = createNiFiAtlasClient(context);

    // Make sure entity defs exist in Atlas before going further.
    if (!isTypeDefCreated) {
        try {
            if (primaryTaskOwner) {
                // This node creates the NiFi type definitions in Atlas type system.
                atlasClient.registerNiFiTypeDefs(false);
            } else if (!atlasClient.isNiFiTypeDefsRegistered()) {
                // Other nodes only check for existence and retry on a later trigger.
                getLogger().debug("NiFi type definitions are not ready in Atlas type system yet.");
                return;
            }
            isTypeDefCreated = true;
        } catch (AtlasServiceException e) {
            throw new RuntimeException("Failed to check and create NiFi flow type definitions in Atlas due to " + e, e);
        }
    }

    // Every node analyzes the NiFiFlow, primary or not.
    // Each node is assumed to have the same flow definition, as guaranteed by NiFi cluster management mechanism.
    final NiFiFlow nifiFlow = createNiFiFlow(context, atlasClient);
    if (primaryTaskOwner) {
        try {
            atlasClient.registerNiFiFlow(nifiFlow);
        } catch (AtlasServiceException e) {
            throw new RuntimeException("Failed to register NiFI flow. " + e, e);
        }
    }

    // NOTE: There is a race condition between the primary node and other nodes.
    // If a node notifies an event related to a NiFi component which the primary node has not created yet,
    // the notification message will fail because it refers to a non-existing entity.
    nifiAtlasHook.setAtlasClient(atlasClient);
    consumeNiFiProvenanceEvents(context, nifiFlow);
}
Also used : AtlasServiceException(org.apache.atlas.AtlasServiceException) NiFiAtlasClient(org.apache.nifi.atlas.NiFiAtlasClient) NiFiFlow(org.apache.nifi.atlas.NiFiFlow)

Aggregations

NiFiFlow (org.apache.nifi.atlas.NiFiFlow)3 AtlasServiceException (org.apache.atlas.AtlasServiceException)2 MalformedURLException (java.net.MalformedURLException)1 URL (java.net.URL)1 StandardCharsets (java.nio.charset.StandardCharsets)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 CRC32 (java.util.zip.CRC32)1 Referenceable (org.apache.atlas.typesystem.Referenceable)1 AtlasUtils.toQualifiedName (org.apache.nifi.atlas.AtlasUtils.toQualifiedName)1 AtlasUtils.toStr (org.apache.nifi.atlas.AtlasUtils.toStr)1 AtlasUtils.toTypedQualifiedName (org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName)1 NiFiAtlasClient (org.apache.nifi.atlas.NiFiAtlasClient)1 NiFiFlowAnalyzer (org.apache.nifi.atlas.NiFiFlowAnalyzer)1