Search in sources :

Example 1 with NiFiFlowPath

use of org.apache.nifi.atlas.NiFiFlowPath in project nifi by apache.

the class AbstractLineageStrategy method addDataSetRefs.

/**
 * Looks up the flow path of every component id held by {@code refs} and registers the
 * data set refs against the flow paths that could be found.
 *
 * <p>A component id without a matching flow path is reported with a warning and left out.</p>
 *
 * @param nifiFlow the flow used to look up flow paths by component id
 * @param refs the data set refs whose component ids are looked up
 */
protected void addDataSetRefs(NiFiFlow nifiFlow, DataSetRefs refs) {
    final Set<NiFiFlowPath> resolvedPaths = refs.getComponentIds().stream()
            .map(id -> {
                final NiFiFlowPath found = nifiFlow.findPath(id);
                if (found != null) {
                    return found;
                }
                logger.warn("FlowPath for {} was not found.", id);
                return null;
            })
            // Drop the ids for which the lookup above yielded nothing.
            .filter(path -> path != null)
            .collect(Collectors.toSet());
    addDataSetRefs(nifiFlow, resolvedPaths, refs);
}
Also used : NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath)

Example 2 with NiFiFlowPath

use of org.apache.nifi.atlas.NiFiFlowPath in project nifi by apache.

the class AbstractLineageStrategy method addDataSetRefs.

/**
 * Converts each of the given flow paths into a reference and registers the data set refs against it.
 *
 * @param nifiFlow the flow the paths belong to; supplies the flow reference, cluster name and URL
 * @param flowPaths the flow paths to create references for
 * @param refs the data set refs to add to every flow path reference
 */
protected void addDataSetRefs(NiFiFlow nifiFlow, Set<NiFiFlowPath> flowPaths, DataSetRefs refs) {
    // These values are the same for every flow path, so they are resolved once up front.
    final Referenceable nifiFlowRef = toReferenceable(nifiFlow);
    final String cluster = nifiFlow.getClusterName();
    final String flowUrl = nifiFlow.getUrl();
    flowPaths.forEach(path -> addDataSetRefs(refs, toReferenceable(path, nifiFlowRef, cluster, flowUrl)));
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath)

Example 3 with NiFiFlowPath

use of org.apache.nifi.atlas.NiFiFlowPath in project nifi by apache.

the class CompleteFlowPathLineage method processEvent.

/**
 * Analyzes the lineage of a dropped FlowFile and, when the resulting lineage path is complete,
 * creates the corresponding flow path entities together with their data set refs.
 *
 * <p>Events of any type other than DROP are ignored.</p>
 *
 * @param analysisContext the context used to query and analyze lineage
 * @param nifiFlow the flow the event belongs to
 * @param event the provenance event to process
 */
@Override
public void processEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event) {
    final boolean isDropEvent = ProvenanceEventType.DROP.equals(event.getEventType());
    if (!isDropEvent) {
        return;
    }

    // Construct a tree model to traverse backwards.
    final ComputeLineageResult lineageResult = analysisContext.queryLineage(event.getEventId());
    final Map<String, List<LineageNode>> tree = new HashMap<>();
    analyzeLineageTree(lineageResult, tree);

    final LineagePath rootPath = new LineagePath();
    extractLineagePaths(analysisContext, tree, rootPath, event);
    analyzeLineagePath(analysisContext, rootPath);

    // Input and output data set are both required to report lineage.
    if (!rootPath.isComplete()) {
        return;
    }

    final List<Tuple<NiFiFlowPath, DataSetRefs>> buffered = new ArrayList<>();
    createCompleteFlowPath(nifiFlow, rootPath, buffered);
    buffered.forEach(entry -> {
        final NiFiFlowPath path = entry.getKey();
        createEntity(toReferenceable(path, nifiFlow));
        addDataSetRefs(nifiFlow, Collections.singleton(path), entry.getValue());
    });
    buffered.clear();
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) HashMap(java.util.HashMap) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) Tuple(org.apache.nifi.util.Tuple)

Example 4 with NiFiFlowPath

use of org.apache.nifi.atlas.NiFiFlowPath in project nifi by apache.

the class CompleteFlowPathLineage method createCompleteFlowPath.

/**
 * Builds a new FlowPath out of a LineagePath and buffers it, paired with its DataSet refs, in
 * {@code createdFlowPaths}. A FlowPath created by this method has a hash in its qualified name.
 *
 * <p>Parents are processed before the given path, because the hashes of the parent LineagePaths are
 * part of the input from which the child hash is calculated. That way FlowPaths are distinguished by
 * the complete path a FlowFile took: two lineagePaths with identical componentIds/inputs/outputs are
 * still treated as different paths when their parents have different inputs.</p>
 *
 * @param nifiFlow A reference to current NiFiFlow
 * @param lineagePath LineagePath from which NiFiFlowPath and DataSet refs are created and added to the {@code createdFlowPaths}.
 * @param createdFlowPaths A list to buffer created NiFiFlowPaths,
 *                         in order to defer sending notification to Kafka until all parent FlowPath get analyzed.
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    // Collections.reverse works in place, so the list returned by getEvents() is itself reordered.
    final List<ProvenanceEventRecord> orderedEvents = lineagePath.getEvents();
    Collections.reverse(orderedEvents);
    final List<String> componentIds = orderedEvents.stream()
            .map(ProvenanceEventRecord::getComponentId)
            .collect(Collectors.toList());
    final ProvenanceEventRecord headEvent = orderedEvents.get(0);
    final String firstComponentId = headEvent.getComponentId();
    final DataSetRefs pathRefs = lineagePath.getRefs();

    // Process parents first.
    Referenceable parentQueue = null;
    if (!lineagePath.getParents().isEmpty()) {
        // Add queue between this lineage path and parent.
        parentQueue = new Referenceable(TYPE_NIFI_QUEUE);
        // The first event knows why this lineage has parents, e.g. FORK or JOIN.
        parentQueue.set(ATTR_NAME, headEvent.getEventType().name());
        pathRefs.addInput(parentQueue);
        for (LineagePath parentPath : lineagePath.getParents()) {
            parentPath.getRefs().addOutput(parentQueue);
            createCompleteFlowPath(nifiFlow, parentPath, createdFlowPaths);
        }
    }

    // Create a variant path.
    // Calculate a hash from component_ids and input and output resource ids.
    final Stream<String> ioIds = Stream
            .concat(pathRefs.getInputs().stream(), pathRefs.getOutputs().stream())
            .map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream()
            .map(parentPath -> String.valueOf(parentPath.getLineagePathHash()));
    final String hashSource = Stream.concat(Stream.concat(componentIds.stream(), ioIds), parentHashes)
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));
    final CRC32 checksum = new CRC32();
    checksum.update(hashSource.getBytes(StandardCharsets.UTF_8));
    final long hash = checksum.getValue();
    lineagePath.setLineagePathHash(hash);

    final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);

    // E.g, FF1 and FF2 read from dirA were merged, vs FF3 and FF4 read from dirB were merged then passed here, these two should be different queue.
    if (parentQueue != null) {
        parentQueue.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
    }

    // If the same components emitted multiple provenance events consecutively, merge it to come up with a simpler name.
    final List<ProvenanceEventRecord> nameEvents = new ArrayList<>();
    String lastSeenComponentId = null;
    for (ProvenanceEventRecord candidate : orderedEvents) {
        final String candidateId = candidate.getComponentId();
        if (!candidateId.equals(lastSeenComponentId)) {
            nameEvents.add(candidate);
        }
        lastSeenComponentId = candidateId;
    }
    final String pathName = nameEvents.stream()
            .map(e -> nifiFlow.getProcessComponentName(e.getComponentId(), e::getComponentType))
            .collect(Collectors.joining(", "));
    flowPath.setName(pathName);

    // Use the group of the statically analyzed path when there is one, the root process group otherwise.
    final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
    if (staticFlowPath != null) {
        flowPath.setGroupId(staticFlowPath.getGroupId());
    } else {
        flowPath.setGroupId(nifiFlow.getRootProcessGroupId());
    }

    // To defer send notification until entire lineagePath analysis gets finished, just add the instance into a buffer.
    createdFlowPaths.add(new Tuple<>(flowPath, pathRefs));
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) HashMap(java.util.HashMap) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) ArrayList(java.util.ArrayList) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) NiFiFlow(org.apache.nifi.atlas.NiFiFlow) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) AtlasUtils.toTypedQualifiedName(org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) AtlasUtils.toQualifiedName(org.apache.nifi.atlas.AtlasUtils.toQualifiedName) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Tuple(org.apache.nifi.util.Tuple) DROP(org.apache.nifi.provenance.ProvenanceEventType.DROP) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) CRC32(java.util.zip.CRC32) Referenceable(org.apache.atlas.typesystem.Referenceable) Collections(java.util.Collections) LineageNodeType(org.apache.nifi.provenance.lineage.LineageNodeType) CRC32(java.util.zip.CRC32) ArrayList(java.util.ArrayList) Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) Stream(java.util.stream.Stream)

Example 5 with NiFiFlowPath

use of org.apache.nifi.atlas.NiFiFlowPath in project nifi by apache.

the class SimpleFlowPathLineage method processRemotePortEvent.

/**
 * Create a flow_path entity corresponding to the target RemoteGroupPort when a SEND/RECEIVE event is received.
 * Such an entity can not be created in advance while analyzing the flow statically,
 * because ReportingTask can not determine whether a component id is a RemoteGroupPort:
 * connectionStatus is the only available information in ReportingContext, and
 * ConnectionStatus only knows component id, component type is unknown.
 * For example, there is no difference to tell if a connected component is a funnel or a RemoteGroupPort.
 *
 * <p>The method logs a warning and returns without creating anything when the previous provenance event
 * (Remote Input Port only) or the connections of the port can not be found.</p>
 *
 * @param analysisContext the context used to find the previous provenance event
 * @param nifiFlow the flow providing connections, flow paths and qualified names
 * @param event the provenance event emitted by the remote port; its component type decides
 *              whether it is handled as a Remote Input Port or as a remote output port
 * @param analyzedRefs the data set refs analyzed for the event; the first output (Remote Input Port)
 *                     or the first input (otherwise) is used as the remote port data set
 */
private void processRemotePortEvent(AnalysisContext analysisContext, NiFiFlow nifiFlow, ProvenanceEventRecord event, DataSetRefs analyzedRefs) {
    final boolean isRemoteInputPort = "Remote Input Port".equals(event.getComponentType());
    // Create a RemoteInputPort Process.
    // event.getComponentId returns UUID for RemoteGroupPort as a client of S2S, and it's different from a remote port UUID (portDataSetid).
    // See NIFI-4571 for detail.
    final Referenceable remotePortDataSet = isRemoteInputPort ? analyzedRefs.getOutputs().iterator().next() : analyzedRefs.getInputs().iterator().next();
    final String portProcessId = event.getComponentId();
    final NiFiFlowPath remotePortProcess = new NiFiFlowPath(portProcessId);
    remotePortProcess.setName(event.getComponentType());
    remotePortProcess.addProcessor(portProcessId);
    if (isRemoteInputPort) {
        // For RemoteInputPort, the previous event of this FlowFile is needed to link the static flow path.
        // That is only possible by calling lineage API.
        final ProvenanceEventRecord previousEvent = findPreviousProvenanceEvent(analysisContext, event);
        if (previousEvent == null) {
            logger.warn("Previous event was not found: {}", new Object[] { event });
            return;
        }
        // Set groupId from incoming connection if available.
        final List<ConnectionStatus> incomingConnections = nifiFlow.getIncomingConnections(portProcessId);
        if (incomingConnections == null || incomingConnections.isEmpty()) {
            logger.warn("Incoming relationship was not found: {}", new Object[] { event });
            return;
        }
        final ConnectionStatus connection = incomingConnections.get(0);
        remotePortProcess.setGroupId(connection.getGroupId());
        final Referenceable remotePortProcessRef = toReferenceable(remotePortProcess, nifiFlow);
        createEntity(remotePortProcessRef);
        // Create a queue.
        Referenceable queueFromStaticFlowPathToRemotePortProcess = new Referenceable(TYPE_NIFI_QUEUE);
        queueFromStaticFlowPathToRemotePortProcess.set(ATTR_NAME, "queue");
        queueFromStaticFlowPathToRemotePortProcess.set(ATTR_QUALIFIED_NAME, nifiFlow.toQualifiedName(portProcessId));
        // Create lineage: Static flow_path -> queue
        DataSetRefs staticFlowPathRefs = new DataSetRefs(previousEvent.getComponentId());
        staticFlowPathRefs.addOutput(queueFromStaticFlowPathToRemotePortProcess);
        addDataSetRefs(nifiFlow, staticFlowPathRefs);
        // Create lineage: Queue -> RemoteInputPort process -> RemoteInputPort dataSet
        DataSetRefs remotePortRefs = new DataSetRefs(portProcessId);
        remotePortRefs.addInput(queueFromStaticFlowPathToRemotePortProcess);
        remotePortRefs.addOutput(remotePortDataSet);
        addDataSetRefs(remotePortRefs, remotePortProcessRef);
    } else {
        // For RemoteOutputPort, it's possible that multiple processors are connected.
        // In that case, the received FlowFile is cloned and passed to each connection.
        // So we need to create multiple DataSetRefs.
        final List<ConnectionStatus> connections = nifiFlow.getOutgoingConnections(portProcessId);
        if (connections == null || connections.isEmpty()) {
            // This branch looks at the outgoing side of the port, so report it as such.
            logger.warn("Outgoing connection was not found: {}", new Object[] { event });
            return;
        }
        // Set group id from outgoing connection if available.
        remotePortProcess.setGroupId(connections.get(0).getGroupId());
        final Referenceable remotePortProcessRef = toReferenceable(remotePortProcess, nifiFlow);
        createEntity(remotePortProcessRef);
        // Create lineage: RemoteOutputPort dataSet -> RemoteOutputPort process
        DataSetRefs remotePortRefs = new DataSetRefs(portProcessId);
        remotePortRefs.addInput(remotePortDataSet);
        addDataSetRefs(remotePortRefs, remotePortProcessRef);
        for (ConnectionStatus connection : connections) {
            final String destinationId = connection.getDestinationId();
            final NiFiFlowPath destFlowPath = nifiFlow.findPath(destinationId);
            if (destFlowPath == null) {
                // No flow path is known for this destination, so do not add a queue from this RemoteOutputPort,
                // as a queue will be created by the connected RemoteInputPort to connect this RemoteOutputPort.
                continue;
            }
            // Create a queue.
            Referenceable queueFromRemotePortProcessToStaticFlowPath = new Referenceable(TYPE_NIFI_QUEUE);
            queueFromRemotePortProcessToStaticFlowPath.set(ATTR_NAME, "queue");
            queueFromRemotePortProcessToStaticFlowPath.set(ATTR_QUALIFIED_NAME, nifiFlow.toQualifiedName(destinationId));
            // Create lineage: Queue -> Static flow_path
            DataSetRefs staticFlowPathRefs = new DataSetRefs(destinationId);
            staticFlowPathRefs.addInput(queueFromRemotePortProcessToStaticFlowPath);
            addDataSetRefs(nifiFlow, staticFlowPathRefs);
            // Create lineage: RemoteOutputPort dataSet -> RemoteOutputPort process -> Queue
            remotePortRefs.addOutput(queueFromRemotePortProcessToStaticFlowPath);
            addDataSetRefs(remotePortRefs, remotePortProcessRef);
        }
        // Add RemoteOutputPort process, so that it can be found even if it is connected to RemoteInputPort directly without any processor in between.
        nifiFlow.getFlowPaths().put(remotePortProcess.getId(), remotePortProcess);
    }
}
Also used : Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) ConnectionStatus(org.apache.nifi.controller.status.ConnectionStatus)

Aggregations

NiFiFlowPath (org.apache.nifi.atlas.NiFiFlowPath)5 Referenceable (org.apache.atlas.typesystem.Referenceable)3 DataSetRefs (org.apache.nifi.atlas.provenance.DataSetRefs)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 List (java.util.List)2 ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord)2 ComputeLineageResult (org.apache.nifi.provenance.lineage.ComputeLineageResult)2 Tuple (org.apache.nifi.util.Tuple)2 StandardCharsets (java.nio.charset.StandardCharsets)1 Collections (java.util.Collections)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 CRC32 (java.util.zip.CRC32)1 AtlasUtils.toQualifiedName (org.apache.nifi.atlas.AtlasUtils.toQualifiedName)1 AtlasUtils.toStr (org.apache.nifi.atlas.AtlasUtils.toStr)1 AtlasUtils.toTypedQualifiedName (org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName)1 NiFiFlow (org.apache.nifi.atlas.NiFiFlow)1