Search in sources :

Example 1 with ATTR_NAME

Use of org.apache.nifi.atlas.NiFiTypes.ATTR_NAME in the Apache NiFi project.

Class CompleteFlowPathLineage, method createCompleteFlowPath.

/**
 * Builds a {@code NiFiFlowPath} for the given LineagePath and buffers it, together with the path's
 * {@code DataSetRefs}, in {@code createdFlowPaths}. The flow path is constructed from the id of the first
 * component in the path and a CRC32 hash, so FlowPaths created here carry a hash in their identity.
 *
 * <p>Parents are processed recursively <em>before</em> this path's hash is computed, because every parent's hash
 * is one of the inputs to the child's hash. As a consequence, two LineagePaths with identical
 * component ids, inputs and outputs still get different hashes when their parents differ.</p>
 *
 * <p>Side effects performed by this method:</p>
 * <ul>
 *   <li>the list returned by {@code lineagePath.getEvents()} is reversed in place;</li>
 *   <li>when the path has parents, a queue {@code Referenceable} is added as an input of this path and as an
 *       output of every parent;</li>
 *   <li>the computed hash is stored on {@code lineagePath} via {@code setLineagePathHash}.</li>
 * </ul>
 *
 * @param nifiFlow A reference to the current NiFiFlow; used for the cluster name, component names and group ids.
 * @param lineagePath The LineagePath to convert. Its events list must not be empty.
 * @param createdFlowPaths Buffer receiving the created NiFiFlowPaths (parents first), so that sending
 *                         notifications to Kafka can be deferred until all parent FlowPaths have been analyzed.
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    // NOTE: this reverses the list handed out by the LineagePath itself, not a copy.
    final List<ProvenanceEventRecord> orderedEvents = lineagePath.getEvents();
    Collections.reverse(orderedEvents);

    final List<String> componentIds = orderedEvents.stream()
            .map(ProvenanceEventRecord::getComponentId)
            .collect(Collectors.toList());
    final String headComponentId = componentIds.get(0);
    final DataSetRefs refs = lineagePath.getRefs();

    // Parents go first: their hashes are needed below to compute the hash of this path.
    Referenceable parentQueue = null;
    final boolean hasParents = !lineagePath.getParents().isEmpty();
    if (hasParents) {
        // A queue entity links this path to each of its parents.
        // It is named after the first event's type, which tells why parents exist (e.g. FORK or JOIN).
        parentQueue = new Referenceable(TYPE_NIFI_QUEUE);
        parentQueue.set(ATTR_NAME, orderedEvents.get(0).getEventType().name());
        refs.addInput(parentQueue);

        for (LineagePath parentPath : lineagePath.getParents()) {
            parentPath.getRefs().addOutput(parentQueue);
            createCompleteFlowPath(nifiFlow, parentPath, createdFlowPaths);
        }
    }

    // Hash material: component ids, typed qualified names of inputs/outputs, and the parents' hashes.
    final Stream<String> dataSetIds = Stream.concat(refs.getInputs().stream(), refs.getOutputs().stream())
            .map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream()
            .map(parentPath -> String.valueOf(parentPath.getLineagePathHash()));

    // The ids are sorted and de-duplicated before joining, so the hash does not depend on encounter order.
    final String hashSource = Stream.concat(Stream.concat(componentIds.stream(), dataSetIds), parentHashes)
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));

    final CRC32 checksum = new CRC32();
    checksum.update(hashSource.getBytes(StandardCharsets.UTF_8));
    final long pathHash = checksum.getValue();
    lineagePath.setLineagePathHash(pathHash);

    final NiFiFlowPath variantPath = new NiFiFlowPath(headComponentId, pathHash);

    // The queue's qualified name includes the hash so that queues fed by different upstream lineages stay distinct,
    // e.g. FF1 and FF2 read from dirA and merged, vs. FF3 and FF4 read from dirB and merged, then passed here.
    if (parentQueue != null) {
        parentQueue.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), headComponentId + "::" + pathHash));
    }

    // Collapse runs of consecutive events emitted by the same component to keep the path name short.
    final List<ProvenanceEventRecord> namedEvents = new ArrayList<>();
    String lastSeenComponentId = null;
    for (ProvenanceEventRecord record : orderedEvents) {
        final String currentComponentId = record.getComponentId();
        if (!currentComponentId.equals(lastSeenComponentId)) {
            namedEvents.add(record);
        }
        lastSeenComponentId = currentComponentId;
    }

    final String joinedNames = namedEvents.stream()
            .map(record -> nifiFlow.getProcessComponentName(record.getComponentId(), record::getComponentType))
            .collect(Collectors.joining(", "));
    variantPath.setName(joinedNames);

    // Reuse the group of the statically known path starting at the same component, else fall back to the root group.
    final NiFiFlowPath knownPath = nifiFlow.findPath(headComponentId);
    if (knownPath != null) {
        variantPath.setGroupId(knownPath.getGroupId());
    } else {
        variantPath.setGroupId(nifiFlow.getRootProcessGroupId());
    }

    // Only buffer the result here; notifications are sent once the entire lineage has been analyzed.
    createdFlowPaths.add(new Tuple<>(variantPath, refs));
}
Also used : ComputeLineageResult(org.apache.nifi.provenance.lineage.ComputeLineageResult) HashMap(java.util.HashMap) ATTR_QUALIFIED_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME) ArrayList(java.util.ArrayList) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) LineageNode(org.apache.nifi.provenance.lineage.LineageNode) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) NiFiFlow(org.apache.nifi.atlas.NiFiFlow) ATTR_NAME(org.apache.nifi.atlas.NiFiTypes.ATTR_NAME) AtlasUtils.toStr(org.apache.nifi.atlas.AtlasUtils.toStr) AtlasUtils.toTypedQualifiedName(org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName) ProvenanceEventType(org.apache.nifi.provenance.ProvenanceEventType) AtlasUtils.toQualifiedName(org.apache.nifi.atlas.AtlasUtils.toQualifiedName) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) AnalysisContext(org.apache.nifi.atlas.provenance.AnalysisContext) Objects(java.util.Objects) List(java.util.List) Stream(java.util.stream.Stream) Tuple(org.apache.nifi.util.Tuple) DROP(org.apache.nifi.provenance.ProvenanceEventType.DROP) TYPE_NIFI_QUEUE(org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE) CRC32(java.util.zip.CRC32) Referenceable(org.apache.atlas.typesystem.Referenceable) Collections(java.util.Collections) LineageNodeType(org.apache.nifi.provenance.lineage.LineageNodeType) CRC32(java.util.zip.CRC32) ArrayList(java.util.ArrayList) Referenceable(org.apache.atlas.typesystem.Referenceable) NiFiFlowPath(org.apache.nifi.atlas.NiFiFlowPath) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) DataSetRefs(org.apache.nifi.atlas.provenance.DataSetRefs) Stream(java.util.stream.Stream)

Aggregations

StandardCharsets (java.nio.charset.StandardCharsets)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Collectors (java.util.stream.Collectors)1 Stream (java.util.stream.Stream)1 CRC32 (java.util.zip.CRC32)1 Referenceable (org.apache.atlas.typesystem.Referenceable)1 AtlasUtils.toQualifiedName (org.apache.nifi.atlas.AtlasUtils.toQualifiedName)1 AtlasUtils.toStr (org.apache.nifi.atlas.AtlasUtils.toStr)1 AtlasUtils.toTypedQualifiedName (org.apache.nifi.atlas.AtlasUtils.toTypedQualifiedName)1 NiFiFlow (org.apache.nifi.atlas.NiFiFlow)1 NiFiFlowPath (org.apache.nifi.atlas.NiFiFlowPath)1 ATTR_NAME (org.apache.nifi.atlas.NiFiTypes.ATTR_NAME)1 ATTR_QUALIFIED_NAME (org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME)1 TYPE_NIFI_QUEUE (org.apache.nifi.atlas.NiFiTypes.TYPE_NIFI_QUEUE)1 AnalysisContext (org.apache.nifi.atlas.provenance.AnalysisContext)1