Usage of org.apache.nifi.atlas.NiFiFlow in the Apache NiFi project.
From the class CompleteFlowPathLineage, method createCompleteFlowPath:
/**
 * Creates a new FlowPath from a LineagePath. FlowPaths created by this method have a hash in their qualified name.
 *
 * <p>This method processes parents first so that each parent's hash is available when the child's hash is
 * computed; parent LineagePath hashes contribute to the child hash in order to distinguish FlowPaths based
 * on the complete path for a given FlowFile.
 * For example, even if two lineagePaths have identical componentIds/inputs/outputs,
 * if their parents have different inputs, they should be treated as different paths.</p>
 *
 * @param nifiFlow A reference to the current NiFiFlow
 * @param lineagePath LineagePath from which NiFiFlowPath and DataSet refs are created and added to {@code createdFlowPaths}
 * @param createdFlowPaths A list to buffer created NiFiFlowPaths,
 *                         in order to defer sending notifications to Kafka until all parent FlowPaths get analyzed
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    final List<ProvenanceEventRecord> events = lineagePath.getEvents();
    // NOTE(review): reverses the lineagePath's own event list in place, so after this call index 0
    // is treated as the earliest event of the path — presumably getEvents() returns newest-first; confirm.
    Collections.reverse(events);

    final List<String> componentIds = events.stream().map(ProvenanceEventRecord::getComponentId).collect(Collectors.toList());
    final String firstComponentId = events.get(0).getComponentId();
    final DataSetRefs dataSetRefs = lineagePath.getRefs();

    // Process parents first. The recursion below sets each parent's lineage hash before this
    // path's hash is computed from them further down.
    Referenceable queueBetweenParent = null;
    if (!lineagePath.getParents().isEmpty()) {
        // Add a queue between this lineage path and its parents.
        queueBetweenParent = new Referenceable(TYPE_NIFI_QUEUE);
        // The first event knows why this lineage has parents, e.g. FORK or JOIN; use that as the queue name.
        final String firstEventType = events.get(0).getEventType().name();
        queueBetweenParent.set(ATTR_NAME, firstEventType);
        dataSetRefs.addInput(queueBetweenParent);

        for (LineagePath parent : lineagePath.getParents()) {
            // The shared queue is an output of every parent and an input of this path,
            // connecting them in the lineage graph.
            parent.getRefs().addOutput(queueBetweenParent);
            createCompleteFlowPath(nifiFlow, parent, createdFlowPaths);
        }
    }

    // Create a variant path.
    // Calculate a hash from component_ids and input and output resource ids.
    final Stream<String> ioIds = Stream.concat(dataSetRefs.getInputs().stream(), dataSetRefs.getOutputs().stream()).map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream().map(p -> String.valueOf(p.getLineagePathHash()));
    final CRC32 crc32 = new CRC32();
    // sorted().distinct() makes the hash independent of stream order and duplicates,
    // so equivalent paths always produce the same hash.
    crc32.update(Stream.of(componentIds.stream(), ioIds, parentHashes).reduce(Stream::concat).orElseGet(Stream::empty).sorted().distinct().collect(Collectors.joining(",")).getBytes(StandardCharsets.UTF_8));
    final long hash = crc32.getValue();
    lineagePath.setLineagePathHash(hash);
    final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);

    // E.g, FF1 and FF2 read from dirA were merged, vs FF3 and FF4 read from dirB were merged then passed here, these two should be different queue.
    // Hence the hash participates in the queue's qualified name.
    if (queueBetweenParent != null) {
        queueBetweenParent.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
    }

    // If the same component emitted multiple provenance events consecutively, merge them to come up with a simpler name.
    String previousComponentId = null;
    List<ProvenanceEventRecord> uniqueEventsForName = new ArrayList<>();
    for (ProvenanceEventRecord event : events) {
        if (!event.getComponentId().equals(previousComponentId)) {
            uniqueEventsForName.add(event);
        }
        previousComponentId = event.getComponentId();
    }
    final String pathName = uniqueEventsForName.stream().map(event -> nifiFlow.getProcessComponentName(event.getComponentId(), event::getComponentType)).collect(Collectors.joining(", "));
    flowPath.setName(pathName);

    // Reuse the group id of the statically-analyzed path for the same first component, if any.
    final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
    flowPath.setGroupId(staticFlowPath != null ? staticFlowPath.getGroupId() : nifiFlow.getRootProcessGroupId());

    // To defer sending notification until the entire lineagePath analysis gets finished, just add the instance into a buffer.
    createdFlowPaths.add(new Tuple<>(flowPath, dataSetRefs));
}
Usage of org.apache.nifi.atlas.NiFiFlow in the Apache NiFi project.
From the class ReportLineageToAtlas, method createNiFiFlow:
/**
 * Builds a {@link NiFiFlow} representing the current root process group.
 *
 * <p>If Atlas already holds a flow entity for the same root group id and cluster name,
 * that existing entity is reused and refreshed; otherwise a brand-new NiFiFlow is created.
 * The returned flow has been analyzed by {@link NiFiFlowAnalyzer} (process groups and paths).</p>
 *
 * @param context the reporting context supplying the root group status and NiFi URL property
 * @param atlasClient client used to look up an existing flow entity in Atlas
 * @return the analyzed NiFiFlow for the root process group
 * @throws IllegalArgumentException if the configured NiFi URL cannot be parsed
 * @throws RuntimeException if fetching the existing flow fails for any reason other than NOT_FOUND
 */
private NiFiFlow createNiFiFlow(ReportingContext context, NiFiAtlasClient atlasClient) {
    final ProcessGroupStatus rootGroupStatus = context.getEventAccess().getGroupStatus("root");
    final String flowName = rootGroupStatus.getName();
    final String nifiUrl = context.getProperty(ATLAS_NIFI_URL).evaluateAttributeExpressions().getValue();

    // Resolve the cluster name from the host part of the configured NiFi URL.
    final String clusterName;
    try {
        clusterName = clusterResolvers.fromHostNames(new URL(nifiUrl).getHost());
    } catch (MalformedURLException e) {
        throw new IllegalArgumentException("Failed to parse NiFi URL, " + e.getMessage(), e);
    }

    // Retrieve the existing NiFiFlow from Atlas, if one has been registered before.
    NiFiFlow existingNiFiFlow = null;
    try {
        existingNiFiFlow = atlasClient.fetchNiFiFlow(rootGroupStatus.getId(), clusterName);
    } catch (AtlasServiceException e) {
        if (!ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) {
            throw new RuntimeException("Failed to fetch existing NiFI flow. " + e, e);
        }
        // NOT_FOUND is expected on the first run; a fresh flow is created below.
        getLogger().debug("Existing flow was not found for {}@{}", new Object[] { rootGroupStatus.getId(), clusterName });
    }

    final NiFiFlow nifiFlow = existingNiFiFlow == null ? new NiFiFlow(rootGroupStatus.getId()) : existingNiFiFlow;
    nifiFlow.setFlowName(flowName);
    nifiFlow.setUrl(nifiUrl);
    nifiFlow.setClusterName(clusterName);

    final NiFiFlowAnalyzer analyzer = new NiFiFlowAnalyzer();
    analyzer.analyzeProcessGroup(nifiFlow, rootGroupStatus);
    analyzer.analyzePaths(nifiFlow);

    return nifiFlow;
}
Usage of org.apache.nifi.atlas.NiFiFlow in the Apache NiFi project.
From the class ReportLineageToAtlas, method onTrigger:
/**
 * Reporting task entry point: registers NiFi type definitions and the flow entity in Atlas
 * (primary/standalone node only), then consumes provenance events on every node.
 */
@Override
public void onTrigger(ReportingContext context) {
    final boolean isClustered = context.isClustered();
    final String clusterNodeId = context.getClusterNodeIdentifier();
    if (isClustered && isEmpty(clusterNodeId)) {
        // Clustered, but this node's ID is unknown. Not ready for processing yet.
        return;
    }

    // A standalone node, or the primary node of a cluster, carries out the primary tasks.
    final boolean isResponsibleForPrimaryTasks = !isClustered || getNodeTypeProvider().isPrimary();

    final NiFiAtlasClient atlasClient = createNiFiAtlasClient(context);

    // Ensure the NiFi entity type definitions exist in the Atlas type system before reporting.
    if (!isTypeDefCreated) {
        try {
            if (!isResponsibleForPrimaryTasks) {
                // Non-primary nodes only verify that the definitions have been registered already.
                if (!atlasClient.isNiFiTypeDefsRegistered()) {
                    getLogger().debug("NiFi type definitions are not ready in Atlas type system yet.");
                    return;
                }
            } else {
                // Create NiFi type definitions in Atlas type system.
                atlasClient.registerNiFiTypeDefs(false);
            }
            isTypeDefCreated = true;
        } catch (AtlasServiceException e) {
            throw new RuntimeException("Failed to check and create NiFi flow type definitions in Atlas due to " + e, e);
        }
    }

    // Regardless of whether being a primary task node, each node has to analyse NiFiFlow.
    // Assuming each node has the same flow definition, that is guaranteed by NiFi cluster management mechanism.
    final NiFiFlow nifiFlow = createNiFiFlow(context, atlasClient);

    if (isResponsibleForPrimaryTasks) {
        try {
            atlasClient.registerNiFiFlow(nifiFlow);
        } catch (AtlasServiceException e) {
            throw new RuntimeException("Failed to register NiFI flow. " + e, e);
        }
    }

    // NOTE: There is a race condition between the primary node and other nodes.
    // If a node notifies an event related to a NiFi component which is not yet created by NiFi primary node,
    // then the notification message will fail due to having a reference to a non-existing entity.
    nifiAtlasHook.setAtlasClient(atlasClient);
    consumeNiFiProvenanceEvents(context, nifiFlow);
}
Aggregations