use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.
the class NiFiAtlasClient method registerDataSetEntities.
/**
 * Sends the DataSet entity changes held by the given NiFiFlow to Atlas.
 *
 * <p>Entities listed under CREATED are created through the Atlas client. For each of them, the GUID found in
 * the response's GUID assignments replaces the entity's current GUID, and the entity is re-keyed in the flow's
 * matching entity map (root input ports, root output ports or queues). Entities listed under UPDATED are sent
 * through the Atlas client's update call.</p>
 *
 * @param nifiFlow the flow whose changed DataSet entities are registered
 * @return a map from Atlas type name to the non-deleted entities of that type. A type is present when at least
 *         one of its entities has a change type other than AS_IS; if no entity of such a type remains,
 *         the type is mapped to an empty list.
 * @throws AtlasServiceException declared by the Atlas client calls made here
 */
private Map<String, List<AtlasEntity>> registerDataSetEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
    final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> changesByKind = nifiFlow.getChangedDataSetEntities();

    if (changesByKind.containsKey(CREATED)) {
        final List<AtlasEntity> newEntities = changesByKind.get(CREATED);
        final EntityMutationResponse createResponse =
                atlasClient.createEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(newEntities));
        logger.debug("Created DataSet entities mutation response={}", createResponse);

        final Map<String, String> assignedGuids = createResponse.getGuidAssignments();
        for (AtlasEntity newEntity : newEntities) {
            final String assignedGuid = assignedGuids.get(newEntity.getGuid());
            final String qualifiedName = toStr(newEntity.getAttribute(ATTR_QUALIFIED_NAME));

            if (!StringUtils.isEmpty(assignedGuid)) {
                final Map<AtlasObjectId, AtlasEntity> flowEntities =
                        dataSetEntityMapFor(nifiFlow, newEntity.getTypeName());

                // The map still holds this entity under an id without GUID; drop that key before re-adding.
                findIdByQualifiedName(flowEntities.keySet(), qualifiedName).ifPresent(flowEntities::remove);

                newEntity.setGuid(assignedGuid);
                flowEntities.put(
                        new AtlasObjectId(assignedGuid, newEntity.getTypeName(),
                                Collections.singletonMap(ATTR_QUALIFIED_NAME, qualifiedName)),
                        newEntity);
            } else {
                logger.warn("GUID was not assigned for {}::{} for some reason.", newEntity.getTypeName(), qualifiedName);
            }
        }
    }

    if (changesByKind.containsKey(UPDATED)) {
        final EntityMutationResponse updateResponse =
                atlasClient.updateEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(changesByKind.get(UPDATED)));
        logger.debug("Updated DataSet entities mutation response={}", updateResponse);
    }

    // Type names that have at least one entity whose change type is anything but AS_IS.
    final Set<String> touchedTypeNames = changesByKind.keySet().stream()
            .filter(kind -> !AS_IS.equals(kind))
            .map(changesByKind::get)
            .flatMap(List::stream)
            .map(AtlasEntity::getTypeName)
            .collect(Collectors.toSet());

    // NOTE: Cascading DELETE will be performed when parent NiFiFlow is updated without removed DataSet entities.
    final Map<String, List<AtlasEntity>> survivorsByType = changesByKind.keySet().stream()
            .filter(kind -> !DELETED.equals(kind))
            .map(changesByKind::get)
            .flatMap(List::stream)
            .filter(candidate -> touchedTypeNames.contains(candidate.getTypeName()))
            .collect(Collectors.groupingBy(AtlasEntity::getTypeName));

    // When every entity of a type (e.g. nifi_input_port) has been deleted, the grouping above has no key for it,
    // and a missing key means the corresponding attribute is left untouched by the caller.
    // Map such a type to an empty list so that the attribute gets emptied instead.
    for (String touchedTypeName : touchedTypeNames) {
        survivorsByType.putIfAbsent(touchedTypeName, Collections.emptyList());
    }
    return survivorsByType;
}

/**
 * Picks the NiFiFlow entity map that holds DataSet entities of the given Atlas type.
 *
 * @throws RuntimeException if the type is not an input port, output port or queue type
 */
private Map<AtlasObjectId, AtlasEntity> dataSetEntityMapFor(final NiFiFlow nifiFlow, final String typeName) {
    switch (typeName) {
        case TYPE_NIFI_INPUT_PORT:
            return nifiFlow.getRootInputPortEntities();
        case TYPE_NIFI_OUTPUT_PORT:
            return nifiFlow.getRootOutputPortEntities();
        case TYPE_NIFI_QUEUE:
            return nifiFlow.getQueues();
        default:
            throw new RuntimeException(typeName + " is not expected.");
    }
}
use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.
the class NiFiAtlasClient method registerFlowPathEntities.
/**
 * Sends the FlowPath entity changes held by the given NiFiFlow to Atlas.
 *
 * <p>Created entities get the GUID found in the create response's GUID assignments; both created and updated
 * entities are then set as the ex-entity of the flow path looked up by the component id taken from their
 * qualified name.</p>
 *
 * @param nifiFlow the flow whose changed FlowPath entities are registered
 * @return object ids of all non-deleted FlowPath entities, or {@code null} when
 *         {@code EntityChangeType.containsChange} reports no change for the change types present
 * @throws AtlasServiceException declared by the Atlas client calls made here
 */
private Set<AtlasObjectId> registerFlowPathEntities(final NiFiFlow nifiFlow) throws AtlasServiceException {
    final Map<NiFiFlow.EntityChangeType, List<AtlasEntity>> changesByKind = nifiFlow.getChangedFlowPathEntities();

    if (changesByKind.containsKey(CREATED)) {
        final List<AtlasEntity> newPaths = changesByKind.get(CREATED);
        final EntityMutationResponse createResponse =
                atlasClient.createEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(newPaths));
        logger.debug("Created FlowPath entities mutation response={}", createResponse);

        final Map<String, String> assignedGuids = createResponse.getGuidAssignments();
        for (AtlasEntity newPath : newPaths) {
            // Swap the entity's current GUID for the one listed in the response's assignments.
            newPath.setGuid(assignedGuids.get(newPath.getGuid()));
            attachEntityToFlowPath(nifiFlow, newPath);
        }
    }

    if (changesByKind.containsKey(UPDATED)) {
        final List<AtlasEntity> changedPaths = changesByKind.get(UPDATED);
        final EntityMutationResponse updateResponse =
                atlasClient.updateEntities(new AtlasEntity.AtlasEntitiesWithExtInfo(changedPaths));
        logger.debug("Updated FlowPath entities mutation response={}", updateResponse);

        for (AtlasEntity changedPath : changedPaths) {
            attachEntityToFlowPath(nifiFlow, changedPath);
        }
    }

    if (!NiFiFlow.EntityChangeType.containsChange(changesByKind.keySet())) {
        return null;
    }

    // Everything that was not deleted remains referenced, identified by GUID and qualified name.
    return changesByKind.entrySet().stream()
            .filter(change -> !DELETED.equals(change.getKey()))
            .flatMap(change -> change.getValue().stream())
            .map(remaining -> new AtlasObjectId(
                    remaining.getGuid(),
                    TYPE_NIFI_FLOW_PATH,
                    Collections.singletonMap(ATTR_QUALIFIED_NAME, remaining.getAttribute(ATTR_QUALIFIED_NAME))))
            .collect(Collectors.toSet());
}

/** Sets the entity as the ex-entity of the flow path addressed by the component id in its qualified name. */
private void attachEntityToFlowPath(final NiFiFlow nifiFlow, final AtlasEntity entity) {
    final String qualifiedName = toStr(entity.getAttribute(ATTR_QUALIFIED_NAME));
    final NiFiFlowPath flowPath = nifiFlow.getFlowPaths().get(getComponentIdFromQualifiedName(qualifiedName));
    flowPath.setExEntity(entity);
}
use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.
the class NiFiAtlasHook method commitMessages.
/**
 * Notifies all buffered messages and then clears the buffer.
 *
 * <p>Messages are split into partial updates of nifi_flow_path addressed by qualified name, and everything else.
 * The other messages are notified first. The partial updates are grouped by flow path qualified name and each
 * group is consolidated into a single partial update request before being notified. Metrics are logged and the
 * buffer is cleared in a {@code finally} block, i.e. also when notification throws.</p>
 */
public void commitMessages() {
    final Map<Boolean, List<HookNotificationMessage>> messagesByKind = messages.stream()
            .collect(Collectors.groupingBy(this::isFlowPathUpdateByQualifiedName));
    final List<HookNotificationMessage> otherMessages =
            messagesByKind.getOrDefault(false, Collections.emptyList());
    final List<HookNotificationMessage> partialNiFiFlowPathUpdates =
            messagesByKind.getOrDefault(true, Collections.emptyList());

    logger.info("Commit messages: {} partialNiFiFlowPathUpdate and {} other messages.",
            partialNiFiFlowPathUpdates.size(), otherMessages.size());

    final Metrics metrics = new Metrics();
    metrics.totalMessages = messages.size();
    metrics.partialNiFiFlowPathUpdates = partialNiFiFlowPathUpdates.size();
    metrics.otherMessages = otherMessages.size();

    try {
        // Messages other than flow path partial updates go out first.
        notifyEntities(otherMessages);

        // Collapse the partial updates: one consolidated request per flow path qualified name.
        final Map<String, List<EntityPartialUpdateRequest>> updatesByFlowPath = partialNiFiFlowPathUpdates.stream()
                .map(msg -> (EntityPartialUpdateRequest) msg)
                .collect(Collectors.groupingBy(EntityPartialUpdateRequest::getAttributeValue));

        final List<HookNotificationMessage> deduplicatedMessages = new ArrayList<>();
        for (Map.Entry<String, List<EntityPartialUpdateRequest>> group : updatesByFlowPath.entrySet()) {
            final EntityPartialUpdateRequest consolidated =
                    consolidateFlowPathUpdates(group.getKey(), group.getValue(), metrics);
            if (consolidated != null) {
                deduplicatedMessages.add(consolidated);
            }
        }

        metrics.dedupedPartialNiFiFlowPathUpdates = deduplicatedMessages.size();
        notifyEntities(deduplicatedMessages);
    } finally {
        metrics.log("Committed");
        messages.clear();
    }
}

/** Tells whether the message is a partial update of a nifi_flow_path entity addressed by its qualified name. */
private boolean isFlowPathUpdateByQualifiedName(final HookNotificationMessage msg) {
    return ENTITY_PARTIAL_UPDATE.equals(msg.getType())
            && TYPE_NIFI_FLOW_PATH.equals(((EntityPartialUpdateRequest) msg).getTypeName())
            && ATTR_QUALIFIED_NAME.equals(((EntityPartialUpdateRequest) msg).getAttribute());
}

/**
 * Builds one partial update request carrying the inputs/outputs already stored for the flow path plus those of
 * the given updates that are not present yet.
 *
 * @return the consolidated request, or {@code null} when fetching the existing flow path failed with an
 *         AtlasServiceException
 */
private EntityPartialUpdateRequest consolidateFlowPathUpdates(final String flowPathQualifiedName,
                                                              final List<EntityPartialUpdateRequest> updates,
                                                              final Metrics metrics) {
    final String flowPathGuid;
    final Map<String, Referenceable> distinctInputs;
    final Map<String, Referenceable> distinctOutputs;
    try {
        // Look up the existing nifi_flow_path together with its current inputs/outputs.
        metrics.flowPathSearched++;
        final AtlasEntity.AtlasEntityWithExtInfo existingExt = atlasClient.searchEntityDef(
                new AtlasObjectId(TYPE_NIFI_FLOW_PATH, ATTR_QUALIFIED_NAME, flowPathQualifiedName));
        final AtlasEntity existing = existingExt.getEntity();
        flowPathGuid = existing.getGuid();
        distinctInputs = toReferenceables(existing.getAttribute(ATTR_INPUTS), metrics);
        distinctOutputs = toReferenceables(existing.getAttribute(ATTR_OUTPUTS), metrics);
    } catch (AtlasServiceException e) {
        if (ClientResponse.Status.NOT_FOUND.equals(e.getStatus())) {
            logger.debug("nifi_flow_path was not found for qualifiedName {}", flowPathQualifiedName);
        } else {
            logger.warn("Failed to retrieve nifi_flow_path with qualifiedName {} due to {}", flowPathQualifiedName, e, e);
        }
        return null;
    }

    // Fold in the inputs/outputs of every update, keeping the entry already held for a key.
    for (EntityPartialUpdateRequest update : updates) {
        fromReferenceable(update.getEntity().get(ATTR_INPUTS), metrics).forEach((key, ref) -> {
            if (!distinctInputs.containsKey(key)) {
                distinctInputs.put(key, ref);
            }
        });
        fromReferenceable(update.getEntity().get(ATTR_OUTPUTS), metrics).forEach((key, ref) -> {
            if (!distinctOutputs.containsKey(key)) {
                distinctOutputs.put(key, ref);
            }
        });
    }

    final Referenceable flowPathRef = new Referenceable(flowPathGuid, TYPE_NIFI_FLOW_PATH, null);
    // NOTE: passing distinctInputs.values() directly (HashMap$Values) causes the following error, so copy into an ArrayList:
    // org.json4s.package$MappingException: Can't find ScalaSig for class org.apache.atlas.typesystem.Referenceable
    flowPathRef.set(ATTR_INPUTS, new ArrayList<>(distinctInputs.values()));
    flowPathRef.set(ATTR_OUTPUTS, new ArrayList<>(distinctOutputs.values()));
    return new EntityPartialUpdateRequest(NIFI_USER, TYPE_NIFI_FLOW_PATH, ATTR_QUALIFIED_NAME, flowPathQualifiedName, flowPathRef);
}
use of org.apache.nifi.atlas.NiFiTypes.ATTR_QUALIFIED_NAME in project nifi by apache.
the class CompleteFlowPathLineage method createCompleteFlowPath.
/**
 * Builds a NiFiFlowPath for the given LineagePath and buffers it together with its DataSet refs.
 * The created FlowPath carries a hash, which is also stored on the LineagePath.
 *
 * <p>Parent LineagePaths are processed first (recursively), because their hashes are part of the input from
 * which this path's hash is computed. This way, two lineage paths with identical component ids, inputs and
 * outputs still get different hashes when their parents differ.</p>
 *
 * <p>Note that the list returned by {@code lineagePath.getEvents()} is reversed in place.</p>
 *
 * @param nifiFlow A reference to current NiFiFlow
 * @param lineagePath LineagePath from which the NiFiFlowPath and DataSet refs are derived
 * @param createdFlowPaths A buffer receiving the created NiFiFlowPath and its DataSet refs, so that
 *                         sending notifications can be deferred until the whole lineage has been analyzed
 */
private void createCompleteFlowPath(NiFiFlow nifiFlow, LineagePath lineagePath, List<Tuple<NiFiFlowPath, DataSetRefs>> createdFlowPaths) {
    final List<ProvenanceEventRecord> events = lineagePath.getEvents();
    Collections.reverse(events);

    final List<String> componentIds = events.stream()
            .map(ProvenanceEventRecord::getComponentId)
            .collect(Collectors.toList());
    final ProvenanceEventRecord firstEvent = events.get(0);
    final String firstComponentId = firstEvent.getComponentId();
    final DataSetRefs dataSetRefs = lineagePath.getRefs();

    // Parents go first, connected to this path through a queue.
    final List<LineagePath> parents = lineagePath.getParents();
    Referenceable queueBetweenParent = null;
    if (!parents.isEmpty()) {
        queueBetweenParent = new Referenceable(TYPE_NIFI_QUEUE);
        // The first event tells why this lineage has parents, e.g. FORK or JOIN.
        queueBetweenParent.set(ATTR_NAME, firstEvent.getEventType().name());
        dataSetRefs.addInput(queueBetweenParent);

        for (LineagePath parent : parents) {
            parent.getRefs().addOutput(queueBetweenParent);
            createCompleteFlowPath(nifiFlow, parent, createdFlowPaths);
        }
    }

    // Hash input: component ids, typed qualified names of inputs/outputs, and parent hashes.
    final Stream<String> ioIds = Stream.concat(dataSetRefs.getInputs().stream(), dataSetRefs.getOutputs().stream())
            .map(ref -> toTypedQualifiedName(ref.getTypeName(), toStr(ref.get(ATTR_QUALIFIED_NAME))));
    final Stream<String> parentHashes = lineagePath.getParents().stream()
            .map(p -> String.valueOf(p.getLineagePathHash()));
    final String hashInput = Stream.concat(Stream.concat(componentIds.stream(), ioIds), parentHashes)
            .sorted()
            .distinct()
            .collect(Collectors.joining(","));

    final CRC32 crc32 = new CRC32();
    crc32.update(hashInput.getBytes(StandardCharsets.UTF_8));
    final long hash = crc32.getValue();
    lineagePath.setLineagePathHash(hash);

    final NiFiFlowPath flowPath = new NiFiFlowPath(firstComponentId, hash);

    // The queue is named after this path's hash, so that e.g. FF1 and FF2 read from dirA and merged
    // end up with a different queue than FF3 and FF4 read from dirB and merged.
    if (queueBetweenParent != null) {
        queueBetweenParent.set(ATTR_QUALIFIED_NAME, toQualifiedName(nifiFlow.getClusterName(), firstComponentId + "::" + hash));
    }

    // Consecutive events emitted by the same component contribute only once to the path name.
    final List<ProvenanceEventRecord> eventsForName = new ArrayList<>();
    String lastComponentId = null;
    for (ProvenanceEventRecord event : events) {
        final String componentId = event.getComponentId();
        if (!componentId.equals(lastComponentId)) {
            eventsForName.add(event);
        }
        lastComponentId = componentId;
    }
    flowPath.setName(eventsForName.stream()
            .map(event -> nifiFlow.getProcessComponentName(event.getComponentId(), event::getComponentType))
            .collect(Collectors.joining(", ")));

    // Use the group of the path found for the first component, falling back to the root process group.
    final NiFiFlowPath staticFlowPath = nifiFlow.findPath(firstComponentId);
    if (staticFlowPath != null) {
        flowPath.setGroupId(staticFlowPath.getGroupId());
    } else {
        flowPath.setGroupId(nifiFlow.getRootProcessGroupId());
    }

    // Only buffer the result here; notification is deferred until the entire lineagePath analysis is finished.
    createdFlowPaths.add(new Tuple<>(flowPath, dataSetRefs));
}
Aggregations