Example usage in project cdap by caskdata.
From the class LineageAdmin, method filterAndAddRelations.
* Filter the relations based on the rollUp flag, if set to true, the method will replace the inner program with
* the workflow using the map and ignore the local datasets relations. The local dataset always ends with the run
* id of the workflow. The set of filtered local datasets is returned
private Set<DatasetId> filterAndAddRelations(boolean rollUpWorkflow, Multimap<RelationKey, Relation> relations, Map<ProgramRunId, ProgramRunId> programWorkflowMap, Set<Relation> relationss) {
Set<DatasetId> localDatasets = new HashSet<>();
for (Relation relation : relationss) {
if (rollUpWorkflow && programWorkflowMap.containsKey(relation.getProgramRunId())) {
ProgramRunId workflowId = programWorkflowMap.get(relation.getProgramRunId());
// skip the relation for local datasets, local datasets always end with the workflow run id
DatasetId data = (DatasetId) relation.getData();
if (data.getDataset().endsWith(workflowId.getRun())) {
relation = new Relation(data, workflowId.getParent(), relation.getAccess(), RunIds.fromString(workflowId.getRun()));
relations.put(new RelationKey(relation), relation);
return localDatasets;
Example usage in project cdap by caskdata.
From the class LineageHTTPHandler, method datasetFieldLineageSummary.
* Get the field level lineage about the specified field in one dataset.
* @param field the field name to compute field level lineage
* @param directionStr the direction to compute the field level lineage, can be INCOMING, OUTGOING or BOTH
* @param startStr the start time string, it can be a specific timestamp in milliseconds or a relative time,
* using now and times added to it.
* @param endStr the end time string, it can be a specific timestamp in milliseconds or a relative time,
* using now and times added to it.
public void datasetFieldLineageSummary(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("dataset-id") String datasetId, @PathParam("field-name") String field, @QueryParam("direction") String directionStr, @QueryParam("start") String startStr, @QueryParam("end") String endStr) throws Exception {
accessEnforcer.enforce(new DatasetId(namespaceId, datasetId), authenticationContext.getPrincipal(), StandardPermission.GET);
TimeRange range = parseRange(startStr, endStr);
Constants.FieldLineage.Direction direction = parseDirection(directionStr);
EndPointField endPointField = new EndPointField(EndPoint.of(namespaceId, datasetId), field);
FieldLineageSummary summary = fieldLineageAdmin.getFieldLineage(direction, endPointField, range.getStart(), range.getEnd());
responder.sendJson(HttpResponseStatus.OK, GSON.toJson(summary));
Example usage in project cdap by caskdata.
From the class LineageHTTPHandler, method datasetFieldLineageDetails.
* Get the operation details about the specified field in one dataset.
* @param field the field name to compute field operation details
* @param directionStr the direction to compute the field level lineage, can be INCOMING, OUTGOING or BOTH
* @param startStr the start time string, it can be a specific timestamp in milliseconds or a relative time,
* using now and times added to it.
* @param endStr the end time string, it can be a specific timestamp in milliseconds or a relative time,
* using now and times added to it.
public void datasetFieldLineageDetails(HttpRequest request, HttpResponder responder, @PathParam("namespace-id") String namespaceId, @PathParam("dataset-id") String datasetId, @PathParam("field-name") String field, @QueryParam("direction") @DefaultValue("both") String directionStr, @QueryParam("start") String startStr, @QueryParam("end") String endStr) throws Exception {
accessEnforcer.enforce(new DatasetId(namespaceId, datasetId), authenticationContext.getPrincipal(), StandardPermission.GET);
TimeRange range = parseRange(startStr, endStr);
Constants.FieldLineage.Direction direction = parseDirection(directionStr);
EndPointField endPointField = new EndPointField(EndPoint.of(namespaceId, datasetId), field);
FieldLineageDetails details = fieldLineageAdmin.getOperationDetails(direction, endPointField, range.getStart(), range.getEnd());
responder.sendJson(HttpResponseStatus.OK, GSON.toJson(details));
Example usage in project cdap by caskdata.
From the class FieldLineageAdmin, method convertSummaryToDatasetMap.
private Map<DatasetId, Set<String>> convertSummaryToDatasetMap(Set<EndPointField> summary) {
Map<DatasetId, Set<String>> endPointFields = new HashMap<>();
for (EndPointField endPointField : summary) {
EndPoint endPoint = endPointField.getEndPoint();
// this can be null if the field is not related to any dataset, it can either be generated or dropped
DatasetId datasetId = (endPoint.getNamespace() == null || endPoint.getName() == null) ? null : new DatasetId(endPoint.getNamespace(), endPoint.getName());
Set<String> fields = endPointFields.computeIfAbsent(datasetId, k -> new HashSet<>());
return endPointFields;
Example usage in project cdap by caskdata.
From the class FieldLineageAdmin, method getDatasetFieldLineage.
* Get the summary for the specified dataset over a given time range depending on the direction specified.
* The summary will contain all the field level lineage relations about all the fields in a dataset.
* @param direction the direction in which summary need to be computed
* @param endPoint the EndPoint whicn represents the dataset that field level lineage needs to get computed
* @param start start time (inclusive) in milliseconds
* @param end end time (exclusive) in milliseconds
* @return the summary which contains all the field level lineage information about all the fields in a dataset
* @throws IOException if fails to get teh schema of the dataset
public DatasetFieldLineageSummary getDatasetFieldLineage(Constants.FieldLineage.Direction direction, EndPoint endPoint, long start, long end) throws IOException {
Set<String> lineageFields = fieldLineageReader.getFields(endPoint, start, end);
Map<DatasetId, Set<FieldRelation>> incomingRelations = new HashMap<>();
Map<DatasetId, Set<FieldRelation>> outgoingRelations = new HashMap<>();
Map<DatasetId, Integer> fieldCount = new HashMap<>();
for (String field : lineageFields) {
EndPointField endPointField = new EndPointField(endPoint, field);
// compute the incoming field level lineage
if (direction == Constants.FieldLineage.Direction.INCOMING || direction == Constants.FieldLineage.Direction.BOTH) {
Map<DatasetId, Set<String>> incomingSummary = convertSummaryToDatasetMap(fieldLineageReader.getIncomingSummary(endPointField, start, end));
// compute the field count for all incoming datasets
incomingSummary.keySet().forEach(datasetId -> {
fieldCount.computeIfAbsent(datasetId, missingDataset -> missingDataset == null ? 0 : fieldLineageReader.getFields(EndPoint.of(missingDataset.getNamespace(), missingDataset.getDataset()), start, end).size());
// here the field itself will be the destination
computeAndAddRelations(incomingRelations, field, true, incomingSummary);
// compute the outgoing field level lineage
if (direction == Constants.FieldLineage.Direction.OUTGOING || direction == Constants.FieldLineage.Direction.BOTH) {
Map<DatasetId, Set<String>> outgoingSummary = convertSummaryToDatasetMap(fieldLineageReader.getOutgoingSummary(endPointField, start, end));
// compute the field count for all outgoing datasets
outgoingSummary.keySet().forEach(datasetId -> {
fieldCount.computeIfAbsent(datasetId, missingDataset -> missingDataset == null ? 0 : fieldLineageReader.getFields(EndPoint.of(missingDataset.getNamespace(), missingDataset.getDataset()), start, end).size());
// here the field itself will be the source
computeAndAddRelations(outgoingRelations, field, false, outgoingSummary);
Set<String> noLineageFields = getFieldsWithNoFieldLineage(endPoint, lineageFields);
Set<String> allFields = ImmutableSet.<String>builder().addAll(lineageFields).addAll(noLineageFields).build();
return new DatasetFieldLineageSummary(direction, start, end, new DatasetId(endPoint.getNamespace(), endPoint.getName()), allFields, fieldCount, incomingRelations, outgoingRelations);