Search in sources :

Example 1 with InputDatasetFacet

Use of io.openlineage.client.OpenLineage.InputDatasetFacet in project OpenLineage by OpenLineage.

The method buildInputDatasets of the class OpenLineageRunEventBuilder.

/**
 * Builds the input datasets for the given nodes.
 *
 * <p>Datasets come from two sources that are concatenated: the result of
 * {@code buildDatasets(nodes, inputDatasetBuilders)} and, when a query execution is present,
 * the collections produced by mapping the merged {@code inputDatasetQueryPlanVisitors} over its
 * optimized plan. When at least one dataset was found, every dataset is rebuilt with the same
 * name and namespace and with its facets passed through {@code mergeFacets} together with the
 * facets collected from {@code inputDatasetFacetBuilders}.
 *
 * @param nodes objects handed to the dataset builders and to the input dataset facet builders
 * @return the list of input datasets; the collected (empty) list is returned as-is when no
 *     dataset was found
 */
private List<OpenLineage.InputDataset> buildInputDatasets(List<Object> nodes) {
    openLineageContext.getQueryExecution().ifPresent(qe -> {
        // Plan serialization happens eagerly as a log argument, so guard it explicitly.
        if (log.isDebugEnabled()) {
            log.debug("Traversing optimized plan {}", qe.optimizedPlan().toJSON());
            log.debug("Physical plan executed {}", qe.executedPlan().toJSON());
        }
    });
    log.info("Visiting query plan {} with input dataset builders {}", openLineageContext.getQueryExecution(), inputDatasetBuilders);

    Function1<LogicalPlan, Collection<InputDataset>> inputVisitor = visitLogicalPlan(PlanUtils.merge(inputDatasetQueryPlanVisitors));

    // Datasets discovered by walking the optimized logical plan; empty without a query execution.
    Stream<OpenLineage.InputDataset> planDatasets = openLineageContext.getQueryExecution()
        .map(qe -> fromSeq(qe.optimizedPlan().map(inputVisitor)).stream()
            .flatMap(Collection::stream)
            .map(((Class<InputDataset>) InputDataset.class)::cast))
        .orElse(Stream.empty());

    List<OpenLineage.InputDataset> datasets = Stream.concat(buildDatasets(nodes, inputDatasetBuilders), planDatasets)
        .collect(Collectors.toList());

    OpenLineage openLineage = openLineageContext.getOpenLineage();
    if (!datasets.isEmpty()) {
        Map<String, InputDatasetFacet> inputFacetsMap = new HashMap<>();
        // Run every input dataset facet builder exactly once per node. The original repeated this
        // statement verbatim a second time, invoking each builder twice for the same node.
        nodes.forEach(event -> inputDatasetFacetBuilders.forEach(fn -> fn.accept(event, inputFacetsMap::put)));

        // NOTE(review): nothing in this method ever writes to datasetFacetsMap. The duplicated
        // statement removed above sat directly after this declaration, so it was presumably
        // meant to run dataset-level facet builders into this map - confirm against the rest of
        // the class before wiring that in.
        Map<String, DatasetFacets> datasetFacetsMap = new HashMap<>();

        return datasets.stream()
            .map(ds -> openLineage.newInputDatasetBuilder()
                .name(ds.getName())
                .namespace(ds.getNamespace())
                .inputFacets(mergeFacets(inputFacetsMap, ds.getInputFacets(), InputDatasetInputFacets.class))
                .facets(mergeFacets(datasetFacetsMap, ds.getFacets(), DatasetFacets.class))
                .build())
            .collect(Collectors.toList());
    }
    return datasets;
}
Also used : OpenLineageClient(io.openlineage.spark.agent.client.OpenLineageClient) Arrays(java.util.Arrays) InputDataset(io.openlineage.client.OpenLineage.InputDataset) RunFacetsBuilder(io.openlineage.client.OpenLineage.RunFacetsBuilder) RunEventBuilder(io.openlineage.client.OpenLineage.RunEventBuilder) DatasetFacets(io.openlineage.client.OpenLineage.DatasetFacets) OutputDataset(io.openlineage.client.OpenLineage.OutputDataset) Map(java.util.Map) JobFacet(io.openlineage.client.OpenLineage.JobFacet) SparkListenerSQLExecutionStart(org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart) JsonAnyGetter(com.fasterxml.jackson.annotation.JsonAnyGetter) TypeReference(com.fasterxml.jackson.core.type.TypeReference) JsonDeserializer(com.fasterxml.jackson.databind.JsonDeserializer) Method(java.lang.reflect.Method) RunEvent(io.openlineage.client.OpenLineage.RunEvent) Stage(org.apache.spark.scheduler.Stage) ScalaConversionUtils.toScalaFn(io.openlineage.spark.agent.util.ScalaConversionUtils.toScalaFn) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) PartialFunction(scala.PartialFunction) NonNull(lombok.NonNull) Collection(java.util.Collection) DatasetFacet(io.openlineage.client.OpenLineage.DatasetFacet) Collectors(java.util.stream.Collectors) IntrospectionException(java.beans.IntrospectionException) OutputDatasetOutputFacets(io.openlineage.client.OpenLineage.OutputDatasetOutputFacets) PlanUtils(io.openlineage.spark.agent.util.PlanUtils) JsonAnySetter(com.fasterxml.jackson.annotation.JsonAnySetter) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) Stream(java.util.stream.Stream) CustomFacetBuilder(io.openlineage.spark.api.CustomFacetBuilder) Type(java.lang.reflect.Type) PropertyDescriptor(java.beans.PropertyDescriptor) Optional(java.util.Optional) RDD(org.apache.spark.rdd.RDD) RunFacet(io.openlineage.client.OpenLineage.RunFacet) JobFailed(org.apache.spark.scheduler.JobFailed) OutputDatasetFacet(io.openlineage.client.OpenLineage.OutputDatasetFacet) 
ParentRunFacet(io.openlineage.client.OpenLineage.ParentRunFacet) Function1(scala.Function1) HashMap(java.util.HashMap) InputDatasetInputFacets(io.openlineage.client.OpenLineage.InputDatasetInputFacets) ScalaConversionUtils.fromSeq(io.openlineage.spark.agent.util.ScalaConversionUtils.fromSeq) ArrayList(java.util.ArrayList) Introspector(java.beans.Introspector) RunFacets(io.openlineage.client.OpenLineage.RunFacets) DeserializationProblemHandler(com.fasterxml.jackson.databind.deser.DeserializationProblemHandler) JobBuilder(io.openlineage.client.OpenLineage.JobBuilder) BeanInfo(java.beans.BeanInfo) SparkListenerJobEnd(org.apache.spark.scheduler.SparkListenerJobEnd) SparkListenerSQLExecutionEnd(org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd) DeserializationContext(com.fasterxml.jackson.databind.DeserializationContext) JsonParser(com.fasterxml.jackson.core.JsonParser) OpenLineageContext(io.openlineage.spark.api.OpenLineageContext) ActiveJob(org.apache.spark.scheduler.ActiveJob) SparkListenerJobStart(org.apache.spark.scheduler.SparkListenerJobStart) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) InputDatasetFacet(io.openlineage.client.OpenLineage.InputDatasetFacet) ScalaConversionUtils(io.openlineage.spark.agent.util.ScalaConversionUtils) SparkListenerStageSubmitted(org.apache.spark.scheduler.SparkListenerStageSubmitted) OpenLineageEventHandlerFactory(io.openlineage.spark.api.OpenLineageEventHandlerFactory) ParameterizedType(java.lang.reflect.ParameterizedType) AllArgsConstructor(lombok.AllArgsConstructor) SparkListenerStageCompleted(org.apache.spark.scheduler.SparkListenerStageCompleted) OpenLineage(io.openlineage.client.OpenLineage) Collections(java.util.Collections) HashMap(java.util.HashMap) DatasetFacets(io.openlineage.client.OpenLineage.DatasetFacets) InputDatasetFacet(io.openlineage.client.OpenLineage.InputDatasetFacet) InputDataset(io.openlineage.client.OpenLineage.InputDataset) OpenLineage(io.openlineage.client.OpenLineage) 
Collection(java.util.Collection) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) InputDatasetInputFacets(io.openlineage.client.OpenLineage.InputDatasetInputFacets)

Aggregations

JsonAnyGetter (com.fasterxml.jackson.annotation.JsonAnyGetter)1 JsonAnySetter (com.fasterxml.jackson.annotation.JsonAnySetter)1 JsonParser (com.fasterxml.jackson.core.JsonParser)1 TypeReference (com.fasterxml.jackson.core.type.TypeReference)1 DeserializationContext (com.fasterxml.jackson.databind.DeserializationContext)1 JsonDeserializer (com.fasterxml.jackson.databind.JsonDeserializer)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 DeserializationProblemHandler (com.fasterxml.jackson.databind.deser.DeserializationProblemHandler)1 OpenLineage (io.openlineage.client.OpenLineage)1 DatasetFacet (io.openlineage.client.OpenLineage.DatasetFacet)1 DatasetFacets (io.openlineage.client.OpenLineage.DatasetFacets)1 InputDataset (io.openlineage.client.OpenLineage.InputDataset)1 InputDatasetFacet (io.openlineage.client.OpenLineage.InputDatasetFacet)1 InputDatasetInputFacets (io.openlineage.client.OpenLineage.InputDatasetInputFacets)1 JobBuilder (io.openlineage.client.OpenLineage.JobBuilder)1 JobFacet (io.openlineage.client.OpenLineage.JobFacet)1 OutputDataset (io.openlineage.client.OpenLineage.OutputDataset)1 OutputDatasetFacet (io.openlineage.client.OpenLineage.OutputDatasetFacet)1 OutputDatasetOutputFacets (io.openlineage.client.OpenLineage.OutputDatasetOutputFacets)1 ParentRunFacet (io.openlineage.client.OpenLineage.ParentRunFacet)1