Search in sources:

Example 26 with OpenLineage

Use of io.openlineage.client.OpenLineage in the OpenLineage project (by OpenLineage).

In class OpenLineageSparkListener, method errorRunFacet:

/**
 * Wraps the stack trace of an exception in a set of run facets.
 *
 * <p>The full stack trace text is stored under the {@code "exception"} key of a new run
 * facet's additional properties, and that facet is registered under the
 * {@code "lineage.error"} name.
 *
 * @param e the exception whose stack trace is recorded
 * @param ol the OpenLineage factory used to create the facet and the facets builder
 * @return run facets containing the single {@code "lineage.error"} facet
 */
private static OpenLineage.RunFacets errorRunFacet(Exception e, OpenLineage ol) {
    OpenLineage.RunFacet errorFacet = ol.newRunFacet();
    // Capture the trace as characters rather than bytes: going through a byte buffer and
    // back depends on the platform default charset and can lose characters it cannot encode.
    // Fully-qualified name is used because the file's import block is not visible here.
    java.io.StringWriter stackTrace = new java.io.StringWriter();
    try (PrintWriter writer = new PrintWriter(stackTrace)) {
        e.printStackTrace(writer);
    }
    errorFacet.getAdditionalProperties().put("exception", stackTrace.toString());
    OpenLineage.RunFacetsBuilder runFacetsBuilder = ol.newRunFacetsBuilder();
    runFacetsBuilder.put("lineage.error", errorFacet);
    return runFacetsBuilder.build();
}
Also used : OpenLineage(io.openlineage.client.OpenLineage) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) PrintWriter(java.io.PrintWriter)

Example 27 with OpenLineage

Use of io.openlineage.client.OpenLineage in the OpenLineage project (by OpenLineage).

In class RddExecutionContext, method end:

/**
 * Builds the run event for a finished Spark job and emits it.
 *
 * <p>The event carries the job end time, an event type derived from the job result, the
 * inputs and outputs built from this context's collections, a run identified by
 * {@code runId} whose facets include the job error facet, and the job built from the job id.
 *
 * @param jobEnd the Spark listener event describing the finished job
 */
@Override
public void end(SparkListenerJobEnd jobEnd) {
    OpenLineage openLineage = new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI);
    OpenLineage.RunEvent endEvent =
        openLineage
            .newRunEventBuilder()
            .eventTime(toZonedTime(jobEnd.time()))
            .eventType(getEventType(jobEnd.jobResult()))
            .inputs(buildInputs(inputs))
            .outputs(buildOutputs(outputs))
            .run(
                openLineage
                    .newRunBuilder()
                    .runId(runId)
                    .facets(buildRunFacets(buildJobErrorFacet(jobEnd.jobResult())))
                    .build())
            .job(buildJob(jobEnd.jobId()))
            .build();
    log.debug("Posting event for end {}: {}", jobEnd, endEvent);
    sparkContext.emit(endEvent);
}
Also used : OpenLineage(io.openlineage.client.OpenLineage)

Example 28 with OpenLineage

Use of io.openlineage.client.OpenLineage in the OpenLineage project (by OpenLineage).

In class ContextFactory, method createSparkSQLExecutionContext:

/**
 * Creates the execution context for a Spark SQL execution.
 *
 * <p>Looks up the {@link QueryExecution} for the given id, assembles an
 * {@link OpenLineageContext} from its Spark session, and wires that context into a new
 * run event builder and a new {@link SparkSQLExecutionContext}.
 *
 * @param executionId id of the SQL execution to look up
 * @return a new SQL execution context for that execution
 */
public ExecutionContext createSparkSQLExecutionContext(long executionId) {
    QueryExecution execution = SQLExecution.getQueryExecution(executionId);
    // NOTE(review): the lookup result is dereferenced without a null check - confirm it
    // cannot be absent for the ids passed in here.
    SparkSession session = execution.sparkSession();
    OpenLineageContext context =
        OpenLineageContext.builder()
            .sparkSession(Optional.of(session))
            .sparkContext(session.sparkContext())
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .queryExecution(execution)
            .build();
    OpenLineageRunEventBuilder eventBuilder =
        new OpenLineageRunEventBuilder(context, handlerFactory);
    return new SparkSQLExecutionContext(
        executionId, openLineageEventEmitter, context, eventBuilder);
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) OpenLineage(io.openlineage.client.OpenLineage) OpenLineageContext(io.openlineage.spark.api.OpenLineageContext) QueryExecution(org.apache.spark.sql.execution.QueryExecution)

Example 29 with OpenLineage

Use of io.openlineage.client.OpenLineage in the OpenLineage project (by OpenLineage).

In class StaticExecutionContextFactory, method createRddExecutionContext:

/**
 * Creates an {@link RddExecutionContext} whose start and end are gated by the factory's
 * semaphore, and whose time and dataset URI are replaced with fixed values.
 *
 * <p>{@code start} waits up to one second for a permit before delegating; {@code end}
 * delegates and then releases the permit. {@code toZonedTime} ignores its argument and
 * returns {@code getZonedTime()}, and {@code getDatasetUri} always returns
 * {@code gs://bucket/data.txt}.
 *
 * @param jobId id of the Spark job the context is created for
 * @return the semaphore-guarded RDD execution context
 */
@Override
public ExecutionContext createRddExecutionContext(int jobId) {
    return new RddExecutionContext(OpenLineageContext.builder().sparkContext(SparkContext.getOrCreate()).openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI)).build(), jobId, openLineageEventEmitter) {

        @Override
        public void start(SparkListenerJobStart jobStart) {
            try {
                boolean acquired = semaphore.tryAcquire(1, TimeUnit.SECONDS);
                if (!acquired) {
                    throw new RuntimeException("Timeout acquiring permit");
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can still see
                // that this thread was interrupted.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Unable to acquire semaphore", e);
            }
            super.start(jobStart);
        }

        @Override
        public void end(SparkListenerJobEnd jobEnd) {
            try {
                super.end(jobEnd);
            } finally {
                // Release even when end() throws, otherwise the permit is lost and the
                // next start() can only time out.
                semaphore.release();
            }
        }

        @Override
        protected ZonedDateTime toZonedTime(long time) {
            return getZonedTime();
        }

        @Override
        protected URI getDatasetUri(URI pathUri) {
            return URI.create("gs://bucket/data.txt");
        }
    };
}
Also used : SparkListenerJobStart(org.apache.spark.scheduler.SparkListenerJobStart) SparkListenerJobEnd(org.apache.spark.scheduler.SparkListenerJobEnd) OpenLineage(io.openlineage.client.OpenLineage) URI(java.net.URI)

Example 30 with OpenLineage

Use of io.openlineage.client.OpenLineage in the OpenLineage project (by OpenLineage).

In class DataSourceV2RelationDatasetBuilderTest, method testIsApplied:

/**
 * Verifies that applying a builder to a {@code DataSourceV2Relation} returns the datasets
 * produced by {@code PlanUtils3.fromDataSourceV2Relation} and includes the dataset version
 * facet exactly once.
 *
 * <p>Arguments are supplied by the {@code provideBuilders} method source.
 */
@ParameterizedTest
@MethodSource("provideBuilders")
public void testIsApplied(AbstractQueryPlanDatasetBuilder builder, DataSourceV2Relation relation, OpenLineageContext context, DatasetFactory factory, OpenLineage openLineage) {
    OpenLineage.DatasetFacetsBuilder datasetFacetsBuilder = mock(OpenLineage.DatasetFacetsBuilder.class);
    List<OpenLineage.InputDataset> datasets = mock(List.class);
    when(openLineage.newDatasetFacetsBuilder()).thenReturn(datasetFacetsBuilder);
    when(context.getOpenLineage()).thenReturn(openLineage);
    // Both static mocks live in one try-with-resources; they are closed in reverse
    // declaration order, exactly as the nested form would close them.
    try (MockedStatic<PlanUtils3> planUtils3MockedStatic = mockStatic(PlanUtils3.class);
            MockedStatic<DatasetVersionDatasetFacetUtils> facetUtilsMockedStatic = mockStatic(DatasetVersionDatasetFacetUtils.class)) {
        when(PlanUtils3.fromDataSourceV2Relation(factory, context, relation, datasetFacetsBuilder)).thenReturn(datasets);
        assertEquals(datasets, builder.apply(relation));
        facetUtilsMockedStatic.verify(() -> DatasetVersionDatasetFacetUtils.includeDatasetVersion(context, datasetFacetsBuilder, relation), times(1));
    }
}
Also used : MockedStatic(org.mockito.MockedStatic) OpenLineage(io.openlineage.client.OpenLineage) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Aggregations

OpenLineage (io.openlineage.client.OpenLineage) 38; Test (org.junit.jupiter.api.Test) 23; SparkListenerJobEnd (org.apache.spark.scheduler.SparkListenerJobEnd) 12; SparkListenerJobStart (org.apache.spark.scheduler.SparkListenerJobStart) 9; SparkListenerSQLExecutionEnd (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd) 9; InputDataset (io.openlineage.client.OpenLineage.InputDataset) 7; OpenLineageContext (io.openlineage.spark.api.OpenLineageContext) 7; LogicalRelation (org.apache.spark.sql.execution.datasources.LogicalRelation) 7; SparkListenerSQLExecutionStart (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart) 7; OutputDataset (io.openlineage.client.OpenLineage.OutputDataset) 6; HashMap (java.util.HashMap) 6; SparkSession (org.apache.spark.sql.SparkSession) 6; AttributeReference (org.apache.spark.sql.catalyst.expressions.AttributeReference) 6; RunFacet (io.openlineage.client.OpenLineage.RunFacet) 5; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 4; RunEvent (io.openlineage.client.OpenLineage.RunEvent) 4; SparkListenerStageCompleted (org.apache.spark.scheduler.SparkListenerStageCompleted) 4; JsonAnyGetter (com.fasterxml.jackson.annotation.JsonAnyGetter) 3; JsonAnySetter (com.fasterxml.jackson.annotation.JsonAnySetter) 3; JsonParser (com.fasterxml.jackson.core.JsonParser) 3