Search in sources :

Example 16 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class DataSourceV2RelationDatasetBuilderTest, method provideBuilders.

/**
 * Supplies one argument set per builder variant (input and output), each paired with a
 * mocked relation plus the shared mocked context, factory, and OpenLineage client.
 */
private static Stream<Arguments> provideBuilders() {
    // Shared mocks reused by both argument sets.
    OpenLineageContext mockContext = mock(OpenLineageContext.class);
    DatasetFactory mockFactory = mock(DatasetFactory.class);
    OpenLineage mockOpenLineage = mock(OpenLineage.class);

    Arguments inputCase =
        Arguments.of(
            new DataSourceV2RelationInputDatasetBuilder(mockContext, mockFactory),
            mock(DataSourceV2Relation.class),
            mockContext,
            mockFactory,
            mockOpenLineage);
    Arguments outputCase =
        Arguments.of(
            new DataSourceV2RelationOutputDatasetBuilder(mockContext, mockFactory),
            mock(DataSourceV2Relation.class),
            mockContext,
            mockFactory,
            mockOpenLineage);

    return Stream.of(inputCase, outputCase);
}
Also used : OpenLineage(io.openlineage.client.OpenLineage) OpenLineageContext(io.openlineage.spark.api.OpenLineageContext) DatasetFactory(io.openlineage.spark.api.DatasetFactory)

Example 17 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class DataSourceV2ScanRelationInputDatasetBuilderTest, method testApply.

@Test
public void testApply() {
    // Mocks for the facets builder, the expected result list, and the relation under test.
    OpenLineage.DatasetFacetsBuilder facetsBuilder = mock(OpenLineage.DatasetFacetsBuilder.class);
    List<OpenLineage.InputDataset> expectedDatasets = mock(List.class);
    DataSourceV2ScanRelation scanRelation = mock(DataSourceV2ScanRelation.class);
    DataSourceV2Relation relation = mock(DataSourceV2Relation.class);

    when(openLineage.newDatasetFacetsBuilder()).thenReturn(facetsBuilder);
    when(context.getOpenLineage()).thenReturn(openLineage);
    when(scanRelation.relation()).thenReturn(relation);

    // One try-with-resources with two static mocks; resources close in reverse
    // declaration order, which matches the original nested-try behavior exactly.
    try (MockedStatic planUtilsMock = mockStatic(PlanUtils3.class);
        MockedStatic facetUtilsMock = mockStatic(DatasetVersionDatasetFacetUtils.class)) {
        when(PlanUtils3.fromDataSourceV2Relation(factory, context, relation, facetsBuilder))
            .thenReturn(expectedDatasets);

        // The builder should delegate to PlanUtils3 and return its datasets unchanged.
        assertEquals(expectedDatasets, builder.apply(scanRelation));

        // The dataset-version facet must be included exactly once for this relation.
        facetUtilsMock.verify(
            () ->
                DatasetVersionDatasetFacetUtils.includeDatasetVersion(
                    context, facetsBuilder, relation),
            times(1));
    }
}
Also used : DataSourceV2Relation(org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation) MockedStatic(org.mockito.MockedStatic) OpenLineage(io.openlineage.client.OpenLineage) DataSourceV2ScanRelation(org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation) Test(org.junit.jupiter.api.Test)

Example 18 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class OpenLineageRunEventTest, method testSerializeRunEvent.

@Test
public void testSerializeRunEvent() throws IOException, URISyntaxException {
    // Round-trips a fully populated START run event through Jackson and compares the
    // resulting JSON map against a checked-in expected fixture.
    ObjectMapper mapper = OpenLineageClient.createMapper();
    // NOTE(review): the string carries both a +02:00 offset and a [UTC] zone id —
    // presumably java.time resolves this to UTC; confirm the fixture agrees.
    ZonedDateTime dateTime = ZonedDateTime.parse("2021-01-01T00:00:01.000000000+02:00[UTC]");
    // Producer URI recorded on every event emitted by this OpenLineage instance.
    OpenLineage ol = new OpenLineage(new URI("https://github.com/OpenLineage/OpenLineage/tree/0.2.3-SNAPSHOT/integration/spark"));
    UUID runId = UUID.fromString("5f24c93c-2ce9-49dc-82e7-95ab4915242f");
    // Run facets: a parent-run facet linking this run to job "namespace.jobName".
    OpenLineage.RunFacets runFacets = ol.newRunFacets(ol.newParentRunFacet(ol.newParentRunFacetRun(runId), ol.newParentRunFacetJob("namespace", "jobName")), null);
    OpenLineage.Run run = ol.newRun(runId, runFacets);
    // Job facets: documentation, source-code location (git), and the SQL that ran.
    OpenLineage.DocumentationJobFacet documentationJobFacet = ol.newDocumentationJobFacetBuilder().description("test documentation").build();
    OpenLineage.SourceCodeLocationJobFacet sourceCodeLocationJobFacet = ol.newSourceCodeLocationJobFacetBuilder().branch("branch").path("/path/to/file").repoUrl("https://github.com/apache/spark").type("git").version("v1").url(URI.create("https://github.com/apache/spark")).tag("v1.0.0").build();
    OpenLineage.SQLJobFacet sqlJobFacet = ol.newSQLJobFacet("SELECT * FROM test");
    OpenLineage.JobFacets jobFacets = ol.newJobFacetsBuilder().sourceCodeLocation(sourceCodeLocationJobFacet).sql(sqlJobFacet).documentation(documentationJobFacet).build();
    OpenLineage.Job job = ol.newJob("namespace", "jobName", jobFacets);
    // Single input dataset "ins.input" with data-quality metrics (row/byte counts plus
    // per-column statistics and a 25th-percentile quantile for column "mycol").
    List<OpenLineage.InputDataset> inputs = Arrays.asList(ol.newInputDataset("ins", "input", null, ol.newInputDatasetInputFacetsBuilder().dataQualityMetrics(ol.newDataQualityMetricsInputDatasetFacetBuilder().rowCount(10L).bytes(20L).columnMetrics(ol.newDataQualityMetricsInputDatasetFacetColumnMetricsBuilder().put("mycol", ol.newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalBuilder().count(10D).distinctCount(10L).max(30D).min(5D).nullCount(1L).sum(3000D).quantiles(ol.newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantilesBuilder().put("25", 52D).build()).build()).build()).build()).build()));
    // Single output dataset "ons.output" with output statistics (10 rows, 20 bytes).
    List<OpenLineage.OutputDataset> outputs = Arrays.asList(ol.newOutputDataset("ons", "output", null, ol.newOutputDatasetOutputFacetsBuilder().outputStatistics(ol.newOutputStatisticsOutputDatasetFacet(10L, 20L)).build()));
    OpenLineage.RunEvent runStateUpdate = ol.newRunEvent(OpenLineage.RunEvent.EventType.START, dateTime, run, job, inputs, outputs);
    // Serialize to a JSON string, then back to a generic Map for structural comparison.
    Map<String, Object> actualJson = mapper.readValue(mapper.writeValueAsString(runStateUpdate), mapTypeReference);
    Path expectedDataPath = Paths.get("src", "test", "resources", "test_data", "serde", "openlineage-event.json");
    Map<String, Object> expectedJson = mapper.readValue(expectedDataPath.toFile(), mapTypeReference);
    // Recursive map comparison rather than string equality, so key order is irrelevant.
    assertThat(actualJson).satisfies(new MatchesMapRecursively(expectedJson));
}
Also used : Path(java.nio.file.Path) URI(java.net.URI) ZonedDateTime(java.time.ZonedDateTime) MatchesMapRecursively(io.openlineage.spark.agent.lifecycle.MatchesMapRecursively) OpenLineage(io.openlineage.client.OpenLineage) UUID(java.util.UUID) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.jupiter.api.Test)

Example 19 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class OpenLineageSparkListenerTest, method testSqlEventWithJobEventEmitsOnce.

@Test
public void testSqlEventWithJobEventEmitsOnce() {
    // Verifies the emit count when both a SQL-execution start and a job start arrive
    // for the same execution context. Despite the method name, the test asserts two
    // emissions total — presumably one per start() call; confirm against the factory.
    SparkSession sparkSession = mock(SparkSession.class);
    SparkContext sparkContext = mock(SparkContext.class);
    EventEmitter emitter = mock(EventEmitter.class);
    QueryExecution qe = mock(QueryExecution.class);
    LogicalPlan query = UnresolvedRelation$.MODULE$.apply(TableIdentifier.apply("tableName"));
    SparkPlan plan = mock(SparkPlan.class);
    when(sparkSession.sparkContext()).thenReturn(sparkContext);
    when(sparkContext.appName()).thenReturn("appName");
    // Optimized plan is a write command (InsertIntoHadoopFsRelationCommand) targeting
    // file:///tmp/dir, so the output-dataset visitor registered below will match it.
    when(qe.optimizedPlan()).thenReturn(new InsertIntoHadoopFsRelationCommand(new Path("file:///tmp/dir"), null, false, Seq$.MODULE$.empty(), Option.empty(), null, Map$.MODULE$.empty(), query, SaveMode.Overwrite, Option.empty(), Option.empty(), Seq$.MODULE$.<String>empty()));
    when(qe.executedPlan()).thenReturn(plan);
    when(plan.sparkContext()).thenReturn(sparkContext);
    when(plan.nodeName()).thenReturn("execute");
    // Real (non-mock) OpenLineageContext wired to the mocked session and query execution.
    OpenLineageContext olContext = OpenLineageContext.builder().sparkSession(Optional.of(sparkSession)).sparkContext(sparkSession.sparkContext()).openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI)).queryExecution(qe).build();
    olContext.getOutputDatasetQueryPlanVisitors().add(new InsertIntoHadoopFsRelationVisitor(olContext));
    ExecutionContext executionContext = new StaticExecutionContextFactory(emitter).createSparkSQLExecutionContext(1L, emitter, qe, olContext);
    // Drive both lifecycle entry points: SQL execution start, then job start.
    executionContext.start(new SparkListenerSQLExecutionStart(1L, "", "", "", new SparkPlanInfo("name", "string", Seq$.MODULE$.empty(), Map$.MODULE$.empty(), Seq$.MODULE$.empty()), 1L));
    executionContext.start(new SparkListenerJobStart(0, 2L, Seq$.MODULE$.<StageInfo>empty(), new Properties()));
    // Exactly two run events must have been emitted across the two start calls.
    ArgumentCaptor<OpenLineage.RunEvent> lineageEvent = ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
    verify(emitter, times(2)).emit(lineageEvent.capture());
}
Also used : Path(org.apache.hadoop.fs.Path) SparkSession(org.apache.spark.sql.SparkSession) SparkPlan(org.apache.spark.sql.execution.SparkPlan) SparkListenerJobStart(org.apache.spark.scheduler.SparkListenerJobStart) StageInfo(org.apache.spark.scheduler.StageInfo) StaticExecutionContextFactory(io.openlineage.spark.agent.lifecycle.StaticExecutionContextFactory) InsertIntoHadoopFsRelationCommand(org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand) Properties(java.util.Properties) QueryExecution(org.apache.spark.sql.execution.QueryExecution) SparkListenerSQLExecutionStart(org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart) SparkContext(org.apache.spark.SparkContext) ExecutionContext(io.openlineage.spark.agent.lifecycle.ExecutionContext) SparkPlanInfo(org.apache.spark.sql.execution.SparkPlanInfo) OpenLineage(io.openlineage.client.OpenLineage) LogicalPlan(org.apache.spark.sql.catalyst.plans.logical.LogicalPlan) OpenLineageContext(io.openlineage.spark.api.OpenLineageContext) InsertIntoHadoopFsRelationVisitor(io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor) Test(org.junit.jupiter.api.Test)

Example 20 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class OutputStatisticsOutputDatasetFacetBuilderTest, method testIsDefined.

@Test
public void testIsDefined() {
    // Build a minimal real context backed by the default OpenLineage client URI.
    OpenLineageContext olContext =
        OpenLineageContext.builder()
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .sparkContext(sparkContext)
            .build();
    OutputStatisticsOutputDatasetFacetBuilder facetBuilder =
        new OutputStatisticsOutputDatasetFacetBuilder(olContext);

    // Defined for job-end events, not for SQL-execution-end events.
    assertThat(facetBuilder.isDefinedAt(new SparkListenerJobEnd(1, 1L, JobSucceeded$.MODULE$))).isTrue();
    assertThat(facetBuilder.isDefinedAt(new SparkListenerSQLExecutionEnd(1L, 1L))).isFalse();
}
Also used : SparkListenerSQLExecutionEnd(org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd) SparkListenerJobEnd(org.apache.spark.scheduler.SparkListenerJobEnd) OpenLineage(io.openlineage.client.OpenLineage) Test(org.junit.jupiter.api.Test)

Aggregations

OpenLineage (io.openlineage.client.OpenLineage)38 Test (org.junit.jupiter.api.Test)23 SparkListenerJobEnd (org.apache.spark.scheduler.SparkListenerJobEnd)12 SparkListenerJobStart (org.apache.spark.scheduler.SparkListenerJobStart)9 SparkListenerSQLExecutionEnd (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd)9 InputDataset (io.openlineage.client.OpenLineage.InputDataset)7 OpenLineageContext (io.openlineage.spark.api.OpenLineageContext)7 LogicalRelation (org.apache.spark.sql.execution.datasources.LogicalRelation)7 SparkListenerSQLExecutionStart (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart)7 OutputDataset (io.openlineage.client.OpenLineage.OutputDataset)6 HashMap (java.util.HashMap)6 SparkSession (org.apache.spark.sql.SparkSession)6 AttributeReference (org.apache.spark.sql.catalyst.expressions.AttributeReference)6 RunFacet (io.openlineage.client.OpenLineage.RunFacet)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 RunEvent (io.openlineage.client.OpenLineage.RunEvent)4 SparkListenerStageCompleted (org.apache.spark.scheduler.SparkListenerStageCompleted)4 JsonAnyGetter (com.fasterxml.jackson.annotation.JsonAnyGetter)3 JsonAnySetter (com.fasterxml.jackson.annotation.JsonAnySetter)3 JsonParser (com.fasterxml.jackson.core.JsonParser)3