Search in sources :

Example 36 with OpenLineage

Use of io.openlineage.client.OpenLineage in the project OpenLineage by OpenLineage.

From the class CreateTableLikeCommandVisitorTest, the method testCreateTableLikeCommand:

@Test
@SneakyThrows
void testCreateTableLikeCommand() {
    // Arrange: stub the session -> sessionState -> catalog chain so the visitor
    // can resolve the source table's metadata and the target table's default path.
    CatalogTable sourceTableMetadata = mock(CatalogTable.class);
    when(sparkSession.sparkContext()).thenReturn(mock(SparkContext.class));
    when(sparkSession.sessionState()).thenReturn(sessionState);
    when(sessionState.catalog()).thenReturn(sessionCatalog);
    when(sessionCatalog.getTempViewOrPermanentTableMetadata(sourceTableIdentifier))
        .thenReturn(sourceTableMetadata);
    when(sessionCatalog.defaultTablePath(targetTableIdentifier))
        .thenReturn(new URI("/tmp/warehouse/newtable"));
    when(sourceTableMetadata.schema()).thenReturn(schema);

    // Build the visitor with a context backed by the mocked session.
    OpenLineageContext olContext =
        OpenLineageContext.builder()
            .sparkSession(Optional.of(sparkSession))
            .sparkContext(sparkSession.sparkContext())
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .build();
    CreateTableLikeCommandVisitor visitor = new CreateTableLikeCommandVisitor(olContext);

    // CREATE TABLE ... LIKE command with empty storage/serde/properties.
    CreateTableLikeCommand command =
        new CreateTableLikeCommand(
            targetTableIdentifier,
            sourceTableIdentifier,
            CatalogStorageFormat.empty(),
            Option$.MODULE$.empty(),
            Map$.MODULE$.empty(),
            false);

    // Act & assert: exactly one output dataset with a CREATE lifecycle facet,
    // named after the default table path on the "file" namespace.
    assertThat(visitor.isDefinedAt(command)).isTrue();
    List<OpenLineage.OutputDataset> outputs = visitor.apply(command);
    assertEquals(1, outputs.size());
    OpenLineage.OutputDataset dataset = outputs.get(0);
    assertEquals(
        OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange.CREATE,
        dataset.getFacets().getLifecycleStateChange().getLifecycleStateChange());
    assertEquals("/tmp/warehouse/newtable", dataset.getName());
    assertEquals("file", dataset.getNamespace());
}
Also used : SparkContext(org.apache.spark.SparkContext) OpenLineage(io.openlineage.client.OpenLineage) CatalogTable(org.apache.spark.sql.catalyst.catalog.CatalogTable) URI(java.net.URI) CreateTableLikeCommand(org.apache.spark.sql.execution.command.CreateTableLikeCommand) Test(org.junit.jupiter.api.Test) SneakyThrows(lombok.SneakyThrows)

Example 37 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class LogicalRelationDatasetBuilderTest, the method testApply:

@ParameterizedTest
@ValueSource(strings = { "postgresql://postgreshost:5432/sparkdata", "jdbc:oracle:oci8:@sparkdata", "jdbc:oracle:thin@sparkdata:1521:orcl", "mysql://localhost/sparkdata" })
void testApply(String connectionUri) {
    OpenLineage openLineage = new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI);
    String jdbcUrl = "jdbc:" + connectionUri;
    String sparkTableName = "my_spark_table";

    // Arrange: a JDBC relation with a single "name" column and a driver option.
    StructType tableSchema =
        new StructType(
            new StructField[] { new StructField("name", StringType$.MODULE$, false, null) });
    JDBCOptions jdbcOptions =
        new JDBCOptions(
            jdbcUrl,
            sparkTableName,
            Map$.MODULE$.<String, String>newBuilder()
                .$plus$eq(Tuple2.apply("driver", Driver.class.getName()))
                .result());
    JDBCRelation relation =
        new JDBCRelation(tableSchema, new Partition[] {}, jdbcOptions, session);

    // The query execution's optimized plan is a LogicalRelation over that JDBC source.
    QueryExecution qe = mock(QueryExecution.class);
    LogicalRelation optimizedPlan =
        new LogicalRelation(
            relation,
            Seq$.MODULE$.<AttributeReference>newBuilder()
                .$plus$eq(
                    new AttributeReference(
                        "name",
                        StringType$.MODULE$,
                        false,
                        null,
                        ExprId.apply(1L),
                        Seq$.MODULE$.<String>empty()))
                .result(),
            Option.empty(),
            false);
    when(qe.optimizedPlan()).thenReturn(optimizedPlan);

    OpenLineageContext context =
        OpenLineageContext.builder()
            .sparkContext(mock(SparkContext.class))
            .openLineage(openLineage)
            .queryExecution(qe)
            .build();
    LogicalRelationDatasetBuilder visitor =
        new LogicalRelationDatasetBuilder<>(context, DatasetFactory.output(openLineage), false);

    // Act: build output datasets for a job-start event.
    List<OutputDataset> datasets =
        visitor.apply(new SparkListenerJobStart(1, 1, Seq$.MODULE$.empty(), null));

    // Assert: one dataset; namespace/name come from the connection URI and table,
    // and the datasource facet echoes the (scheme-stripped) connection URI.
    assertEquals(1, datasets.size());
    OutputDataset ds = datasets.get(0);
    assertEquals(connectionUri, ds.getNamespace());
    assertEquals(sparkTableName, ds.getName());
    assertEquals(URI.create(connectionUri), ds.getFacets().getDataSource().getUri());
    assertEquals(connectionUri, ds.getFacets().getDataSource().getName());
}
Also used : StructType(org.apache.spark.sql.types.StructType) SparkListenerJobStart(org.apache.spark.scheduler.SparkListenerJobStart) AttributeReference(org.apache.spark.sql.catalyst.expressions.AttributeReference) JDBCRelation(org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation) Driver(org.postgresql.Driver) QueryExecution(org.apache.spark.sql.execution.QueryExecution) LogicalRelation(org.apache.spark.sql.execution.datasources.LogicalRelation) StructField(org.apache.spark.sql.types.StructField) JDBCOptions(org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions) OpenLineage(io.openlineage.client.OpenLineage) OutputDataset(io.openlineage.client.OpenLineage.OutputDataset) OpenLineageContext(io.openlineage.spark.api.OpenLineageContext) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 38 with OpenLineage

use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.

From the class SQLDWDatabricksVisitorTest, the method testSQLDWRelationComplexQuery:

@Test
void testSQLDWRelationComplexQuery() {
    // A subquery-style table expression rather than a plain table name; the
    // visitor is expected to collapse it to the placeholder name "COMPLEX".
    String inputName = "(SELECT * FROM dbo.table1) q";
    String inputJdbcUrl = "jdbc:sqlserver://MYTESTSERVER.database.windows.net:1433;database=MYTESTDB";
    String expectedName = "COMPLEX";
    String expectedNamespace = "sqlserver://MYTESTSERVER.database.windows.net:1433;database=MYTESTDB;";

    // Arrange: wrap a mock SQL DW relation in a LogicalRelation with one attribute.
    LogicalRelation logicalRelation =
        new LogicalRelation(
            new MockSqlDWBaseRelation(inputName, inputJdbcUrl),
            Seq$.MODULE$.<AttributeReference>newBuilder()
                .$plus$eq(
                    new AttributeReference(
                        "name",
                        StringType$.MODULE$,
                        false,
                        null,
                        ExprId.apply(1L),
                        Seq$.MODULE$.<String>empty()))
                .result(),
            Option.empty(),
            false);
    TestSqlDWDatabricksVisitor visitor =
        new TestSqlDWDatabricksVisitor(
            SparkAgentTestExtension.newContext(session),
            DatasetFactory.output(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI)));

    // Act & assert: one dataset whose namespace is the JDBC URL without the
    // "jdbc:" prefix and whose name is the complex-query placeholder.
    List<OpenLineage.Dataset> datasets = visitor.apply(logicalRelation);
    assertEquals(1, datasets.size());
    OpenLineage.Dataset ds = datasets.get(0);
    assertEquals(expectedNamespace, ds.getNamespace());
    assertEquals(expectedName, ds.getName());
}
Also used : LogicalRelation(org.apache.spark.sql.execution.datasources.LogicalRelation) AttributeReference(org.apache.spark.sql.catalyst.expressions.AttributeReference) OpenLineage(io.openlineage.client.OpenLineage) Test(org.junit.jupiter.api.Test)

Aggregations

OpenLineage (io.openlineage.client.OpenLineage)38 Test (org.junit.jupiter.api.Test)23 SparkListenerJobEnd (org.apache.spark.scheduler.SparkListenerJobEnd)12 SparkListenerJobStart (org.apache.spark.scheduler.SparkListenerJobStart)9 SparkListenerSQLExecutionEnd (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd)9 InputDataset (io.openlineage.client.OpenLineage.InputDataset)7 OpenLineageContext (io.openlineage.spark.api.OpenLineageContext)7 LogicalRelation (org.apache.spark.sql.execution.datasources.LogicalRelation)7 SparkListenerSQLExecutionStart (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart)7 OutputDataset (io.openlineage.client.OpenLineage.OutputDataset)6 HashMap (java.util.HashMap)6 SparkSession (org.apache.spark.sql.SparkSession)6 AttributeReference (org.apache.spark.sql.catalyst.expressions.AttributeReference)6 RunFacet (io.openlineage.client.OpenLineage.RunFacet)5 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 RunEvent (io.openlineage.client.OpenLineage.RunEvent)4 SparkListenerStageCompleted (org.apache.spark.scheduler.SparkListenerStageCompleted)4 JsonAnyGetter (com.fasterxml.jackson.annotation.JsonAnyGetter)3 JsonAnySetter (com.fasterxml.jackson.annotation.JsonAnySetter)3 JsonParser (com.fasterxml.jackson.core.JsonParser)3