Example 1 with QueryExecution

Use of org.apache.spark.sql.execution.QueryExecution in project OpenLineage by OpenLineage.

From class AbstractQueryPlanDatasetBuilderTest, method createContext:

private OpenLineageContext createContext(SparkSession session, OpenLineage openLineage) {
    QueryExecution queryExecution =
        session
            .createDataFrame(
                Arrays.asList(new GenericRow(new Object[] { 1, "hello" })),
                new StructType(
                    new StructField[] {
                        new StructField("count", IntegerType$.MODULE$, false,
                            new Metadata(new scala.collection.immutable.HashMap<>())),
                        new StructField("word", StringType$.MODULE$, false,
                            new Metadata(new scala.collection.immutable.HashMap<>()))
                    }))
            .queryExecution();
    OpenLineageContext context =
        OpenLineageContext.builder()
            .sparkContext(
                SparkContext.getOrCreate(new SparkConf().setAppName("test").setMaster("local")))
            .openLineage(openLineage)
            .queryExecution(queryExecution)
            .build();
    return context;
}
Also used: GenericRow (org.apache.spark.sql.catalyst.expressions.GenericRow), StructField (org.apache.spark.sql.types.StructField), StructType (org.apache.spark.sql.types.StructType), Metadata (org.apache.spark.sql.types.Metadata), SparkConf (org.apache.spark.SparkConf), QueryExecution (org.apache.spark.sql.execution.QueryExecution)
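
Outside a test harness, the same entry point is simply the queryExecution() accessor available on any Dataset. A minimal sketch, assuming a local SparkSession; the app name and data are illustrative:

SparkSession spark = SparkSession.builder()
        .appName("qe-demo") // illustrative name
        .master("local[*]")
        .getOrCreate();
Dataset<Row> df = spark.range(10).toDF("id");
QueryExecution qe = df.queryExecution(); // same object the builder above receives
System.out.println(qe.analyzed());       // analyzed logical plan
System.out.println(qe.optimizedPlan());  // optimized logical plan
System.out.println(qe.executedPlan());   // physical plan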

Example 2 with QueryExecution

Use of org.apache.spark.sql.execution.QueryExecution in project kylo by Teradata.

From class AbstractHiveDataSetProviderTest, method createDataFrameWriter:

/**
 * Creates a {@code DataFrameWriter} that creates tables using the specified answer.
 */
@Nonnull
private DataFrameWriter createDataFrameWriter(@Nonnull final Answer<Void> executePlanAnswer) {
    final DataFrame df = Mockito.mock(DataFrame.class);
    Mockito.when(df.sqlContext()).thenReturn(sqlContext);
    final Catalog catalog = Mockito.mock(Catalog.class);
    Mockito.when(catalog.tableExists(Mockito.any(TableIdentifier.class))).thenReturn(false);
    Mockito.when(sqlContext.catalog()).thenReturn(catalog);
    final QueryExecution queryExecution = Mockito.mock(QueryExecution.class);
    Mockito.when(sqlContext.executePlan(Mockito.any(LogicalPlan.class))).then(new Answer<QueryExecution>() {

        @Override
        public QueryExecution answer(InvocationOnMock invocation) throws Throwable {
            executePlanAnswer.answer(invocation);
            return queryExecution;
        }
    });
    return new DataFrameWriter(df);
}
Also used: TableIdentifier (org.apache.spark.sql.catalyst.TableIdentifier), InvocationOnMock (org.mockito.invocation.InvocationOnMock), DataFrameWriter (org.apache.spark.sql.DataFrameWriter), LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan), DataFrame (org.apache.spark.sql.DataFrame), Catalog (org.apache.spark.sql.catalyst.analysis.Catalog), QueryExecution (org.apache.spark.sql.execution.QueryExecution), Nonnull (javax.annotation.Nonnull)
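
On Java 8+, the anonymous Answer above collapses to a lambda, since Answer is a functional interface. A behavior-equivalent sketch of the same stubbing, assuming the same mocks are in scope:

Mockito.when(sqlContext.executePlan(Mockito.any(LogicalPlan.class)))
        .then(invocation -> {
            executePlanAnswer.answer(invocation); // run the caller-supplied side effect
            return queryExecution;                // then hand back the mocked QueryExecution
        });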

Example 3 with QueryExecution

Use of org.apache.spark.sql.execution.QueryExecution in project jpmml-sparkml by jpmml.

From class DatasetUtil, method createAnalyzedLogicalPlan:

public static LogicalPlan createAnalyzedLogicalPlan(SparkSession sparkSession, StructType schema, String statement) {
    String tableName = "sql2pmml_" + DatasetUtil.ID.getAndIncrement();
    statement = statement.replace("__THIS__", tableName);
    Dataset<Row> dataset = sparkSession.createDataFrame(Collections.emptyList(), schema);
    dataset.createOrReplaceTempView(tableName);
    try {
        QueryExecution queryExecution = sparkSession.sql(statement).queryExecution();
        return queryExecution.analyzed();
    } finally {
        Catalog catalog = sparkSession.catalog();
        catalog.dropTempView(tableName);
    }
}
Also used: Row (org.apache.spark.sql.Row), QueryExecution (org.apache.spark.sql.execution.QueryExecution), Catalog (org.apache.spark.sql.catalog.Catalog)
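
A hedged usage sketch of the helper above. The __THIS__ placeholder follows the SQLTransformer convention and is replaced with the generated temp-view name before the statement runs; the schema and statement here are illustrative, and a SparkSession named sparkSession is assumed to be in scope:

StructType schema = new StructType(new StructField[] {
        new StructField("x", DataTypes.DoubleType, false, Metadata.empty())
});
LogicalPlan plan = DatasetUtil.createAnalyzedLogicalPlan(
        sparkSession, schema, "SELECT x, x * x AS x2 FROM __THIS__");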

Example 4 with QueryExecution

Use of org.apache.spark.sql.execution.QueryExecution in project OpenLineage by OpenLineage.

From class OpenLineageSparkListenerTest, method testSqlEventWithJobEventEmitsOnce:

@Test
public void testSqlEventWithJobEventEmitsOnce() {
    SparkSession sparkSession = mock(SparkSession.class);
    SparkContext sparkContext = mock(SparkContext.class);
    EventEmitter emitter = mock(EventEmitter.class);
    QueryExecution qe = mock(QueryExecution.class);
    LogicalPlan query = UnresolvedRelation$.MODULE$.apply(TableIdentifier.apply("tableName"));
    SparkPlan plan = mock(SparkPlan.class);
    when(sparkSession.sparkContext()).thenReturn(sparkContext);
    when(sparkContext.appName()).thenReturn("appName");
    when(qe.optimizedPlan())
        .thenReturn(
            new InsertIntoHadoopFsRelationCommand(
                new Path("file:///tmp/dir"),
                null,
                false,
                Seq$.MODULE$.empty(),
                Option.empty(),
                null,
                Map$.MODULE$.empty(),
                query,
                SaveMode.Overwrite,
                Option.empty(),
                Option.empty(),
                Seq$.MODULE$.<String>empty()));
    when(qe.executedPlan()).thenReturn(plan);
    when(plan.sparkContext()).thenReturn(sparkContext);
    when(plan.nodeName()).thenReturn("execute");
    OpenLineageContext olContext =
        OpenLineageContext.builder()
            .sparkSession(Optional.of(sparkSession))
            .sparkContext(sparkSession.sparkContext())
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .queryExecution(qe)
            .build();
    olContext
        .getOutputDatasetQueryPlanVisitors()
        .add(new InsertIntoHadoopFsRelationVisitor(olContext));
    ExecutionContext executionContext =
        new StaticExecutionContextFactory(emitter)
            .createSparkSQLExecutionContext(1L, emitter, qe, olContext);
    executionContext.start(
        new SparkListenerSQLExecutionStart(
            1L,
            "",
            "",
            "",
            new SparkPlanInfo(
                "name", "string", Seq$.MODULE$.empty(), Map$.MODULE$.empty(), Seq$.MODULE$.empty()),
            1L));
    executionContext.start(
        new SparkListenerJobStart(0, 2L, Seq$.MODULE$.<StageInfo>empty(), new Properties()));
    ArgumentCaptor<OpenLineage.RunEvent> lineageEvent =
        ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
    verify(emitter, times(2)).emit(lineageEvent.capture());
}
Also used: Path (org.apache.hadoop.fs.Path), SparkSession (org.apache.spark.sql.SparkSession), SparkPlan (org.apache.spark.sql.execution.SparkPlan), SparkListenerJobStart (org.apache.spark.scheduler.SparkListenerJobStart), StageInfo (org.apache.spark.scheduler.StageInfo), StaticExecutionContextFactory (io.openlineage.spark.agent.lifecycle.StaticExecutionContextFactory), InsertIntoHadoopFsRelationCommand (org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand), Properties (java.util.Properties), QueryExecution (org.apache.spark.sql.execution.QueryExecution), SparkListenerSQLExecutionStart (org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart), SparkContext (org.apache.spark.SparkContext), ExecutionContext (io.openlineage.spark.agent.lifecycle.ExecutionContext), SparkPlanInfo (org.apache.spark.sql.execution.SparkPlanInfo), OpenLineage (io.openlineage.client.OpenLineage), LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan), OpenLineageContext (io.openlineage.spark.api.OpenLineageContext), InsertIntoHadoopFsRelationVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor), Test (org.junit.jupiter.api.Test)
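
If the two captured events need closer assertions, a hedged follow-up to the verify(...) call above; getAllValues() is standard Mockito, and the assertion style assumes JUnit:

List<OpenLineage.RunEvent> events = lineageEvent.getAllValues();
assertEquals(2, events.size()); // one event per start() call above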

Example 5 with QueryExecution

Use of org.apache.spark.sql.execution.QueryExecution in project OpenLineage by OpenLineage.

From class ContextFactory, method createSparkSQLExecutionContext:

public ExecutionContext createSparkSQLExecutionContext(long executionId) {
    QueryExecution queryExecution = SQLExecution.getQueryExecution(executionId);
    SparkSession sparkSession = queryExecution.sparkSession();
    OpenLineageContext olContext =
        OpenLineageContext.builder()
            .sparkSession(Optional.of(sparkSession))
            .sparkContext(sparkSession.sparkContext())
            .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
            .queryExecution(queryExecution)
            .build();
    OpenLineageRunEventBuilder runEventBuilder =
        new OpenLineageRunEventBuilder(olContext, handlerFactory);
    return new SparkSQLExecutionContext(executionId, openLineageEventEmitter, olContext, runEventBuilder);
}
Also used: SparkSession (org.apache.spark.sql.SparkSession), OpenLineage (io.openlineage.client.OpenLineage), OpenLineageContext (io.openlineage.spark.api.OpenLineageContext), QueryExecution (org.apache.spark.sql.execution.QueryExecution)
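
For orientation, the executionId passed in typically comes from a SparkListenerSQLExecutionStart event. A sketch of that wiring inside a SparkListener subclass, not OpenLineage's actual listener code:

@Override
public void onOtherEvent(SparkListenerEvent event) {
    if (event instanceof SparkListenerSQLExecutionStart) {
        long executionId = ((SparkListenerSQLExecutionStart) event).executionId();
        // May be null if the execution has already been cleaned up.
        QueryExecution queryExecution = SQLExecution.getQueryExecution(executionId);
        // ... hand off to createSparkSQLExecutionContext(executionId) as above
    }
}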

Aggregations

QueryExecution (org.apache.spark.sql.execution.QueryExecution): 6
OpenLineage (io.openlineage.client.OpenLineage): 3
OpenLineageContext (io.openlineage.spark.api.OpenLineageContext): 3
SparkListenerJobStart (org.apache.spark.scheduler.SparkListenerJobStart): 2
SparkSession (org.apache.spark.sql.SparkSession): 2
LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan): 2
StructField (org.apache.spark.sql.types.StructField): 2
StructType (org.apache.spark.sql.types.StructType): 2
OutputDataset (io.openlineage.client.OpenLineage.OutputDataset): 1
ExecutionContext (io.openlineage.spark.agent.lifecycle.ExecutionContext): 1
StaticExecutionContextFactory (io.openlineage.spark.agent.lifecycle.StaticExecutionContextFactory): 1
InsertIntoHadoopFsRelationVisitor (io.openlineage.spark.agent.lifecycle.plan.InsertIntoHadoopFsRelationVisitor): 1
Properties (java.util.Properties): 1
Nonnull (javax.annotation.Nonnull): 1
Path (org.apache.hadoop.fs.Path): 1
SparkConf (org.apache.spark.SparkConf): 1
SparkContext (org.apache.spark.SparkContext): 1
StageInfo (org.apache.spark.scheduler.StageInfo): 1
DataFrame (org.apache.spark.sql.DataFrame): 1
DataFrameWriter (org.apache.spark.sql.DataFrameWriter): 1