Use of org.apache.spark.sql.execution.QueryExecution in the OpenLineage project (by OpenLineage): class AbstractQueryPlanDatasetBuilderTest, method createContext.
/**
 * Builds an {@link OpenLineageContext} for tests, backed by the {@link QueryExecution}
 * of a tiny in-memory DataFrame with schema (count: int not null, word: string not null)
 * containing the single row (1, "hello").
 */
private OpenLineageContext createContext(SparkSession session, OpenLineage openLineage) {
  // Empty column metadata, shared by both fields.
  Metadata emptyMetadata = new Metadata(new scala.collection.immutable.HashMap<>());
  StructType schema =
      new StructType(
          new StructField[] {
            new StructField("count", IntegerType$.MODULE$, false, emptyMetadata),
            new StructField("word", StringType$.MODULE$, false, emptyMetadata)
          });

  // One sample row is enough to obtain a real QueryExecution from Spark.
  QueryExecution queryExecution =
      session
          .createDataFrame(Arrays.asList(new GenericRow(new Object[] {1, "hello"})), schema)
          .queryExecution();

  SparkConf sparkConf = new SparkConf().setAppName("test").setMaster("local");
  return OpenLineageContext.builder()
      .sparkContext(SparkContext.getOrCreate(sparkConf))
      .openLineage(openLineage)
      .queryExecution(queryExecution)
      .build();
}
Use of org.apache.spark.sql.execution.QueryExecution in the kylo project (by Teradata): class AbstractHiveDataSetProviderTest, method createDataFrameWriter.
/**
 * Creates a {@code DataFrameWriter} over a mocked {@code DataFrame} whose SQL context
 * reports that no table exists and whose {@code executePlan} first delegates to the
 * supplied answer, then hands back a mocked {@link QueryExecution}.
 *
 * @param executePlanAnswer invoked whenever {@code sqlContext.executePlan(...)} is called
 * @return a writer wired to the mocked DataFrame
 */
@Nonnull
private DataFrameWriter createDataFrameWriter(@Nonnull final Answer<Void> executePlanAnswer) {
  // Catalog stub: every table lookup reports "does not exist".
  final Catalog mockCatalog = Mockito.mock(Catalog.class);
  Mockito.when(mockCatalog.tableExists(Mockito.any(TableIdentifier.class))).thenReturn(false);
  Mockito.when(sqlContext.catalog()).thenReturn(mockCatalog);

  // executePlan runs the caller-supplied side effect, then returns a canned QueryExecution.
  final QueryExecution mockExecution = Mockito.mock(QueryExecution.class);
  Mockito.when(sqlContext.executePlan(Mockito.any(LogicalPlan.class)))
      .then(
          new Answer<QueryExecution>() {
            @Override
            public QueryExecution answer(InvocationOnMock invocation) throws Throwable {
              executePlanAnswer.answer(invocation);
              return mockExecution;
            }
          });

  // DataFrame stub that points back at the shared (test-field) sqlContext.
  final DataFrame mockFrame = Mockito.mock(DataFrame.class);
  Mockito.when(mockFrame.sqlContext()).thenReturn(sqlContext);
  return new DataFrameWriter(mockFrame);
}
Use of org.apache.spark.sql.execution.QueryExecution in the jpmml-sparkml project (by jpmml): class DatasetUtil, method createAnalyzedLogicalPlan.
/**
 * Analyzes a SQL statement against an empty dataset with the given schema and returns
 * the resulting analyzed {@link LogicalPlan}.
 *
 * <p>The placeholder {@code __THIS__} in the statement is replaced with a uniquely named
 * temporary view, which is registered for the duration of the analysis and always
 * dropped afterwards — even if analysis fails.
 *
 * @param sparkSession session used for view registration and SQL analysis
 * @param schema       schema of the (empty) dataset the statement runs against
 * @param statement    SQL text, may reference the input via {@code __THIS__}
 * @return the analyzed logical plan of the statement
 */
public static LogicalPlan createAnalyzedLogicalPlan(SparkSession sparkSession, StructType schema, String statement) {
  // Unique per-call name avoids collisions between concurrent/repeated conversions.
  String viewName = "sql2pmml_" + DatasetUtil.ID.getAndIncrement();
  String resolvedStatement = statement.replace("__THIS__", viewName);

  Dataset<Row> emptyDataset = sparkSession.createDataFrame(Collections.emptyList(), schema);
  emptyDataset.createOrReplaceTempView(viewName);
  try {
    return sparkSession.sql(resolvedStatement).queryExecution().analyzed();
  } finally {
    // Clean up the scratch view regardless of whether analysis succeeded.
    sparkSession.catalog().dropTempView(viewName);
  }
}
Use of org.apache.spark.sql.execution.QueryExecution in the OpenLineage project (by OpenLineage): class OpenLineageSparkListenerTest, method testSqlEventWithJobEventEmitsOnce.
/**
 * Verifies that when both a SQL-execution start event and a job start event arrive for
 * the same execution, the emitter is invoked exactly twice (once per start event), i.e.
 * each start produces one lineage RunEvent rather than duplicating or suppressing emits.
 */
@Test
public void testSqlEventWithJobEventEmitsOnce() {
// Mocked Spark runtime pieces; no real cluster is started.
SparkSession sparkSession = mock(SparkSession.class);
SparkContext sparkContext = mock(SparkContext.class);
EventEmitter emitter = mock(EventEmitter.class);
QueryExecution qe = mock(QueryExecution.class);
// Unresolved source relation feeding the insert command below.
LogicalPlan query = UnresolvedRelation$.MODULE$.apply(TableIdentifier.apply("tableName"));
SparkPlan plan = mock(SparkPlan.class);
when(sparkSession.sparkContext()).thenReturn(sparkContext);
when(sparkContext.appName()).thenReturn("appName");
// Optimized plan is an insert-into-HDFS-relation command writing file:///tmp/dir;
// constructor arguments are positional and version-specific — NOTE(review): order must
// match the Spark version on the test classpath.
when(qe.optimizedPlan()).thenReturn(new InsertIntoHadoopFsRelationCommand(new Path("file:///tmp/dir"), null, false, Seq$.MODULE$.empty(), Option.empty(), null, Map$.MODULE$.empty(), query, SaveMode.Overwrite, Option.empty(), Option.empty(), Seq$.MODULE$.<String>empty()));
when(qe.executedPlan()).thenReturn(plan);
when(plan.sparkContext()).thenReturn(sparkContext);
when(plan.nodeName()).thenReturn("execute");
// Context wired with the mocked session/execution and a visitor that recognizes the
// InsertIntoHadoopFsRelationCommand as an output dataset.
OpenLineageContext olContext = OpenLineageContext.builder().sparkSession(Optional.of(sparkSession)).sparkContext(sparkSession.sparkContext()).openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI)).queryExecution(qe).build();
olContext.getOutputDatasetQueryPlanVisitors().add(new InsertIntoHadoopFsRelationVisitor(olContext));
ExecutionContext executionContext = new StaticExecutionContextFactory(emitter).createSparkSQLExecutionContext(1L, emitter, qe, olContext);
// Fire the SQL execution start and the corresponding job start for execution id 1.
executionContext.start(new SparkListenerSQLExecutionStart(1L, "", "", "", new SparkPlanInfo("name", "string", Seq$.MODULE$.empty(), Map$.MODULE$.empty(), Seq$.MODULE$.empty()), 1L));
executionContext.start(new SparkListenerJobStart(0, 2L, Seq$.MODULE$.<StageInfo>empty(), new Properties()));
ArgumentCaptor<OpenLineage.RunEvent> lineageEvent = ArgumentCaptor.forClass(OpenLineage.RunEvent.class);
// Exactly two emissions expected: one per start() call above.
verify(emitter, times(2)).emit(lineageEvent.capture());
}
Use of org.apache.spark.sql.execution.QueryExecution in the OpenLineage project (by OpenLineage): class ContextFactory, method createSparkSQLExecutionContext.
/**
 * Creates a {@link SparkSQLExecutionContext} for the given SQL execution id by looking up
 * its registered {@link QueryExecution} and assembling the OpenLineage context around it.
 *
 * @param executionId the Spark SQL execution id (as reported in SQL execution listener events)
 * @return an execution context that builds and emits lineage run events for this execution
 * @throws IllegalStateException if Spark has no QueryExecution registered for the id
 *     (e.g. the execution already completed and was cleaned up)
 */
public ExecutionContext createSparkSQLExecutionContext(long executionId) {
  QueryExecution queryExecution = SQLExecution.getQueryExecution(executionId);
  // SQLExecution.getQueryExecution returns null for unknown/expired ids; fail with a
  // diagnostic message instead of an opaque NPE on the next line.
  if (queryExecution == null) {
    throw new IllegalStateException(
        "No QueryExecution registered for Spark SQL executionId " + executionId);
  }
  SparkSession sparkSession = queryExecution.sparkSession();
  OpenLineageContext olContext =
      OpenLineageContext.builder()
          .sparkSession(Optional.of(sparkSession))
          .sparkContext(sparkSession.sparkContext())
          .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
          .queryExecution(queryExecution)
          .build();
  OpenLineageRunEventBuilder runEventBuilder = new OpenLineageRunEventBuilder(olContext, handlerFactory);
  return new SparkSQLExecutionContext(executionId, openLineageEventEmitter, olContext, runEventBuilder);
}
Aggregations