Use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.
In the class OutputStatisticsOutputDatasetFacetBuilderTest, the method testBuild:
@Test
public void testBuild() {
  OutputStatisticsOutputDatasetFacetBuilder builder =
      new OutputStatisticsOutputDatasetFacetBuilder(
          OpenLineageContext.builder()
              .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
              .sparkContext(sparkContext)
              .build());
  JobMetricsHolder.getInstance().addJobStages(1, Collections.singleton(1));

  TaskMetrics taskMetrics = new TaskMetrics();
  taskMetrics.outputMetrics().setBytesWritten(10L);
  taskMetrics.outputMetrics().setRecordsWritten(100L);
  JobMetricsHolder.getInstance().addMetrics(1, taskMetrics);

  Map<String, OutputDatasetFacet> facetsMap = new HashMap<>();
  builder.build(new SparkListenerJobEnd(1, 1L, JobSucceeded$.MODULE$), facetsMap::put);

  assertThat(facetsMap)
      .hasEntrySatisfying(
          "outputStatistics",
          facet ->
              assertThat(facet)
                  .hasFieldOrPropertyWithValue("rowCount", 100L)
                  .hasFieldOrPropertyWithValue("size", 10L));
}
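The builder follows a consumer-based contract: it receives a Spark listener event and pushes named facets into a BiConsumer sink instead of returning a map, which is why the test can collect results with facetsMap::put. A minimal, self-contained sketch of that contract (all class, method, and facet names below are illustrative, not the project's actual API):

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;

// Hypothetical builder illustrating the event-plus-sink pattern: a guard says
// which events the builder handles, and facets are emitted through a consumer
// rather than returned.
class ExampleFacetBuilder {
  boolean isDefinedAt(Object event) {
    return event instanceof String;
  }

  void build(Object event, BiConsumer<String, String> putFacet) {
    if (isDefinedAt(event)) {
      putFacet.accept("exampleFacet", "value-for-" + event);
    }
  }
}

// Usage mirrors the test: collect emitted facets into a map via facets::put.
class ExampleFacetBuilderDemo {
  public static void main(String[] args) {
    Map<String, String> facets = new HashMap<>();
    new ExampleFacetBuilder().build("jobEnd", facets::put);
    System.out.println(facets); // prints {exampleFacet=value-for-jobEnd}
  }
}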
Use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.
In the class SparkVersionFacetBuilderTest, the method testIsDefinedForSparkListenerEvents:
@Test
public void testIsDefinedForSparkListenerEvents() {
  SparkVersionFacetBuilder builder =
      new SparkVersionFacetBuilder(
          OpenLineageContext.builder()
              .sparkContext(sparkContext)
              .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
              .build());
  assertThat(builder.isDefinedAt(new SparkListenerSQLExecutionEnd(1, 1L))).isTrue();
  assertThat(builder.isDefinedAt(new SparkListenerSQLExecutionStart(1L, "abc", "abc", "abc", null, 1L)))
      .isTrue();
  assertThat(builder.isDefinedAt(new SparkListenerJobStart(1, 1L, Seq$.MODULE$.empty(), new Properties())))
      .isTrue();
  assertThat(builder.isDefinedAt(new SparkListenerJobEnd(1, 1L, JobSucceeded$.MODULE$))).isTrue();
  assertThat(builder.isDefinedAt(new SparkListenerStageSubmitted(null, new Properties()))).isTrue();
  assertThat(builder.isDefinedAt(new SparkListenerStageCompleted(null))).isTrue();
}
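isDefinedAt acts as a partial-function guard: before a builder is invoked, a dispatcher asks it whether it handles the incoming event at all, which is exactly the property this test pins down for all six listener event types. A hedged sketch of that dispatch pattern (the interface and dispatcher here are hypothetical stand-ins, not OpenLineage types):

import java.util.List;
import java.util.function.BiConsumer;

// Hypothetical dispatcher: only builders whose guard accepts the event are
// asked to contribute facets, so an unguarded event type is silently skipped.
class FacetDispatchSketch {
  interface GuardedBuilder<F> {
    boolean isDefinedAt(Object event);

    void build(Object event, BiConsumer<String, F> sink);
  }

  static <F> void dispatch(
      Object event, List<GuardedBuilder<F>> builders, BiConsumer<String, F> sink) {
    for (GuardedBuilder<F> b : builders) {
      if (b.isDefinedAt(event)) {
        b.build(event, sink);
      }
    }
  }
}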
Use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.
In the class InternalEventHandlerFactoryTest, the method setup:
@BeforeAll
public static void setup() {
  sparkContext =
      SparkContext.getOrCreate(
          new SparkConf().setAppName("InternalEventHandlerFactoryTest").setMaster("local"));
  context =
      OpenLineageContext.builder()
          .sparkContext(sparkContext)
          .openLineage(new OpenLineage(OpenLineageClient.OPEN_LINEAGE_CLIENT_URI))
          .build();
}
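A @BeforeAll setup like this is typically paired with an @AfterAll teardown that stops the shared context so later test classes can create their own local SparkContext. The original teardown is not shown above, so the following is an assumed sketch, not the project's code:

import org.apache.spark.SparkContext;
import org.junit.jupiter.api.AfterAll;

class TeardownSketch {
  static SparkContext sparkContext; // assigned by a setup like the one above

  // Assumed companion teardown: stop the shared context so subsequent test
  // classes can call SparkContext.getOrCreate with their own configuration.
  @AfterAll
  public static void tearDown() {
    if (sparkContext != null) {
      sparkContext.stop();
    }
  }
}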
Use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.
In the class OpenLineageRunEventBuilder, the method populateRun:
private RunEvent populateRun(
    Optional<ParentRunFacet> parentRunFacet,
    RunEventBuilder runEventBuilder,
    JobBuilder jobBuilder,
    List<Object> nodes) {
  OpenLineage openLineage = openLineageContext.getOpenLineage();

  RunFacetsBuilder runFacetsBuilder = openLineage.newRunFacetsBuilder();
  parentRunFacet.ifPresent(runFacetsBuilder::parent);
  OpenLineage.JobFacets jobFacets =
      buildFacets(nodes, jobFacetBuilders, openLineage.newJobFacetsBuilder().build());
  List<InputDataset> inputDatasets = buildInputDatasets(nodes);
  List<OutputDataset> outputDatasets = buildOutputDatasets(nodes);
  openLineageContext
      .getQueryExecution()
      .flatMap(qe -> unknownEntryFacetListener.build(qe.optimizedPlan()))
      .ifPresent(facet -> runFacetsBuilder.put("spark_unknown", facet));
  RunFacets runFacets = buildFacets(nodes, runFacetBuilders, runFacetsBuilder.build());

  OpenLineage.RunBuilder runBuilder =
      openLineage.newRunBuilder().runId(openLineageContext.getRunUuid()).facets(runFacets);
  return runEventBuilder
      .run(runBuilder.build())
      .job(jobBuilder.facets(jobFacets).build())
      .inputs(inputDatasets)
      .outputs(outputDatasets)
      .build();
}
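buildFacets itself is not shown in this snippet; a plausible shape, offered purely as an assumption, is a fold that runs every registered builder over every node and layers the collected facets on top of the supplied defaults:

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;

// Hypothetical shape of a buildFacets-style helper (the real helper is not
// shown above): every builder visits every node, and the emitted facets are
// collected into a map seeded with the supplied defaults.
class BuildFacetsSketch {
  interface FacetBuilder<F> {
    void accept(Object node, BiConsumer<String, F> sink);
  }

  static <F> Map<String, F> buildFacets(
      List<Object> nodes, List<FacetBuilder<F>> builders, Map<String, F> defaults) {
    Map<String, F> facets = new HashMap<>(defaults);
    nodes.forEach(node -> builders.forEach(b -> b.accept(node, facets::put)));
    return facets;
  }
}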
Use of io.openlineage.client.OpenLineage in project OpenLineage by OpenLineage.
In the class OpenLineageRunEventBuilder, the method buildOutputDatasets:
private List<OpenLineage.OutputDataset> buildOutputDatasets(List<Object> nodes) {
  log.info(
      "Visiting query plan {} with output dataset builders {}",
      openLineageContext.getQueryExecution(),
      outputDatasetBuilders);
  Function1<LogicalPlan, Collection<OutputDataset>> visitor =
      visitLogicalPlan(PlanUtils.merge(outputDatasetQueryPlanVisitors));
  List<OutputDataset> datasets =
      Stream.concat(
              buildDatasets(nodes, outputDatasetBuilders),
              openLineageContext
                  .getQueryExecution()
                  .map(qe -> visitor.apply(qe.optimizedPlan()))
                  .map(Collection::stream)
                  .orElse(Stream.empty()))
          .collect(Collectors.toList());
  OpenLineage openLineage = openLineageContext.getOpenLineage();
  if (!datasets.isEmpty()) {
    Map<String, OutputDatasetFacet> outputFacetsMap = new HashMap<>();
    nodes.forEach(
        event -> outputDatasetFacetBuilders.forEach(fn -> fn.accept(event, outputFacetsMap::put)));
    Map<String, DatasetFacet> datasetFacetsMap = new HashMap<>();
    nodes.forEach(
        event -> datasetFacetBuilders.forEach(fn -> fn.accept(event, datasetFacetsMap::put)));
    return datasets.stream()
        .map(
            ds ->
                openLineage
                    .newOutputDatasetBuilder()
                    .name(ds.getName())
                    .namespace(ds.getNamespace())
                    .outputFacets(
                        mergeFacets(
                            outputFacetsMap, ds.getOutputFacets(), OutputDatasetOutputFacets.class))
                    .facets(mergeFacets(datasetFacetsMap, ds.getFacets(), DatasetFacets.class))
                    .build())
        .collect(Collectors.toList());
  }
  return datasets;
}
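mergeFacets is likewise not shown; conceptually it reconciles the facets already attached to a dataset with those just collected from the listener events. A minimal sketch under that assumption (the collision precedence chosen here is a guess, not confirmed by the source):

import java.util.HashMap;
import java.util.Map;

// Hypothetical illustration of the merge step (mergeFacets itself is not shown
// above): facets already on the dataset form the base, and freshly collected
// facets are layered on top; in this sketch, collected entries win on key
// collisions.
class MergeFacetsSketch {
  static <F> Map<String, F> merge(Map<String, F> existing, Map<String, F> collected) {
    Map<String, F> merged = new HashMap<>(existing);
    merged.putAll(collected); // assumed precedence: collected overrides existing
    return merged;
  }
}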