Search in sources :

Example 1 with DataQualityMetricsInputDatasetFacet

Use of io.openlineage.client.OpenLineage.DataQualityMetricsInputDatasetFacet in project OpenLineage by OpenLineage.

The method factory of the class OpenLineageTest.

/**
 * Builds a START {@link RunEvent} through the {@link OpenLineage} factory/builder methods,
 * serializes it to JSON with Jackson, and verifies that the JSON can be read back both as the
 * client model and as the server model without losing information.
 *
 * <p>The event carries one input dataset (with a dataset version facet and data quality
 * metrics, including per-column metrics and quantiles) and one output dataset (with a dataset
 * version facet and output statistics).
 *
 * <p>All dataset assertions are made against the <em>deserialized</em> event, so a field that
 * is dropped or mis-typed while reading the JSON fails the test directly rather than only
 * through the final JSON string comparison.
 *
 * @throws JsonProcessingException if Jackson fails to serialize or deserialize the event
 */
@Test
public void factory() throws JsonProcessingException {
    ZonedDateTime now = ZonedDateTime.now(ZoneId.of("UTC"));
    URI producer = URI.create("producer");
    OpenLineage ol = new OpenLineage(producer);

    // Run: random id plus a nominal-time facet whose start and end are both "now".
    UUID runId = UUID.randomUUID();
    RunFacets runFacets = ol.newRunFacetsBuilder()
            .nominalTime(ol.newNominalTimeRunFacetBuilder()
                    .nominalStartTime(now)
                    .nominalEndTime(now)
                    .build())
            .build();
    Run run = ol.newRunBuilder().runId(runId).facets(runFacets).build();

    // Job: name and namespace only, with an empty facets container.
    String name = "jobName";
    String namespace = "namespace";
    JobFacets jobFacets = ol.newJobFacetsBuilder().build();
    Job job = ol.newJobBuilder().namespace(namespace).name(name).facets(jobFacets).build();

    // Single input dataset with a version facet and data quality metrics for column "mycol".
    List<InputDataset> inputs = Arrays.asList(
            ol.newInputDatasetBuilder()
                    .namespace("ins")
                    .name("input")
                    .facets(ol.newDatasetFacetsBuilder()
                            .version(ol.newDatasetVersionDatasetFacet("input-version"))
                            .build())
                    .inputFacets(ol.newInputDatasetInputFacetsBuilder()
                            .dataQualityMetrics(ol.newDataQualityMetricsInputDatasetFacetBuilder()
                                    .rowCount(10L)
                                    .bytes(20L)
                                    .columnMetrics(ol.newDataQualityMetricsInputDatasetFacetColumnMetricsBuilder()
                                            .put("mycol", ol.newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalBuilder()
                                                    .count(10D)
                                                    .distinctCount(10L)
                                                    .max(30D)
                                                    .min(5D)
                                                    .nullCount(1L)
                                                    .sum(3000D)
                                                    .quantiles(ol.newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantilesBuilder()
                                                            .put("25", 52D)
                                                            .build())
                                                    .build())
                                            .build())
                                    .build())
                            .build())
                    .build());

    // Single output dataset with a version facet and output statistics.
    List<OutputDataset> outputs = Arrays.asList(
            ol.newOutputDatasetBuilder()
                    .namespace("ons")
                    .name("output")
                    .facets(ol.newDatasetFacetsBuilder()
                            .version(ol.newDatasetVersionDatasetFacet("output-version"))
                            .build())
                    .outputFacets(ol.newOutputDatasetOutputFacetsBuilder()
                            .outputStatistics(ol.newOutputStatisticsOutputDatasetFacet(10L, 20L))
                            .build())
                    .build());

    RunEvent runStateUpdate = ol.newRunEventBuilder()
            .eventType(OpenLineage.RunEvent.EventType.START)
            .eventTime(now)
            .run(run)
            .job(job)
            .inputs(inputs)
            .outputs(outputs)
            .build();

    // Mapper: java.time support, nulls omitted, dates written as text, indented output.
    ObjectMapper mapper = new ObjectMapper();
    mapper.registerModule(new JavaTimeModule());
    mapper.setSerializationInclusion(Include.NON_NULL);
    mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
    mapper.configure(SerializationFeature.INDENT_OUTPUT, true);
    String json = mapper.writeValueAsString(runStateUpdate);

    // Round trip through the client model.
    {
        RunEvent read = mapper.readValue(json, RunEvent.class);
        assertEquals(producer, read.getProducer());
        assertEquals(runId, read.getRun().getRunId());
        assertEquals(name, read.getJob().getName());
        assertEquals(namespace, read.getJob().getNamespace());
        assertEquals(runStateUpdate.getEventType(), read.getEventType());
        assertEquals(runStateUpdate.getEventTime(), read.getEventTime());

        // Inspect the deserialized event, not the one built above: asserting on
        // runStateUpdate would only re-check the builders and never exercise readValue.
        assertEquals(1, read.getInputs().size());
        InputDataset inputDataset = read.getInputs().get(0);
        assertEquals("ins", inputDataset.getNamespace());
        assertEquals("input", inputDataset.getName());
        assertEquals("input-version", inputDataset.getFacets().getVersion().getDatasetVersion());

        DataQualityMetricsInputDatasetFacet dq = inputDataset.getInputFacets().getDataQualityMetrics();
        assertEquals((Long) 10L, dq.getRowCount());
        assertEquals((Long) 20L, dq.getBytes());

        DataQualityMetricsInputDatasetFacetColumnMetricsAdditional colMetrics =
                dq.getColumnMetrics().getAdditionalProperties().get("mycol");
        assertEquals((Double) 10D, colMetrics.getCount());
        assertEquals((Long) 10L, colMetrics.getDistinctCount());
        assertEquals((Double) 30D, colMetrics.getMax());
        assertEquals((Double) 5D, colMetrics.getMin());
        assertEquals((Long) 1L, colMetrics.getNullCount());
        assertEquals((Double) 3000D, colMetrics.getSum());
        assertEquals((Double) 52D, colMetrics.getQuantiles().getAdditionalProperties().get("25"));

        assertEquals(1, read.getOutputs().size());
        OutputDataset outputDataset = read.getOutputs().get(0);
        assertEquals("ons", outputDataset.getNamespace());
        assertEquals("output", outputDataset.getName());
        assertEquals("output-version", outputDataset.getFacets().getVersion().getDatasetVersion());
        assertEquals((Long) 10L, outputDataset.getOutputFacets().getOutputStatistics().getRowCount());
        assertEquals((Long) 20L, outputDataset.getOutputFacets().getOutputStatistics().getSize());

        // Re-serializing the deserialized event must reproduce the original JSON.
        // NOTE(review): roundTrip is a helper defined elsewhere in this class; presumably it
        // normalizes the JSON before comparison - confirm against its definition.
        assertEquals(roundTrip(json), roundTrip(mapper.writeValueAsString(read)));
        assertEquals(json, mapper.writeValueAsString(read));
    }

    // Round trip through the server model, which must accept and reproduce the same JSON.
    {
        io.openlineage.server.OpenLineage.RunEvent readServer =
                mapper.readValue(json, io.openlineage.server.OpenLineage.RunEvent.class);
        assertEquals(producer, readServer.getProducer());
        assertEquals(runId, readServer.getRun().getRunId());
        assertEquals(name, readServer.getJob().getName());
        assertEquals(namespace, readServer.getJob().getNamespace());
        // Client and server declare separate EventType enums, so compare by constant name.
        assertEquals(runStateUpdate.getEventType().name(), readServer.getEventType().name());
        assertEquals(runStateUpdate.getEventTime(), readServer.getEventTime());
        assertEquals(json, mapper.writeValueAsString(readServer));
    }
}
Also used : JavaTimeModule(com.fasterxml.jackson.datatype.jsr310.JavaTimeModule) Run(io.openlineage.client.OpenLineage.Run) URI(java.net.URI) ZonedDateTime(java.time.ZonedDateTime) InputDataset(io.openlineage.client.OpenLineage.InputDataset) OutputDataset(io.openlineage.client.OpenLineage.OutputDataset) DataQualityMetricsInputDatasetFacet(io.openlineage.client.OpenLineage.DataQualityMetricsInputDatasetFacet) UUID(java.util.UUID) RunFacets(io.openlineage.client.OpenLineage.RunFacets) JobFacets(io.openlineage.client.OpenLineage.JobFacets) Job(io.openlineage.client.OpenLineage.Job) DataQualityMetricsInputDatasetFacetColumnMetricsAdditional(io.openlineage.client.OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditional) RunEvent(io.openlineage.client.OpenLineage.RunEvent) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Aggregations

ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 JavaTimeModule (com.fasterxml.jackson.datatype.jsr310.JavaTimeModule)1 DataQualityMetricsInputDatasetFacet (io.openlineage.client.OpenLineage.DataQualityMetricsInputDatasetFacet)1 DataQualityMetricsInputDatasetFacetColumnMetricsAdditional (io.openlineage.client.OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditional)1 InputDataset (io.openlineage.client.OpenLineage.InputDataset)1 Job (io.openlineage.client.OpenLineage.Job)1 JobFacets (io.openlineage.client.OpenLineage.JobFacets)1 OutputDataset (io.openlineage.client.OpenLineage.OutputDataset)1 Run (io.openlineage.client.OpenLineage.Run)1 RunEvent (io.openlineage.client.OpenLineage.RunEvent)1 RunFacets (io.openlineage.client.OpenLineage.RunFacets)1 URI (java.net.URI)1 ZonedDateTime (java.time.ZonedDateTime)1 UUID (java.util.UUID)1 Test (org.junit.Test)1