Search in sources :

Example 1 with ContentMetadataColumn

use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.

the class PreparationStep method thePreparationShouldHaveThefollowingQualityBar.

/**
 * Cucumber step: fetches the head content of a preparation and verifies the quality counters
 * (valid / invalid / empty) of one of its columns.
 *
 * @param preparationName the preparation name, before {@code suffixName} is applied
 * @param columnNumber the index of the column in the content metadata column list, as a decimal string
 * @param dataTable a two-column table mapping each quality key to its expected count
 * @throws Exception if the request, the parsing of the expected values or an assertion fails
 */
@Then("^The preparation \"(.*)\" should have the following quality bar characteristics on the column number \"(.*)\":$")
public void thePreparationShouldHaveThefollowingQualityBar(String preparationName, String columnNumber, DataTable dataTable) throws Exception {
    Response contentResponse = api.getPreparationContent(context.getPreparationId(suffixName(preparationName)), VERSION_HEAD, HEAD_ID, StringUtils.EMPTY);
    contentResponse.then().statusCode(OK.value());
    DatasetContent content = contentResponse.as(DatasetContent.class);

    // Expected counters come from the feature file as strings and are parsed up front.
    final Map<String, String> expectedByKey = dataTable.asMap(String.class, String.class);
    Integer expectedValid = Integer.valueOf(expectedByKey.get(VALID_CELL));
    Integer expectedInvalid = Integer.valueOf(expectedByKey.get(INVALID_CELL));
    Integer expectedEmpty = Integer.valueOf(expectedByKey.get(EMPTY_CELL));

    ContentMetadataColumn targetColumn = content.metadata.columns.get(Integer.parseInt(columnNumber));

    assertEquals(expectedValid, targetColumn.quality.get(VALID_CELL));
    assertEquals(expectedInvalid, targetColumn.quality.get(INVALID_CELL));
    assertEquals(expectedEmpty, targetColumn.quality.get(EMPTY_CELL));
}
Also used : Response(com.jayway.restassured.response.Response) ContentMetadataColumn(org.talend.dataprep.qa.dto.ContentMetadataColumn) DatasetContent(org.talend.dataprep.qa.dto.DatasetContent) Then(cucumber.api.java.en.Then)

Example 2 with ContentMetadataColumn

use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.

the class PreparationStep method thePreparationShouldHaveThefollowingTypeOnThefollowingColumn.

/**
 * Cucumber step: fetches the head content of a preparation and verifies the type of one of its columns.
 *
 * @param preparationName the preparation name, before {@code suffixName} is applied
 * @param columnType the expected value of the column's {@code type} field
 * @param columnNumber the index of the column in the content metadata column list, as a decimal string
 * @throws Exception if the request, the parsing of the index or the assertion fails
 */
@Then("^The preparation \"(.*)\" should have the following type \"(.*)\" on the following column \"(.*)\"$")
public void thePreparationShouldHaveThefollowingTypeOnThefollowingColumn(String preparationName, String columnType, String columnNumber) throws Exception {
    Response contentResponse = api.getPreparationContent(context.getPreparationId(suffixName(preparationName)), VERSION_HEAD, HEAD_ID, StringUtils.EMPTY);
    contentResponse.then().statusCode(OK.value());

    DatasetContent content = contentResponse.as(DatasetContent.class);
    ContentMetadataColumn targetColumn = content.metadata.columns.get(Integer.parseInt(columnNumber));

    assertEquals(columnType, targetColumn.type);
}
Also used : Response(com.jayway.restassured.response.Response) ContentMetadataColumn(org.talend.dataprep.qa.dto.ContentMetadataColumn) DatasetContent(org.talend.dataprep.qa.dto.DatasetContent) Then(cucumber.api.java.en.Then)

Example 3 with ContentMetadataColumn

use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.

the class DataPrepStep method checkQualityPerColumn.

/**
 * Compares the given columns against the expected columns described in a JSON classpath resource.
 * <p>
 * The resource is deserialized as a {@link DatasetContent} and its metadata columns are used as the
 * expectation. Both lists are sorted in place (the {@code columns} argument is therefore reordered)
 * and then compared pairwise on id, name, type, domain and on the "valid", "empty" and "invalid"
 * quality counters. When both the expected and the actual column carry statistics, the expected
 * pattern frequency table and frequency table must contain every actual entry.
 *
 * @param columns the actual columns to check; sorted in place by this method
 * @param expectedQualityFilename the classpath resource (resolved relative to {@code DataPrepStep})
 * holding the expected content; when {@code null} nothing is checked
 * @throws Exception if the resource cannot be read or parsed
 */
protected void checkQualityPerColumn(List<ContentMetadataColumn> columns, String expectedQualityFilename) throws Exception {
    if (expectedQualityFilename == null) {
        return;
    }
    List<ContentMetadataColumn> expectedQualityPerColumn;
    // try-with-resources: the classpath stream is closed even when deserialization fails
    try (InputStream expectedQualityFileStream = DataPrepStep.class.getResourceAsStream(expectedQualityFilename)) {
        // getResourceAsStream returns null for an unknown resource; fail with an explicit message
        Assert.assertNotNull("Expected quality file not found on the classpath: " + expectedQualityFilename, expectedQualityFileStream);
        expectedQualityPerColumn = objectMapper.readValue(expectedQualityFileStream, DatasetContent.class).metadata.columns;
    }
    Assert.assertEquals(expectedQualityPerColumn.size(), columns.size());
    // Sort both sides the same way so that columns can be compared index by index
    Collections.sort(columns);
    Collections.sort(expectedQualityPerColumn);
    for (int i = 0; i < expectedQualityPerColumn.size(); i++) {
        ContentMetadataColumn expectedColumn = expectedQualityPerColumn.get(i);
        ContentMetadataColumn column = columns.get(i);
        Assert.assertEquals(expectedColumn.id, column.id);
        Assert.assertEquals(expectedColumn.name, column.name);
        Assert.assertEquals(expectedColumn.type, column.type);
        Assert.assertEquals(expectedColumn.domain, column.domain);

        Map<String, Integer> expectedQuality = expectedColumn.quality;
        Map<String, Integer> quality = column.quality;
        // Same order as before (valid, empty, invalid); the message now names the counter actually checked
        for (String qualityKey : new String[] { "valid", "empty", "invalid" }) {
            Assert.assertEquals("The " + qualityKey + " records count " + expectedQuality.get(qualityKey) + " is wrong: " + quality.get(qualityKey), expectedQuality.get(qualityKey), quality.get(qualityKey));
        }

        Statistics expectedStatistics = expectedColumn.statistics;
        Statistics statistics = column.statistics;
        if (expectedStatistics != null && statistics != null) {
            // One-directional check: every actual entry must be present in the expected table
            Assert.assertTrue(
                    "Difference between expected records and actual records:" + CollectionUtils.disjunction(expectedStatistics.patternFrequencyTable, statistics.patternFrequencyTable).toString(),
                    expectedStatistics.patternFrequencyTable.containsAll(statistics.patternFrequencyTable));
            Assert.assertTrue(expectedStatistics.frequencyTable.containsAll(statistics.frequencyTable));
        }
    }
}
Also used : InputStream(java.io.InputStream) ContentMetadataColumn(org.talend.dataprep.qa.dto.ContentMetadataColumn) Statistics(org.talend.dataprep.qa.dto.Statistics)

Example 4 with ContentMetadataColumn

use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.

the class DataPrepStep method getDatasetContent.

/**
 * Fetches the content of a dataset, polling the API until the frequency table of the first
 * metadata column is not empty (presumably the sign that the DQ analysis is finished and all
 * fields are up-to-date).
 *
 * @param datasetId the id of the dataset
 * @param tql the TQL filter passed to the dataset request
 * @return the dataset content obtained from the last polled response
 */
protected DatasetContent getDatasetContent(String datasetId, String tql) throws Exception {
    AtomicReference<DatasetContent> latestContent = new AtomicReference<>();
    // TODO this wait is probably useless since {DataPrepStep#checkDatasetMetadataStatus} is used before
    api.waitResponse("Waiting frequency table from dataset metadata of " + datasetId).until(() -> {
        Response datasetResponse = api.getDataset(datasetId, tql);
        datasetResponse.then().statusCode(200);
        DatasetContent polledContent = datasetResponse.as(DatasetContent.class);
        // Keep the most recent content so it can be returned once the condition is met
        latestContent.set(polledContent);
        // A default column stands in when the dataset has no column at all
        ContentMetadataColumn firstColumn = polledContent.metadata.columns.stream().findFirst().orElse(new ContentMetadataColumn());
        return firstColumn.statistics.frequencyTable;
    }, is(not(empty())));
    return latestContent.get();
}
Also used : Response(com.jayway.restassured.response.Response) ContentMetadataColumn(org.talend.dataprep.qa.dto.ContentMetadataColumn) DatasetContent(org.talend.dataprep.qa.dto.DatasetContent) AtomicReference(java.util.concurrent.atomic.AtomicReference)

Aggregations

ContentMetadataColumn (org.talend.dataprep.qa.dto.ContentMetadataColumn): 4, Response (com.jayway.restassured.response.Response): 3, DatasetContent (org.talend.dataprep.qa.dto.DatasetContent): 3, Then (cucumber.api.java.en.Then): 2, InputStream (java.io.InputStream): 1, AtomicReference (java.util.concurrent.atomic.AtomicReference): 1, Statistics (org.talend.dataprep.qa.dto.Statistics): 1