Search in sources:

Example 1 with Statistics

Use of org.talend.dataprep.qa.dto.Statistics in the project data-prep by Talend.

From the class DataPrepStep, method checkQualityPerColumn.

/**
 * Compares the metadata, quality counters and statistics of the given columns with the expected
 * values stored in a JSON classpath resource.
 *
 * <p>The resource is deserialized as a {@code DatasetContent} and its {@code metadata.columns}
 * list is used as the expectation. Both lists are sorted with {@link Collections#sort(List)}
 * before being compared position by position; note that this sorts the caller's
 * {@code columns} list in place.
 *
 * <p>For each pair of columns the id, name, type and domain must be equal, as must the
 * {@code "valid"}, {@code "empty"} and {@code "invalid"} quality counters. When both the expected
 * and the actual column carry statistics, every actual pattern-frequency and frequency entry
 * must be present in the expected ones; when either side has no statistics that check is skipped.
 *
 * @param columns the actual columns to verify; sorted in place by this method
 * @param expectedQualityFilename classpath resource (resolved relative to {@code DataPrepStep})
 *        holding the expected content; when {@code null} the method returns without checking
 * @throws Exception propagated from closing the resource stream or from
 *         {@code objectMapper.readValue}
 */
protected void checkQualityPerColumn(List<ContentMetadataColumn> columns, String expectedQualityFilename) throws Exception {
    if (expectedQualityFilename == null) {
        return;
    }

    List<ContentMetadataColumn> expectedQualityPerColumn;
    // try-with-resources closes the stream regardless of how the mapper is configured.
    try (InputStream expectedQualityFileStream = DataPrepStep.class.getResourceAsStream(expectedQualityFilename)) {
        // getResourceAsStream returns null for a missing resource; fail with a message naming the file.
        Assert.assertNotNull("Expected quality file not found on the classpath: " + expectedQualityFilename, expectedQualityFileStream);
        expectedQualityPerColumn = objectMapper.readValue(expectedQualityFileStream, DatasetContent.class).metadata.columns;
    }

    Assert.assertEquals(expectedQualityPerColumn.size(), columns.size());

    // Sort both sides the same way so that columns can be compared index by index.
    Collections.sort(columns);
    Collections.sort(expectedQualityPerColumn);

    for (int i = 0; i < expectedQualityPerColumn.size(); i++) {
        ContentMetadataColumn expectedColumn = expectedQualityPerColumn.get(i);
        ContentMetadataColumn column = columns.get(i);

        Assert.assertEquals(expectedColumn.id, column.id);
        Assert.assertEquals(expectedColumn.name, column.name);
        Assert.assertEquals(expectedColumn.type, column.type);
        Assert.assertEquals(expectedColumn.domain, column.domain);

        Map<String, Integer> expectedQuality = expectedColumn.quality;
        Map<String, Integer> quality = column.quality;
        assertQualityCount("valid", expectedQuality, quality);
        assertQualityCount("empty", expectedQuality, quality);
        assertQualityCount("invalid", expectedQuality, quality);

        Statistics expectedStatistics = expectedColumn.statistics;
        Statistics statistics = column.statistics;
        // Statistics are only compared when both sides provide them.
        if (expectedStatistics != null && statistics != null) {
            // One-directional check: every actual entry must appear in the expected table.
            Assert.assertTrue(
                    "Difference between expected records and actual records:"
                            + CollectionUtils.disjunction(expectedStatistics.patternFrequencyTable, statistics.patternFrequencyTable).toString(),
                    expectedStatistics.patternFrequencyTable.containsAll(statistics.patternFrequencyTable));
            Assert.assertTrue(expectedStatistics.frequencyTable.containsAll(statistics.frequencyTable));
        }
    }
}

/**
 * Asserts that one quality counter has the expected value, with a message naming the counter.
 *
 * @param key the quality counter to compare ({@code "valid"}, {@code "empty"} or {@code "invalid"})
 * @param expectedQuality the expected quality counters
 * @param quality the actual quality counters
 */
private void assertQualityCount(String key, Map<String, Integer> expectedQuality, Map<String, Integer> quality) {
    Integer expectedCount = expectedQuality.get(key);
    Integer actualCount = quality.get(key);
    Assert.assertEquals(
            "The " + key + " records count is wrong: expected " + expectedCount + " but was " + actualCount,
            expectedCount,
            actualCount);
}
Also used: InputStream (java.io.InputStream), ContentMetadataColumn (org.talend.dataprep.qa.dto.ContentMetadataColumn), Statistics (org.talend.dataprep.qa.dto.Statistics)

Aggregations

InputStream (java.io.InputStream): 1, ContentMetadataColumn (org.talend.dataprep.qa.dto.ContentMetadataColumn): 1, Statistics (org.talend.dataprep.qa.dto.Statistics): 1