Use of org.talend.dataprep.qa.dto.Statistics in project data-prep by Talend.
Class DataPrepStep, method checkQualityPerColumn.
/**
 * Compares the given columns with the columns described in an expected-quality resource file.
 *
 * <p>The expected file is loaded as a classpath resource relative to {@code DataPrepStep} and
 * deserialized as a {@code DatasetContent}; its {@code metadata.columns} list is the reference.
 * Both lists are sorted in place (note: this reorders the caller's {@code columns} list) and then
 * compared pairwise on id, name, type, domain and the "valid" / "empty" / "invalid" quality
 * counters. When both the expected and the actual column carry statistics, every actual pattern
 * frequency and value frequency entry must be present in the expected ones.
 *
 * @param columns the actual columns to verify; sorted in place by this method
 * @param expectedQualityFilename resource name of the expected dataset content, or {@code null}
 *        to skip the check entirely
 * @throws Exception if the expected resource cannot be read or parsed
 */
protected void checkQualityPerColumn(List<ContentMetadataColumn> columns, String expectedQualityFilename) throws Exception {
    if (expectedQualityFilename == null) {
        return;
    }

    List<ContentMetadataColumn> expectedQualityPerColumn;
    // try-with-resources: the stream is released even if parsing fails, independently of how the mapper is configured.
    try (InputStream expectedQualityFileStream = DataPrepStep.class.getResourceAsStream(expectedQualityFilename)) {
        // getResourceAsStream returns null for an unknown resource: fail with a message naming the file.
        Assert.assertNotNull("Expected quality file not found: " + expectedQualityFilename, expectedQualityFileStream);
        expectedQualityPerColumn = objectMapper.readValue(expectedQualityFileStream, DatasetContent.class).metadata.columns;
    }

    Assert.assertEquals(expectedQualityPerColumn.size(), columns.size());

    // Align both lists on the same ordering before the pairwise comparison.
    Collections.sort(columns);
    Collections.sort(expectedQualityPerColumn);

    for (int i = 0; i < expectedQualityPerColumn.size(); i++) {
        ContentMetadataColumn expectedColumn = expectedQualityPerColumn.get(i);
        ContentMetadataColumn column = columns.get(i);

        Assert.assertEquals(expectedColumn.id, column.id);
        Assert.assertEquals(expectedColumn.name, column.name);
        Assert.assertEquals(expectedColumn.type, column.type);
        Assert.assertEquals(expectedColumn.domain, column.domain);

        Map<String, Integer> expectedQuality = expectedColumn.quality;
        Map<String, Integer> quality = column.quality;
        // Each counter gets its own label so a failure names the metric that actually differs.
        for (String counter : new String[] { "valid", "empty", "invalid" }) {
            Integer expectedCount = expectedQuality.get(counter);
            Integer actualCount = quality.get(counter);
            Assert.assertEquals(
                    "The " + counter + " records count " + expectedCount + " is wrong: " + actualCount,
                    expectedCount, actualCount);
        }

        Statistics expectedStatistics = expectedColumn.statistics;
        Statistics statistics = column.statistics;
        // Statistics are only compared when both sides provide them; a missing side is not a failure here.
        if (expectedStatistics != null && statistics != null) {
            Assert.assertTrue(
                    "Difference between expected records and actual records:"
                            + CollectionUtils.disjunction(expectedStatistics.patternFrequencyTable, statistics.patternFrequencyTable),
                    expectedStatistics.patternFrequencyTable.containsAll(statistics.patternFrequencyTable));
            Assert.assertTrue(expectedStatistics.frequencyTable.containsAll(statistics.frequencyTable));
        }
    }
}
Aggregations