use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.
the class PreparationStep method thePreparationShouldHaveThefollowingQualityBar.
/**
 * Cucumber step: verifies the quality counters of one column of a preparation.
 *
 * The preparation content is requested through the API (using VERSION_HEAD / HEAD_ID), the response
 * must have the OK status, and the expected counters are read from the step's data table under the
 * VALID_CELL, INVALID_CELL and EMPTY_CELL keys.
 *
 * @param preparationName the preparation name as written in the feature file (passed through suffixName)
 * @param columnNumber index of the column in the returned metadata column list, as a decimal string
 * @param dataTable two-column table holding the expected valid / invalid / empty counts
 * @throws Exception if the request, the parsing of the numbers or the response conversion fails
 */
@Then("^The preparation \"(.*)\" should have the following quality bar characteristics on the column number \"(.*)\":$")
public void thePreparationShouldHaveThefollowingQualityBar(String preparationName, String columnNumber, DataTable dataTable) throws Exception {
    final Response contentResponse = api.getPreparationContent( //
            context.getPreparationId(suffixName(preparationName)), VERSION_HEAD, HEAD_ID, StringUtils.EMPTY);
    contentResponse.then().statusCode(OK.value());
    final DatasetContent preparationContent = contentResponse.as(DatasetContent.class);

    // Expected counters are parsed before the column lookup, so malformed table values are reported first.
    final Map<String, String> expectedCounts = dataTable.asMap(String.class, String.class);
    final Integer expectedValid = Integer.valueOf(expectedCounts.get(VALID_CELL));
    final Integer expectedInvalid = Integer.valueOf(expectedCounts.get(INVALID_CELL));
    final Integer expectedEmpty = Integer.valueOf(expectedCounts.get(EMPTY_CELL));

    final int columnIndex = Integer.parseInt(columnNumber);
    final ContentMetadataColumn targetColumn = preparationContent.metadata.columns.get(columnIndex);

    assertEquals(expectedValid, targetColumn.quality.get(VALID_CELL));
    assertEquals(expectedInvalid, targetColumn.quality.get(INVALID_CELL));
    assertEquals(expectedEmpty, targetColumn.quality.get(EMPTY_CELL));
}
use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.
the class PreparationStep method thePreparationShouldHaveThefollowingTypeOnThefollowingColumn.
/**
 * Cucumber step: verifies the type reported for one column of a preparation.
 *
 * The preparation content is requested through the API (using VERSION_HEAD / HEAD_ID) and the response
 * must have the OK status before the column metadata is inspected.
 *
 * @param preparationName the preparation name as written in the feature file (passed through suffixName)
 * @param columnType the type the column is expected to have
 * @param columnNumber index of the column in the returned metadata column list, as a decimal string
 * @throws Exception if the request or the response conversion fails
 */
@Then("^The preparation \"(.*)\" should have the following type \"(.*)\" on the following column \"(.*)\"$")
public void thePreparationShouldHaveThefollowingTypeOnThefollowingColumn(String preparationName, String columnType, String columnNumber) throws Exception {
    final Response contentResponse = api.getPreparationContent( //
            context.getPreparationId(suffixName(preparationName)), VERSION_HEAD, HEAD_ID, StringUtils.EMPTY);
    contentResponse.then().statusCode(OK.value());

    final DatasetContent preparationContent = contentResponse.as(DatasetContent.class);
    final int columnIndex = Integer.parseInt(columnNumber);
    final ContentMetadataColumn targetColumn = preparationContent.metadata.columns.get(columnIndex);

    assertEquals(columnType, targetColumn.type);
}
use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.
the class DataPrepStep method checkQualityPerColumn.
/**
 * Compares the given columns against the expected columns stored in a JSON classpath resource.
 *
 * Both lists are sorted with their natural ordering and then compared pairwise on id, name, type,
 * domain and the "valid" / "empty" / "invalid" quality counters. When both the expected and the actual
 * column carry statistics, every entry of the actual pattern-frequency and frequency tables must also
 * be present in the expected tables.
 *
 * Note: {@code columns} is sorted in place, so the caller's list is reordered by this call.
 *
 * @param columns the actual column metadata to verify
 * @param expectedQualityFilename classpath resource (resolved relative to {@code DataPrepStep}) holding the
 * expected content as JSON; when {@code null} the check is skipped entirely
 * @throws Exception if the resource cannot be read or parsed
 */
protected void checkQualityPerColumn(List<ContentMetadataColumn> columns, String expectedQualityFilename) throws Exception {
    if (expectedQualityFilename == null) {
        // No reference file supplied: nothing to verify.
        return;
    }

    final List<ContentMetadataColumn> expectedQualityPerColumn;
    // try-with-resources guarantees the classpath stream is released even when parsing fails.
    try (InputStream expectedQualityFileStream = DataPrepStep.class.getResourceAsStream(expectedQualityFilename)) {
        // getResourceAsStream returns null for an unknown resource; fail with an explicit message instead
        // of letting the JSON parser choke on a null source.
        Assert.assertNotNull("Expected quality file not found on the classpath: " + expectedQualityFilename,
                expectedQualityFileStream);
        expectedQualityPerColumn = objectMapper.readValue(expectedQualityFileStream, DatasetContent.class).metadata.columns;
    }

    Assert.assertEquals(expectedQualityPerColumn.size(), columns.size());
    // Sort both sides the same way so that columns can be compared index by index.
    Collections.sort(columns);
    Collections.sort(expectedQualityPerColumn);

    for (int i = 0; i < expectedQualityPerColumn.size(); i++) {
        ContentMetadataColumn expectedColumn = expectedQualityPerColumn.get(i);
        ContentMetadataColumn column = columns.get(i);

        Assert.assertEquals(expectedColumn.id, column.id);
        Assert.assertEquals(expectedColumn.name, column.name);
        Assert.assertEquals(expectedColumn.type, column.type);
        Assert.assertEquals(expectedColumn.domain, column.domain);

        Map<String, Integer> expectedQuality = expectedColumn.quality;
        Map<String, Integer> quality = column.quality;
        // Each counter gets a message naming the counter that actually failed.
        for (String counter : new String[] { "valid", "empty", "invalid" }) {
            Assert.assertEquals(
                    "The " + counter + " records count " + expectedQuality.get(counter) + " is wrong: " + quality.get(counter),
                    expectedQuality.get(counter), quality.get(counter));
        }

        Statistics expectedStatistics = expectedColumn.statistics;
        Statistics statistics = column.statistics;
        // Statistics are only compared when both sides provide them.
        if (expectedStatistics != null && statistics != null) {
            Assert.assertTrue(
                    "Difference between expected records and actual records:"
                            + CollectionUtils.disjunction(expectedStatistics.patternFrequencyTable, statistics.patternFrequencyTable).toString(),
                    expectedStatistics.patternFrequencyTable.containsAll(statistics.patternFrequencyTable));
            Assert.assertTrue(expectedStatistics.frequencyTable.containsAll(statistics.frequencyTable));
        }
    }
}
use of org.talend.dataprep.qa.dto.ContentMetadataColumn in project data-prep by Talend.
the class DataPrepStep method getDatasetContent.
/**
 * Fetches the content of a dataset, polling until the frequency table of its first column is populated.
 *
 * Each polling attempt requests the dataset, requires an HTTP 200 status and remembers the converted
 * content; the wait ends once the frequency table of the first metadata column is not empty. The content
 * from the last attempt is returned.
 *
 * @param datasetId the id of the dataset
 * @param tql the TQL filter to apply to the dataset
 * @return the dataset content obtained by the last polling attempt
 * @throws Exception if the wait or the request fails
 */
protected DatasetContent getDatasetContent(String datasetId, String tql) throws Exception {
    // Holder for the most recent response, since the lambda cannot assign to a local variable.
    final AtomicReference<DatasetContent> latestContent = new AtomicReference<>();

    // TODO this wait is probably redundant because {DataPrepStep#checkDatasetMetadataStatus} is used beforehand
    api.waitResponse("Waiting frequency table from dataset metadata of " + datasetId).until(() -> {
        final Response datasetResponse = api.getDataset(datasetId, tql);
        datasetResponse.then().statusCode(200);

        final DatasetContent fetched = datasetResponse.as(DatasetContent.class);
        latestContent.set(fetched);

        // Falls back to a fresh column object when the dataset reports no column at all.
        final ContentMetadataColumn firstColumn = fetched.metadata.columns.stream() //
                .findFirst() //
                .orElse(new ContentMetadataColumn());
        return firstColumn.statistics.frequencyTable;
    }, is(not(empty())));

    return latestContent.get();
}
Aggregations