Use of org.talend.dataprep.api.dataset.row.Flag in the project data-prep by Talend.
Class TransformationService, method getSemanticDomains.
/**
 * Computes the semantic domains of a single column from the supplied records.
 *
 * <p>The analysis works on a copy of the column metadata whose "semantic domain forced" flag is cleared: a
 * {@code SEMANTIC} analyzer is fed the column's value from every record, and the statistics adapter then writes
 * the analyzer result onto that copy.
 *
 * @param metadata the dataset metadata from which the column definition is looked up.
 * @param columnId the id of the column to analyze.
 * @param records the dataset records, read as UTF-8 JSON and deserialized into a {@code DataSet}.
 * @return the semantic domains carried by the column copy once the analyzer result has been adapted.
 * @throws IOException if the JSON parser cannot be created or the records cannot be read.
 */
private List<SemanticDomain> getSemanticDomains(DataSetMetadata metadata, String columnId, InputStream records) throws IOException {
    // Work on a copy with the "forced" flag switched off, so that the statistics adapter sets every available
    // domain on it.
    final ColumnMetadata analyzedColumn = column() //
            .copy(metadata.getRowMetadata().getById(columnId)) //
            .semanticDomainForce(false) //
            .build();

    final Analyzer<Analyzers.Result> semanticAnalyzer = analyzerService.build(analyzedColumn, SEMANTIC);
    semanticAnalyzer.init();

    try (final JsonParser jsonParser = mapper.getFactory().createParser(new InputStreamReader(records, UTF_8))) {
        final DataSet parsedDataSet = mapper.readerFor(DataSet.class).readValue(jsonParser);
        // Only the value of the requested column is handed to the analyzer for each record.
        parsedDataSet.getRecords().forEach(row -> semanticAnalyzer.analyze(row.get(columnId)));
        // Finish the analysis before the parser is closed.
        semanticAnalyzer.end();
    }

    // The adapter updates the column copy in place from the analyzer result.
    statisticsAdapter.adapt(singletonList(analyzedColumn), semanticAnalyzer.getResult());
    return analyzedColumn.getSemanticDomains();
}
Aggregations