Use of org.talend.dataprep.api.dataset.statistics.SemanticDomain in the Talend data-prep project:
the TransformationService method getPreparationColumnSemanticCategories.
/**
 * Return the semantic types for a given preparation / column.
 *
 * @param preparationId the preparation id.
 * @param columnId the column id.
 * @param stepId the step id (optional, if not specified, it's 'head').
 * @return the semantic types for a given preparation / column.
 * @throws TDPException (UNEXPECTED_EXCEPTION) if the cached metadata/content cannot be read.
 */
@RequestMapping(value = "/preparations/{preparationId}/columns/{columnId}/types", method = GET)
@ApiOperation(value = "list the types of the wanted column", notes = "This list can be used by user to change the column type.")
@Timed
@PublicAPI
public List<SemanticDomain> getPreparationColumnSemanticCategories(
        @ApiParam(value = "The preparation id") @PathVariable String preparationId,
        @ApiParam(value = "The column id") @PathVariable String columnId,
        @ApiParam(value = "The preparation version") @RequestParam(defaultValue = "head") String stepId) {
    LOG.debug("listing preparation semantic categories for preparation #{} column #{}@{}", preparationId, columnId, stepId);

    // get the preparation
    final Preparation preparation = getPreparation(preparationId);

    // get the step (in case of 'head', the real step id must be found)
    // NOTE(review): assumes the preparation always has at least one step (the root step);
    // an empty step list would throw IndexOutOfBoundsException here — confirm with Preparation's invariants.
    final String version = //
            StringUtils.equals("head", stepId) ? preparation.getSteps().get(preparation.getSteps().size() - 1).getId() : stepId;

    /*
     * OK, this one is a bit tricky so pay attention.
     *
     * To be able to get the semantic types, the analyzer service needs to run on the result of the preparation.
     *
     * The result must be found in the cache, so if the preparation is not cached, the preparation is run so that
     * it gets cached.
     *
     * Then, the analyzer service just gets the data from the cache. That's it.
     */
    // generate the cache keys for both metadata & content
    final ContentCacheKey metadataKey = cacheKeyGenerator
            .metadataBuilder()
            .preparationId(preparationId)
            .stepId(version)
            .sourceType(HEAD)
            .build();
    final ContentCacheKey contentKey = cacheKeyGenerator
            .contentBuilder()
            .datasetId(preparation.getDataSetId())
            .preparationId(preparationId)
            .stepId(version)
            .format(JSON)
            .sourceType(HEAD)
            .build();

    // if the preparation is not cached, let's compute it to have some cache
    if (!contentCache.has(metadataKey) || !contentCache.has(contentKey)) {
        // NOTE(review): this passes the raw stepId (possibly the literal "head") while the cache keys above
        // use the resolved 'version' — presumably addPreparationInCache resolves 'head' the same way; confirm,
        // otherwise the freshly computed cache entry may be stored under a different key than the one read below.
        addPreparationInCache(preparation, stepId);
    }

    // run the analyzer service on the cached content
    // (locals renamed from metadataCache/contentCache: the old 'contentCache' local shadowed the
    // contentCache field, which is why the field had to be accessed as 'this.contentCache')
    try (final InputStream metadataStream = contentCache.get(metadataKey);
         final InputStream contentStream = contentCache.get(contentKey)) {
        final DataSetMetadata metadata = mapper.readerFor(DataSetMetadata.class).readValue(metadataStream);
        final List<SemanticDomain> semanticDomains = getSemanticDomains(metadata, columnId, contentStream);
        LOG.debug("found {} for preparation #{}, column #{}", semanticDomains, preparationId, columnId);
        return semanticDomains;
    } catch (IOException e) {
        // wrap with cause preserved; callers see a generic unexpected-error TDPException
        throw new TDPException(UNEXPECTED_EXCEPTION, e);
    }
}
Aggregations