Usage of org.talend.dataprep.api.preparation.Preparation in the Talend data-prep project: class TransformationService, method executeMetadata.
/**
 * Returns the dataset metadata of the given preparation at the given step.
 * <p>
 * If the preparation has at least one action step, the metadata is served from the transformation
 * cache; when absent from the cache, the preparation is (re)run with its output discarded so that
 * the cache gets populated. If the preparation has no action step, this falls back to the raw
 * dataset metadata.
 *
 * @param preparationId the preparation id.
 * @param stepId the step id, or the alias "head" for the preparation's current head step.
 * @return the metadata at the requested step, or {@code null} if regeneration did not populate the cache.
 */
@RequestMapping(value = "/apply/preparation/{preparationId}/{stepId}/metadata", method = GET)
@ApiOperation(value = "Run the transformation given the provided export parameters", notes = "This operation transforms the dataset or preparation using parameters in export parameters.")
@VolumeMetered
@AsyncOperation(//
conditionalClass = GetPrepMetadataAsyncCondition.class, //
resultUrlGenerator = PrepMetadataGetContentUrlGenerator.class, executionIdGeneratorClass = PrepMetadataExecutionIdGenerator.class)
public DataSetMetadata executeMetadata(@PathVariable("preparationId") @AsyncParameter String preparationId, @PathVariable("stepId") @AsyncParameter String stepId) {
    LOG.debug("getting preparation metadata for #{}, step {}", preparationId, stepId);
    final Preparation preparation = getPreparation(preparationId);
    if (preparation.getSteps().size() <= 1) {
        // No action step in the preparation: serve the dataset's own metadata instead.
        LOG.debug("No step in preparation '{}', falls back to get dataset metadata (id: {})", preparationId, preparation.getDataSetId());
        DataSetGetMetadata getMetadata = context.getBean(DataSetGetMetadata.class, preparation.getDataSetId());
        return getMetadata.execute();
    }
    // Resolve the "head" alias to the actual head step id.
    String headId = "head".equalsIgnoreCase(stepId) ? preparation.getHeadId() : stepId;
    final TransformationMetadataCacheKey cacheKey = cacheKeyGenerator.generateMetadataKey(preparationId, headId, HEAD);
    if (!contentCache.has(cacheKey)) {
        // No metadata in cache: run the preparation (output discarded) so the cache gets regenerated.
        try {
            LOG.debug("Metadata not available for preparation '{}' at step '{}'", preparationId, headId);
            ExportParameters parameters = new ExportParameters();
            parameters.setPreparationId(preparationId);
            parameters.setExportType("JSON");
            parameters.setStepId(headId);
            parameters.setFrom(HEAD);
            parameters = exportParametersUtil.populateFromPreparationExportParameter(parameters);
            preparationExportStrategy.performPreparation(parameters, new NullOutputStream());
        } catch (Exception e) {
            throw new TDPException(TransformationErrorCodes.METADATA_NOT_FOUND, e);
        }
    }
    // Re-check: regeneration above should have populated the cache.
    if (contentCache.has(cacheKey)) {
        try (InputStream stream = contentCache.get(cacheKey)) {
            return mapper.readerFor(DataSetMetadata.class).readValue(stream);
        } catch (IOException e) {
            throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, e);
        }
    }
    // NOTE(review): cache still empty after regeneration — callers receive null here; consider
    // throwing METADATA_NOT_FOUND instead (kept as-is to preserve the existing contract).
    return null;
}
Usage of org.talend.dataprep.api.preparation.Preparation in the Talend data-prep project: class TransformationService, method getPreparationColumnSemanticCategories.
/**
 * Return the semantic types for a given preparation / column.
 *
 * @param preparationId the preparation id.
 * @param columnId the column id.
 * @param stepId the step id (optional, if not specified, it's 'head')
 * @return the semantic types for a given preparation / column.
 */
@RequestMapping(value = "/preparations/{preparationId}/columns/{columnId}/types", method = GET)
@ApiOperation(value = "list the types of the wanted column", notes = "This list can be used by user to change the column type.")
@Timed
@PublicAPI
public List<SemanticDomain> getPreparationColumnSemanticCategories(@ApiParam(value = "The preparation id") @PathVariable String preparationId, @ApiParam(value = "The column id") @PathVariable String columnId, @ApiParam(value = "The preparation version") @RequestParam(defaultValue = "head") String stepId) {
    LOG.debug("listing preparation semantic categories for preparation #{} column #{}@{}", preparationId, columnId, stepId);
    // get the preparation
    final Preparation preparation = getPreparation(preparationId);
    // get the step (in case of 'head', the real step id must be found — here it is the last step of the preparation)
    final String version = //
    StringUtils.equals("head", stepId) ? preparation.getSteps().get(preparation.getSteps().size() - 1).getId() : stepId;
    /*
     * OK, this one is a bit tricky so pay attention.
     *
     * To be able to get the semantic types, the analyzer service needs to run on the result of the preparation.
     *
     * The result must be found in the cache, so if the preparation is not cached, the preparation is run so that
     * it gets cached.
     *
     * Then, the analyzer service just gets the data from the cache. That's it.
     */
    // generate the cache keys for both metadata & content
    final ContentCacheKey metadataKey = cacheKeyGenerator.metadataBuilder().preparationId(preparationId).stepId(version).sourceType(HEAD).build();
    final ContentCacheKey contentKey = cacheKeyGenerator.contentBuilder().datasetId(preparation.getDataSetId()).preparationId(preparationId).stepId(//
    version).format(JSON).sourceType(//
    HEAD).build();
    // if the preparation is not cached, let's compute it to have some cache
    if (!contentCache.has(metadataKey) || !contentCache.has(contentKey)) {
        addPreparationInCache(preparation, stepId);
    }
    // run the analyzer service on the cached content.
    // Locals renamed (was "metadataCache"/"contentCache") so they no longer shadow the contentCache field.
    try (final InputStream metadataStream = contentCache.get(metadataKey);
            final InputStream contentStream = contentCache.get(contentKey)) {
        final DataSetMetadata metadata = mapper.readerFor(DataSetMetadata.class).readValue(metadataStream);
        final List<SemanticDomain> semanticDomains = getSemanticDomains(metadata, columnId, contentStream);
        LOG.debug("found {} for preparation #{}, column #{}", semanticDomains, preparationId, columnId);
        return semanticDomains;
    } catch (IOException e) {
        throw new TDPException(UNEXPECTED_EXCEPTION, e);
    }
}
Usage of org.talend.dataprep.api.preparation.Preparation in the Talend data-prep project: class ApplyPreparationExportStrategy, method executeApplyPreparation.
/**
 * Applies the preparation described by the export parameters to its dataset and writes the
 * transformed content both to the given output stream and to the transformation cache.
 * <p>
 * The dataset content is fetched under the technical user identity (the dataset might not be
 * shared with the caller); that identity is released as soon as the content stream is open.
 * On any failure during the transformation, the (partial) cache entry is evicted.
 *
 * @param parameters the export parameters (preparation id, step id, export format, filter...).
 * @param outputStream where the transformed content is written.
 */
private void executeApplyPreparation(ExportParameters parameters, OutputStream outputStream) {
    final String stepId = parameters.getStepId();
    final String preparationId = parameters.getPreparationId();
    final String formatName = parameters.getExportType();
    final Preparation preparation = getPreparation(preparationId);
    final String dataSetId = parameters.getDatasetId();
    final ExportFormat format = getFormat(formatName);
    // dataset content must be retrieved as the technical user because it might not be shared
    boolean technicianIdentityReleased = false;
    securityProxy.asTechnicalUser();
    // get the dataset content (in an auto-closable block to make sure it is properly closed)
    final boolean fullContent = parameters.getFrom() == ExportParameters.SourceType.FILTER;
    final DataSetGet dataSetGet = applicationContext.getBean(DataSetGet.class, dataSetId, fullContent, true);
    try (final InputStream datasetContent = dataSetGet.execute();
            final JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(datasetContent, UTF_8))) {
        // release the technical user identity
        securityProxy.releaseIdentity();
        technicianIdentityReleased = true;
        // head is not allowed as step id
        final String version = getCleanStepId(preparation, stepId);
        // Create dataset
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        // get the actions to apply (no preparation ==> dataset export ==> no actions)
        final String actions = getActions(preparationId, version);
        // create tee to broadcast to cache + service output
        final TransformationCacheKey key = //
        cacheKeyGenerator.generateContentKey(//
        dataSetId, //
        preparationId, //
        version, //
        formatName, //
        parameters.getFrom(), //
        parameters.getArguments(), //
        parameters.getFilter());
        // Parameterized logging: avoids eager string concatenation when debug is disabled.
        LOGGER.debug("Cache key: {}", key.getKey());
        LOGGER.debug("Cache key details: {}", key);
        try (final TeeOutputStream tee = new TeeOutputStream(outputStream, contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
            final Configuration.Builder configurationBuilder = //
            Configuration.builder().args(//
            parameters.getArguments()).outFilter(//
            rm -> filterService.build(parameters.getFilter(), rm)).sourceType(parameters.getFrom()).format(//
            format.getName()).actions(//
            actions).preparation(//
            preparation).stepId( // reuse the already-fetched preparation, no second remote call
            version).volume(//
            SMALL).output(//
            tee).limit(this.limit);
            // no need for statistics if it's not JSON output
            if (!Objects.equals(format.getName(), JSON)) {
                configurationBuilder.globalStatistics(false);
            }
            final Configuration configuration = configurationBuilder.build();
            factory.get(configuration).buildExecutable(dataSet, configuration).execute();
            tee.flush();
        } catch (Throwable e) {
            // NOSONAR
            // Transformation failed mid-stream: evict the partial cache entry before rethrowing.
            LOGGER.debug("evicting cache {}", key.getKey());
            contentCache.evict(key);
            throw e;
        }
    } catch (TDPException e) {
        throw e;
    } catch (Exception e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
    } finally {
        // Make sure the technical identity is released even if opening the content failed.
        if (!technicianIdentityReleased) {
            securityProxy.releaseIdentity();
        }
    }
}
Usage of org.talend.dataprep.api.preparation.Preparation in the Talend data-prep project: class BasePreparationTest, method createPreparationFromService.
/**
 * Creates a new preparation directly in the preparation repository (bypassing the REST layer).
 *
 * @param datasetId the dataset id the preparation is based on.
 * @param name the preparation name.
 * @param numberOfColumns how many (empty) columns the preparation row metadata should contain.
 * @return the id of the created preparation.
 */
protected String createPreparationFromService(final String datasetId, final String name, int numberOfColumns) {
    final String preparationId = UUID.randomUUID().toString();
    final String appVersion = versionService.version().getVersionId();
    final Preparation preparation = new Preparation(preparationId, datasetId, Step.ROOT_STEP.id(), appVersion);
    preparation.setName(name);
    preparation.setCreationDate(0);
    // Build a row metadata made of the requested number of empty columns.
    final RowMetadata rowMetadata = new RowMetadata();
    int remaining = numberOfColumns;
    while (remaining-- > 0) {
        rowMetadata.addColumn(new ColumnMetadata());
    }
    preparation.setRowMetadata(rowMetadata);
    repository.add(preparation);
    return preparation.id();
}
Usage of org.talend.dataprep.api.preparation.Preparation in the Talend data-prep project: class FolderServiceTest, method childrenShouldContainsPreparationsCount.
@Test
public void childrenShouldContainsPreparationsCount() throws Exception {
    // given: a "foo" folder under home, filled with a known number of preparations
    createFolder(home.getId(), "foo");
    final Folder fooFolder = getFolder(home.getId(), "foo");
    final long expectedNbPreparations = 12;
    for (int index = 0; index < expectedNbPreparations; index++) {
        final Preparation preparation = new Preparation();
        preparation.setName("prep_" + index);
        preparation.setDataSetId("1234");
        preparation.setRowMetadata(new RowMetadata());
        clientTest.createPreparation(preparation, fooFolder.getId());
    }
    // when: listing the children of home
    final List<Folder> children = getFolderChildren(home.getId());
    // then: the single child folder reports the expected preparation count
    assertThat(children.size(), is(1));
    final Folder onlyChild = children.get(0);
    assertThat(onlyChild.getNbPreparations(), is(expectedNbPreparations));
}
Aggregations