Use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
From the class DataSetServiceTest, the method preview_multi_sheet_with_a_sheet_name:
@Test
public void preview_multi_sheet_with_a_sheet_name() throws Exception {
    String dataSetId = createXlsDataSet(this.getClass().getResourceAsStream("../Talend_Desk-Tableau_de_Bord-011214.xls"));

    // preview the "Leads" sheet: 21 columns expected
    String json = given().contentType(JSON).get("/datasets/{id}/preview?sheetName=Leads", dataSetId).asString();
    DataSet dataSet = mapper.readerFor(DataSet.class).readValue(json);
    Assertions.assertThat(dataSet.getMetadata().getRowMetadata().getColumns()).isNotNull().isNotEmpty().hasSize(21);

    // preview the "Tableau de bord" sheet of the same dataset: 10 columns expected
    json = given().contentType(JSON).get("/datasets/{id}/preview?sheetName=Tableau de bord", dataSetId).asString();
    dataSet = mapper.readerFor(DataSet.class).readValue(json);
    Assertions.assertThat(dataSet.getMetadata().getRowMetadata().getColumns()).isNotNull().isNotEmpty().hasSize(10);
}
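The round trip above pairs REST-assured with Jackson: the preview endpoint returns JSON, and mapper.readerFor(DataSet.class) binds it back to a DataSet bean. A minimal sketch of that binding step, assuming a plain ObjectMapper can deserialize DataSet (the service actually injects its own pre-configured mapper):

import java.io.IOException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.talend.dataprep.api.dataset.DataSet;

class DataSetJsonSketch {

    // Assumption: a default ObjectMapper; data-prep wires its own configured instance.
    private final ObjectMapper mapper = new ObjectMapper();

    // Deserialize a preview payload; columns are then reachable through
    // dataSet.getMetadata().getRowMetadata().getColumns().
    DataSet read(String json) throws IOException {
        return mapper.readerFor(DataSet.class).readValue(json);
    }
}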
Use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
From the class TransformationService, the method executeDiffOnSample:
private void executeDiffOnSample(final PreviewParameters previewParameters, final OutputStream output) {
    final TransformationMetadataCacheKey metadataKey = cacheKeyGenerator.generateMetadataKey(
            previewParameters.getPreparationId(),
            Step.ROOT_STEP.id(),
            previewParameters.getSourceType());
    final ContentCacheKey contentKey = cacheKeyGenerator.generateContentKey(
            previewParameters.getDataSetId(),
            previewParameters.getPreparationId(),
            Step.ROOT_STEP.id(),
            JSON,
            previewParameters.getSourceType(),
            ""); // no filter for preview
    try (final InputStream metadata = contentCache.get(metadataKey);
            final InputStream content = contentCache.get(contentKey);
            final JsonParser contentParser = mapper.getFactory().createParser(new InputStreamReader(content, UTF_8))) {
        // build metadata
        final RowMetadata rowMetadata = mapper.readerFor(RowMetadata.class).readValue(metadata);
        final DataSetMetadata dataSetMetadata = new DataSetMetadata();
        dataSetMetadata.setRowMetadata(rowMetadata);
        // build dataset
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(contentParser);
        dataSet.setMetadata(dataSetMetadata);
        // trigger diff
        executePreview(
                previewParameters.getNewActions(),
                previewParameters.getBaseActions(),
                previewParameters.getTdpIds(),
                dataSet,
                output);
    } catch (final IOException e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_PERFORM_PREVIEW, e);
    }
}
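Note the streaming style: the cached content is handed to Jackson as a JsonParser over a reader instead of being buffered into a String, so the sample is deserialized incrementally. A self-contained sketch of that pattern with plain Jackson (StreamingReadSketch and its read method are illustrative names, not data-prep API):

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.ObjectMapper;
import static java.nio.charset.StandardCharsets.UTF_8;

class StreamingReadSketch {

    // Bind a JSON stream to a bean without reading it into a String first;
    // the parser pulls bytes from the stream as databind consumes tokens.
    static <T> T read(ObjectMapper mapper, InputStream in, Class<T> type) throws IOException {
        try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(in, UTF_8))) {
            return mapper.readerFor(type).readValue(parser);
        }
    }
}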
Use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
From the class ApplyPreparationExportStrategy, the method executeApplyPreparation:
private void executeApplyPreparation(ExportParameters parameters, OutputStream outputStream) {
    final String stepId = parameters.getStepId();
    final String preparationId = parameters.getPreparationId();
    final String formatName = parameters.getExportType();
    final Preparation preparation = getPreparation(preparationId);
    final String dataSetId = parameters.getDatasetId();
    final ExportFormat format = getFormat(parameters.getExportType());

    // dataset content must be retrieved as the technical user because it might not be shared
    boolean technicianIdentityReleased = false;
    securityProxy.asTechnicalUser();

    // get the dataset content (in an auto-closeable block to make sure it is properly closed)
    final boolean fullContent = parameters.getFrom() == ExportParameters.SourceType.FILTER;
    final DataSetGet dataSetGet = applicationContext.getBean(DataSetGet.class, dataSetId, fullContent, true);
    try (final InputStream datasetContent = dataSetGet.execute();
            final JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(datasetContent, UTF_8))) {
        // release the technical user identity
        securityProxy.releaseIdentity();
        technicianIdentityReleased = true;

        // "head" is not allowed as a step id
        final String version = getCleanStepId(preparation, stepId);
        // create the dataset
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        // get the actions to apply (no preparation ==> dataset export ==> no actions)
        final String actions = getActions(preparationId, version);

        // create a tee to broadcast to both the cache and the service output
        final TransformationCacheKey key = cacheKeyGenerator.generateContentKey(
                dataSetId,
                preparationId,
                version,
                formatName,
                parameters.getFrom(),
                parameters.getArguments(),
                parameters.getFilter());
        LOGGER.debug("Cache key: {}", key.getKey());
        LOGGER.debug("Cache key details: {}", key);

        try (final TeeOutputStream tee = new TeeOutputStream(outputStream,
                contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
            final Configuration.Builder configurationBuilder = Configuration.builder()
                    .args(parameters.getArguments())
                    .outFilter(rm -> filterService.build(parameters.getFilter(), rm))
                    .sourceType(parameters.getFrom())
                    .format(format.getName())
                    .actions(actions)
                    .preparation(getPreparation(preparationId))
                    .stepId(version)
                    .volume(SMALL)
                    .output(tee)
                    .limit(this.limit);
            // no need for statistics if the output is not JSON
            if (!Objects.equals(format.getName(), JSON)) {
                configurationBuilder.globalStatistics(false);
            }
            final Configuration configuration = configurationBuilder.build();
            factory.get(configuration).buildExecutable(dataSet, configuration).execute();
            tee.flush();
        } catch (Throwable e) { // NOSONAR
            LOGGER.debug("evicting cache {}", key.getKey());
            contentCache.evict(key);
            throw e;
        }
    } catch (TDPException e) {
        throw e;
    } catch (Exception e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
    } finally {
        if (!technicianIdentityReleased) {
            securityProxy.releaseIdentity();
        }
    }
}
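The tee-to-cache pattern in the inner try block is worth isolating: every byte written to the client is simultaneously written to the content cache, and any failure evicts the entry so the cache never serves a truncated export. A self-contained sketch using commons-io's TeeOutputStream; the SimpleCache interface is an illustrative stand-in for data-prep's ContentCache, not a real API:

import java.io.IOException;
import java.io.OutputStream;
import org.apache.commons.io.output.TeeOutputStream;

class TeeToCacheSketch {

    // Stand-in for the content cache: put() opens a stream backing a cache
    // entry, evict() drops a (possibly partial) entry.
    interface SimpleCache {
        OutputStream put(String key) throws IOException;
        void evict(String key);
    }

    static void writeWithCache(SimpleCache cache, String key, OutputStream serviceOut, byte[] payload) throws IOException {
        try (TeeOutputStream tee = new TeeOutputStream(serviceOut, cache.put(key))) {
            tee.write(payload); // both sinks receive the same bytes
            tee.flush();
        } catch (Throwable e) {
            cache.evict(key); // never leave a truncated entry in the cache
            throw e;
        }
    }
}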
Use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.
From the class PipelineTransformer, the method buildExecutable:
@Override
public ExecutableTransformer buildExecutable(DataSet input, Configuration configuration) {
    final RowMetadata rowMetadata = input.getMetadata().getRowMetadata();
    // prepare the fallback row metadata
    final RowMetadata fallBackRowMetadata = transformationRowMetadataUtils.getMatchingEmptyRowMetadata(rowMetadata);

    final TransformerWriter writer =
            writerRegistrationService.getWriter(configuration.formatId(), configuration.output(), configuration.getArguments());
    final ConfiguredCacheWriter metadataWriter = new ConfiguredCacheWriter(contentCache, DEFAULT);
    final TransformationMetadataCacheKey metadataKey =
            cacheKeyGenerator.generateMetadataKey(configuration.getPreparationId(), configuration.stepId(), configuration.getSourceType());
    final PreparationMessage preparation = configuration.getPreparation();

    // function that, given a step, returns the row metadata associated with its previous (parent) step
    final Function<Step, RowMetadata> previousStepRowMetadataSupplier =
            s -> Optional.ofNullable(s.getParent()).map(id -> preparationUpdater.get(id)).orElse(null);

    final Pipeline pipeline = Pipeline.Builder.builder()
            .withAnalyzerService(analyzerService)
            .withActionRegistry(actionRegistry)
            .withPreparation(preparation)
            .withActions(actionParser.parse(configuration.getActions()))
            .withInitialMetadata(rowMetadata, configuration.volume() == SMALL)
            .withMonitor(configuration.getMonitor())
            .withFilter(configuration.getFilter())
            .withLimit(configuration.getLimit())
            .withFilterOut(configuration.getOutFilter())
            .withOutput(() -> new WriterNode(writer, metadataWriter, metadataKey, fallBackRowMetadata))
            .withStatisticsAdapter(adapter)
            .withStepMetadataSupplier(previousStepRowMetadataSupplier)
            .withGlobalStatistics(configuration.isGlobalStatistics())
            .allowMetadataChange(configuration.isAllowMetadataChange())
            .build();

    // wrap this transformer into an executable transformer
    return new ExecutableTransformer() {

        @Override
        public void execute() {
            try {
                LOGGER.debug("Before transformation: {}", pipeline);
                pipeline.execute(input);
            } finally {
                LOGGER.debug("After transformation: {}", pipeline);
            }
            if (preparation != null) {
                final UpdatedStepVisitor visitor = new UpdatedStepVisitor(preparationUpdater);
                pipeline.accept(visitor);
            }
        }

        @Override
        public void signal(Signal signal) {
            pipeline.signal(signal);
        }
    };
}
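For the call side, see executeApplyPreparation above: a transformer factory resolves the Transformer matching the configuration, buildExecutable wires the pipeline, and execute() streams the dataset through it. A hedged recap of that pattern (the TransformerFactory type name is assumed from the factory.get(configuration) call shown earlier):

// Hedged recap of the call pattern from executeApplyPreparation; factory,
// configuration and dataSet are supplied by the surrounding context.
static void runTransformation(TransformerFactory factory, DataSet dataSet, Configuration configuration) {
    final ExecutableTransformer executable = factory.get(configuration).buildExecutable(dataSet, configuration);
    executable.execute(); // streams the dataset rows through the pipeline
    // executable.signal(...) may be used to interrupt a running pipeline
}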