Search in sources :

Example 36 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

In the class DataSetServiceTest, the method preview_multi_sheet_with_a_sheet_name.

/**
 * Uploads a multi-sheet XLS workbook, then previews two of its sheets by name and checks
 * the number of columns reported for each one (21 for "Leads", 10 for "Tableau de bord").
 */
@Test
public void preview_multi_sheet_with_a_sheet_name() throws Exception {
    // given: the workbook registered as a data set
    final String dataSetId = createXlsDataSet(this.getClass().getResourceAsStream("../Talend_Desk-Tableau_de_Bord-011214.xls"));

    // when / then: preview of the "Leads" sheet
    final String leadsJson = given().contentType(JSON).get("/datasets/{id}/preview?sheetName=Leads", dataSetId).asString();
    final DataSet leadsPreview = mapper.readerFor(DataSet.class).readValue(leadsJson);
    Assertions.assertThat(leadsPreview.getMetadata().getRowMetadata().getColumns()).isNotNull().isNotEmpty().hasSize(21);

    // when / then: preview of the "Tableau de bord" sheet (the sheet name contains spaces)
    final String dashboardJson = given().contentType(JSON).get("/datasets/{id}/preview?sheetName=Tableau de bord", dataSetId).asString();
    final DataSet dashboardPreview = mapper.readerFor(DataSet.class).readValue(dashboardJson);
    Assertions.assertThat(dashboardPreview.getMetadata().getRowMetadata().getColumns()).isNotNull().isNotEmpty().hasSize(10);
}
Also used : DataSet(org.talend.dataprep.api.dataset.DataSet) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.isEmptyString(org.hamcrest.Matchers.isEmptyString) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) Test(org.junit.Test)

Example 37 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

In the class TransformationService, the method executeDiffOnSample.

/**
 * Executes a preview diff using the metadata and content read from the content cache
 * under the keys generated for the preparation's root step.
 *
 * @param previewParameters the preview parameters (data set id, preparation id, source type,
 *        base/new actions and the tdp ids passed on to {@code executePreview}).
 * @param output where {@code executePreview} is asked to write its result.
 * @throws TDPException with {@code UNABLE_TO_PERFORM_PREVIEW} when an {@link IOException} occurs
 *         while reading the cached entries.
 */
private void executeDiffOnSample(final PreviewParameters previewParameters, final OutputStream output) {
    final String rootStepId = Step.ROOT_STEP.id();

    final TransformationMetadataCacheKey metadataKey = cacheKeyGenerator.generateMetadataKey(
            previewParameters.getPreparationId(),
            rootStepId,
            previewParameters.getSourceType());

    // the trailing empty string is the filter: no filters for preview
    final ContentCacheKey contentKey = cacheKeyGenerator.generateContentKey(
            previewParameters.getDataSetId(),
            previewParameters.getPreparationId(),
            rootStepId,
            JSON,
            previewParameters.getSourceType(),
            "");

    try (InputStream metadataStream = contentCache.get(metadataKey);
            InputStream contentStream = contentCache.get(contentKey);
            JsonParser contentParser = mapper.getFactory().createParser(new InputStreamReader(contentStream, UTF_8))) {

        // rebuild the data set metadata from the cached row metadata
        final RowMetadata cachedRowMetadata = mapper.readerFor(RowMetadata.class).readValue(metadataStream);
        final DataSetMetadata sampleMetadata = new DataSetMetadata();
        sampleMetadata.setRowMetadata(cachedRowMetadata);

        // rebuild the data set from the cached content and attach the metadata to it
        final DataSet sample = mapper.readerFor(DataSet.class).readValue(contentParser);
        sample.setMetadata(sampleMetadata);

        // trigger the diff between base and new actions
        executePreview(
                previewParameters.getNewActions(),
                previewParameters.getBaseActions(),
                previewParameters.getTdpIds(),
                sample,
                output);
    } catch (final IOException ioFailure) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_PERFORM_PREVIEW, ioFailure);
    }
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) DataSet(org.talend.dataprep.api.dataset.DataSet) ContentCacheKey(org.talend.dataprep.cache.ContentCacheKey) TransformationMetadataCacheKey(org.talend.dataprep.cache.TransformationMetadataCacheKey) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) JsonParser(com.fasterxml.jackson.core.JsonParser)

Example 38 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

In the class ApplyPreparationExportStrategy, the method executeApplyPreparation.

/**
 * Reads the dataset content as the technical user, applies the preparation actions to it and
 * writes the result both to the given output stream and to the content cache.
 * <p>
 * The technical-user identity taken at the start is always released: either right after the
 * dataset stream has been opened, or in the {@code finally} block if anything fails before that
 * point (including the lookup of the {@code DataSetGet} bean).
 * <p>
 * If the transformation fails, the cache entry that was being written is evicted before the
 * failure is propagated.
 *
 * @param parameters the export parameters (step id, preparation id, dataset id, export type,
 *        source type, arguments and filter).
 * @param outputStream the stream that receives the transformed content.
 * @throws TDPException as-is when one is raised by the processing, or with
 *         {@code UNABLE_TO_TRANSFORM_DATASET} wrapping any other exception raised while reading
 *         or transforming the dataset.
 */
private void executeApplyPreparation(ExportParameters parameters, OutputStream outputStream) {
    final String stepId = parameters.getStepId();
    final String preparationId = parameters.getPreparationId();
    final String formatName = parameters.getExportType();
    final Preparation preparation = getPreparation(preparationId);
    final String dataSetId = parameters.getDatasetId();
    final ExportFormat format = getFormat(formatName);
    // dataset content must be retrieved as the technical user because it might not be shared
    boolean technicianIdentityReleased = false;
    securityProxy.asTechnicalUser();
    // everything that runs under the technical identity is guarded by the finally block below,
    // so a failure of the bean lookup cannot leave the identity attached to the current thread
    try {
        // get the dataset content (in an auto-closable block to make sure it is properly closed)
        final boolean fullContent = parameters.getFrom() == ExportParameters.SourceType.FILTER;
        final DataSetGet dataSetGet = applicationContext.getBean(DataSetGet.class, dataSetId, fullContent, true);
        try (InputStream datasetContent = dataSetGet.execute();
                JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(datasetContent, UTF_8))) {
            // release the technical user identity
            securityProxy.releaseIdentity();
            technicianIdentityReleased = true;
            // head is not allowed as step id
            final String version = getCleanStepId(preparation, stepId);
            // Create dataset
            final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
            // get the actions to apply (no preparation ==> dataset export ==> no actions)
            final String actions = getActions(preparationId, version);
            // create tee to broadcast to cache + service output
            final TransformationCacheKey key = cacheKeyGenerator.generateContentKey(
                    dataSetId,
                    preparationId,
                    version,
                    formatName,
                    parameters.getFrom(),
                    parameters.getArguments(),
                    parameters.getFilter());
            // parameterized logging: the key is only rendered when debug is enabled
            LOGGER.debug("Cache key: {}", key.getKey());
            LOGGER.debug("Cache key details: {}", key);
            try (TeeOutputStream tee = new TeeOutputStream(outputStream, contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
                final Configuration.Builder configurationBuilder = Configuration.builder()
                        .args(parameters.getArguments())
                        .outFilter(rm -> filterService.build(parameters.getFilter(), rm))
                        .sourceType(parameters.getFrom())
                        .format(format.getName())
                        .actions(actions)
                        .preparation(getPreparation(preparationId))
                        .stepId(version)
                        .volume(SMALL)
                        .output(tee)
                        .limit(this.limit);
                // no need for statistics if it's not JSON output
                if (!Objects.equals(format.getName(), JSON)) {
                    configurationBuilder.globalStatistics(false);
                }
                final Configuration configuration = configurationBuilder.build();
                factory.get(configuration).buildExecutable(dataSet, configuration).execute();
                tee.flush();
            } catch (Throwable e) {
                // NOSONAR
                // whatever went wrong, do not keep a partially written entry in the cache
                LOGGER.debug("evicting cache {}", key.getKey());
                contentCache.evict(key);
                throw e;
            }
        } catch (TDPException e) {
            throw e;
        } catch (Exception e) {
            throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
        }
    } finally {
        if (!technicianIdentityReleased) {
            securityProxy.releaseIdentity();
        }
    }
}
Also used : DataSetGet(org.talend.dataprep.command.dataset.DataSetGet) TeeOutputStream(org.apache.commons.io.output.TeeOutputStream) InputStreamReader(java.io.InputStreamReader) Configuration(org.talend.dataprep.transformation.api.transformer.configuration.Configuration) DataSet(org.talend.dataprep.api.dataset.DataSet) InputStream(java.io.InputStream) ExportFormat(org.talend.dataprep.format.export.ExportFormat) TDPException(org.talend.dataprep.exception.TDPException) TransformationCacheKey(org.talend.dataprep.cache.TransformationCacheKey) TDPException(org.talend.dataprep.exception.TDPException) Preparation(org.talend.dataprep.api.preparation.Preparation) JsonParser(com.fasterxml.jackson.core.JsonParser)

Example 39 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

In the class PipelineTransformer, the method buildExecutable.

/**
 * Builds a {@link Pipeline} from the given configuration and wraps it in an
 * {@link ExecutableTransformer}.
 * <p>
 * The returned transformer runs the pipeline on {@code input} when executed; if the configuration
 * carries a preparation and the run completes without throwing, the pipeline is then visited by
 * an {@code UpdatedStepVisitor}. Signals are forwarded to the pipeline.
 *
 * @param input the data set to transform; its row metadata is used as the pipeline's initial metadata.
 * @param configuration the transformation configuration (format, output, actions, filters, limit, ...).
 * @return an executable wrapper around the configured pipeline.
 */
@Override
public ExecutableTransformer buildExecutable(DataSet input, Configuration configuration) {
    final RowMetadata initialRowMetadata = input.getMetadata().getRowMetadata();

    // fallback row metadata handed to the writer node
    final RowMetadata emptyFallbackMetadata = transformationRowMetadataUtils.getMatchingEmptyRowMetadata(initialRowMetadata);

    final TransformerWriter outputWriter = writerRegistrationService.getWriter(
            configuration.formatId(),
            configuration.output(),
            configuration.getArguments());
    final ConfiguredCacheWriter cacheWriter = new ConfiguredCacheWriter(contentCache, DEFAULT);
    final TransformationMetadataCacheKey cacheKey = cacheKeyGenerator.generateMetadataKey(
            configuration.getPreparationId(),
            configuration.stepId(),
            configuration.getSourceType());
    final PreparationMessage currentPreparation = configuration.getPreparation();

    // given a step, yields the row metadata associated to its parent step (null when it has no parent)
    final Function<Step, RowMetadata> parentStepRowMetadata = step -> Optional
            .ofNullable(step.getParent())
            .map(parentId -> preparationUpdater.get(parentId))
            .orElse(null);

    final Pipeline pipeline = Pipeline.Builder.builder()
            .withAnalyzerService(analyzerService)
            .withActionRegistry(actionRegistry)
            .withPreparation(currentPreparation)
            .withActions(actionParser.parse(configuration.getActions()))
            .withInitialMetadata(initialRowMetadata, configuration.volume() == SMALL)
            .withMonitor(configuration.getMonitor())
            .withFilter(configuration.getFilter())
            .withLimit(configuration.getLimit())
            .withFilterOut(configuration.getOutFilter())
            .withOutput(() -> new WriterNode(outputWriter, cacheWriter, cacheKey, emptyFallbackMetadata))
            .withStatisticsAdapter(adapter)
            .withStepMetadataSupplier(parentStepRowMetadata)
            .withGlobalStatistics(configuration.isGlobalStatistics())
            .allowMetadataChange(configuration.isAllowMetadataChange())
            .build();

    // wrap the pipeline into an executable transformer
    return new ExecutableTransformer() {

        @Override
        public void execute() {
            try {
                LOGGER.debug("Before transformation: {}", pipeline);
                pipeline.execute(input);
            } finally {
                // logged whether or not the execution succeeded
                LOGGER.debug("After transformation: {}", pipeline);
            }
            // only reached when the execution did not throw
            if (currentPreparation != null) {
                final UpdatedStepVisitor updatedStepVisitor = new UpdatedStepVisitor(preparationUpdater);
                pipeline.accept(updatedStepVisitor);
            }
        }

        @Override
        public void signal(Signal signal) {
            pipeline.signal(signal);
        }
    };
}
Also used : WriterNode(org.talend.dataprep.transformation.pipeline.model.WriterNode) WriterRegistrationService(org.talend.dataprep.transformation.format.WriterRegistrationService) SMALL(org.talend.dataprep.transformation.api.transformer.configuration.Configuration.Volume.SMALL) StepMetadataRepository(org.talend.dataprep.transformation.service.StepMetadataRepository) TransformerWriter(org.talend.dataprep.transformation.api.transformer.TransformerWriter) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) Configuration(org.talend.dataprep.transformation.api.transformer.configuration.Configuration) Signal(org.talend.dataprep.transformation.pipeline.Signal) DEFAULT(org.talend.dataprep.cache.ContentCache.TimeToLive.DEFAULT) PreparationMessage(org.talend.dataprep.api.preparation.PreparationMessage) Function(java.util.function.Function) AnalyzerService(org.talend.dataprep.quality.AnalyzerService) ActionParser(org.talend.dataprep.transformation.api.action.ActionParser) CacheKeyGenerator(org.talend.dataprep.cache.CacheKeyGenerator) TransformationMetadataCacheKey(org.talend.dataprep.cache.TransformationMetadataCacheKey) DataSet(org.talend.dataprep.api.dataset.DataSet) Logger(org.slf4j.Logger) ActionRegistry(org.talend.dataprep.transformation.pipeline.ActionRegistry) TransformationRowMetadataUtils(org.talend.dataprep.transformation.service.TransformationRowMetadataUtils) Step(org.talend.dataprep.api.preparation.Step) ConfiguredCacheWriter(org.talend.dataprep.transformation.api.transformer.ConfiguredCacheWriter) ContentCache(org.talend.dataprep.cache.ContentCache) ExecutableTransformer(org.talend.dataprep.transformation.api.transformer.ExecutableTransformer) Component(org.springframework.stereotype.Component) StatisticsAdapter(org.talend.dataprep.dataset.StatisticsAdapter) Optional(java.util.Optional) Pipeline(org.talend.dataprep.transformation.pipeline.Pipeline) 
Transformer(org.talend.dataprep.transformation.api.transformer.Transformer) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) TransformationMetadataCacheKey(org.talend.dataprep.cache.TransformationMetadataCacheKey) Step(org.talend.dataprep.api.preparation.Step) Pipeline(org.talend.dataprep.transformation.pipeline.Pipeline) Signal(org.talend.dataprep.transformation.pipeline.Signal) WriterNode(org.talend.dataprep.transformation.pipeline.model.WriterNode) ExecutableTransformer(org.talend.dataprep.transformation.api.transformer.ExecutableTransformer) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) PreparationMessage(org.talend.dataprep.api.preparation.PreparationMessage) TransformerWriter(org.talend.dataprep.transformation.api.transformer.TransformerWriter) ConfiguredCacheWriter(org.talend.dataprep.transformation.api.transformer.ConfiguredCacheWriter)

Aggregations

DataSet (org.talend.dataprep.api.dataset.DataSet) 39, DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata) 18, Test (org.junit.Test) 16, TDPException (org.talend.dataprep.exception.TDPException) 15, JsonParser (com.fasterxml.jackson.core.JsonParser) 13, InputStream (java.io.InputStream) 13, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata) 11, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow) 10, OutputStream (java.io.OutputStream) 8, Logger (org.slf4j.Logger) 8, DataSetGet (org.talend.dataprep.command.dataset.DataSetGet) 8, Configuration (org.talend.dataprep.transformation.api.transformer.configuration.Configuration) 8, ApiOperation (io.swagger.annotations.ApiOperation) 7, IOException (java.io.IOException) 7, ArrayList (java.util.ArrayList) 7, LoggerFactory (org.slf4j.LoggerFactory) 7, Autowired (org.springframework.beans.factory.annotation.Autowired) 7, ServiceBaseTest (org.talend.ServiceBaseTest) 7, ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata) 7, ContentCache (org.talend.dataprep.cache.ContentCache) 7