Search in sources:

Example 21 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

The class PreparationExportStrategy, method performPreparation.

/**
 * Applies the preparation described by {@code parameters} to its data set and writes the
 * exported content to {@code outputStream}.
 *
 * <p>The data set content and metadata are read under the technical-user identity. That identity
 * is released as soon as both have been read, and in any case before this method returns
 * (including when the command beans cannot be obtained). The exported content is written both to
 * {@code outputStream} and to the content cache; the cache entry is evicted if the export fails.
 *
 * @param parameters the export parameters (preparation id, step id, export type, source type,
 * arguments and filter are read from it).
 * @param outputStream the stream that receives the exported content.
 * @throws TDPException with {@code UNABLE_TO_TRANSFORM_DATASET} when reading the data set or
 * running the export fails with anything other than a {@code TDPException}, which is rethrown as is.
 */
public void performPreparation(final ExportParameters parameters, final OutputStream outputStream) {
    final String stepId = parameters.getStepId();
    final String preparationId = parameters.getPreparationId();
    final String formatName = parameters.getExportType();
    final PreparationMessage preparation = getPreparation(preparationId, stepId);
    final String dataSetId = preparation.getDataSetId();
    final ExportFormat format = getFormat(formatName);
    boolean releasedIdentity = false;
    // Allow get dataset and get dataset metadata access whatever share status is
    securityProxy.asTechnicalUser();
    try {
        // Bean lookups happen inside the try so that a failure here still releases the identity.
        final DataSetGet dataSetGet = applicationContext.getBean(DataSetGet.class, dataSetId, false, true);
        final DataSetGetMetadata dataSetGetMetadata = applicationContext.getBean(DataSetGetMetadata.class, dataSetId);
        // get the dataset content (in an auto-closable block to make sure it is properly closed)
        try (InputStream datasetContent = dataSetGet.execute();
                JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(datasetContent, UTF_8))) {
            // head is not allowed as step id
            final String version = getCleanStepId(preparation, stepId);
            // Create dataset
            final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
            dataSet.setMetadata(dataSetGetMetadata.execute());
            // All good, can already release identity
            securityProxy.releaseIdentity();
            releasedIdentity = true;
            // get the actions to apply (no preparation ==> dataset export ==> no actions)
            final String actions = getActions(preparationId, version);
            final TransformationCacheKey key = cacheKeyGenerator.generateContentKey(
                    dataSetId,
                    preparationId,
                    version,
                    formatName,
                    parameters.getFrom(),
                    parameters.getArguments(),
                    parameters.getFilter());
            LOGGER.debug("Cache key: {}", key.getKey());
            LOGGER.debug("Cache key details: {}", key);
            // Tee the export so that the caller's stream and the cache entry receive the same bytes.
            try (final TeeOutputStream tee = new TeeOutputStream(outputStream, contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
                final Configuration configuration = Configuration.builder()
                        .args(parameters.getArguments())
                        .outFilter(rm -> filterService.build(parameters.getFilter(), rm))
                        .sourceType(parameters.getFrom())
                        .format(format.getName())
                        .actions(actions)
                        .preparation(preparation)
                        .stepId(version)
                        .volume(Configuration.Volume.SMALL)
                        .output(tee)
                        .limit(limit)
                        .build();
                factory.get(configuration).buildExecutable(dataSet, configuration).execute();
                tee.flush();
            } catch (Throwable e) {
                // NOSONAR
                // Do not leave a partial export in the cache.
                contentCache.evict(key);
                throw e;
            }
        } catch (TDPException e) {
            throw e;
        } catch (Exception e) {
            throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
        }
    } finally {
        if (!releasedIdentity) {
            // Release identity in case of error.
            securityProxy.releaseIdentity();
        }
    }
}
Also used : ExportFormat(org.talend.dataprep.format.export.ExportFormat) StringUtils(org.apache.commons.lang.StringUtils) DataSetGet(org.talend.dataprep.command.dataset.DataSetGet) TDPException(org.talend.dataprep.exception.TDPException) TransformationErrorCodes(org.talend.dataprep.exception.error.TransformationErrorCodes) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) Configuration(org.talend.dataprep.transformation.api.transformer.configuration.Configuration) PreparationMessage(org.talend.dataprep.api.preparation.PreparationMessage) CacheKeyGenerator(org.talend.dataprep.cache.CacheKeyGenerator) TeeOutputStream(org.apache.commons.io.output.TeeOutputStream) ExportUtils(org.talend.dataprep.transformation.service.ExportUtils) DataSet(org.talend.dataprep.api.dataset.DataSet) OutputStream(java.io.OutputStream) ExportParameters(org.talend.dataprep.api.export.ExportParameters) Logger(org.slf4j.Logger) HEAD(org.talend.dataprep.api.export.ExportParameters.SourceType.HEAD) BaseExportStrategy(org.talend.dataprep.transformation.service.BaseExportStrategy) JsonParser(com.fasterxml.jackson.core.JsonParser) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) StreamingResponseBody(org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody) InputStreamReader(java.io.InputStreamReader) ContentCache(org.talend.dataprep.cache.ContentCache) SecurityProxy(org.talend.dataprep.security.SecurityProxy) CSVFormat(org.talend.dataprep.transformation.format.CSVFormat) Component(org.springframework.stereotype.Component) TransformationCacheKey(org.talend.dataprep.cache.TransformationCacheKey) DataSetGetMetadata(org.talend.dataprep.command.dataset.DataSetGetMetadata) InputStream(java.io.InputStream) DataSetGet(org.talend.dataprep.command.dataset.DataSetGet) TeeOutputStream(org.apache.commons.io.output.TeeOutputStream) InputStreamReader(java.io.InputStreamReader) 
Configuration(org.talend.dataprep.transformation.api.transformer.configuration.Configuration) DataSet(org.talend.dataprep.api.dataset.DataSet) InputStream(java.io.InputStream) ExportFormat(org.talend.dataprep.format.export.ExportFormat) DataSetGetMetadata(org.talend.dataprep.command.dataset.DataSetGetMetadata) TDPException(org.talend.dataprep.exception.TDPException) TransformationCacheKey(org.talend.dataprep.cache.TransformationCacheKey) TDPException(org.talend.dataprep.exception.TDPException) PreparationMessage(org.talend.dataprep.api.preparation.PreparationMessage) JsonParser(com.fasterxml.jackson.core.JsonParser)

Example 22 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

The class AggregationServiceTest, method shouldNotAggregateBecauseNoGroupBy.

/**
 * Aggregating with parameters that only carry an operation (nothing else is configured on them)
 * is expected to fail with a {@link TDPException}.
 */
@Test(expected = TDPException.class)
public void shouldNotAggregateBecauseNoGroupBy() {
    // given: a single AVERAGE operation on column "0001" and an empty data set
    final AggregationParameters operationOnly = new AggregationParameters();
    final AggregationOperation average = new AggregationOperation("0001", Operator.AVERAGE);
    operationOnly.addOperation(average);
    final DataSet emptyDataSet = new DataSet();

    // when: the expected exception is declared on the @Test annotation
    service.aggregate(operationOnly, emptyDataSet);
}
Also used : DataSet(org.talend.dataprep.api.dataset.DataSet) AggregationParameters(org.talend.dataprep.transformation.aggregation.api.AggregationParameters) AggregationOperation(org.talend.dataprep.transformation.aggregation.api.AggregationOperation) Test(org.junit.Test) TransformationBaseTest(org.talend.dataprep.transformation.TransformationBaseTest)

Example 23 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

The class PipelineTest, method testCancelledPipeline.

/**
 * Sends {@code Signal.STOP} to a source-to-output pipeline before executing it on a two-row
 * data set, then checks what the output node received.
 */
@Test
public void testCancelledPipeline() throws Exception {
    // given: a pipeline wired straight from source to the test output
    final Pipeline stoppedPipeline = new Pipeline(NodeBuilder.source().to(output).build());

    // ... two rows sharing the same row metadata
    final RowMetadata sharedRowMetadata = new RowMetadata();
    final DataSetRow firstRow = new DataSetRow(sharedRowMetadata);
    final DataSetRow secondRow = new DataSetRow(sharedRowMetadata);
    final List<DataSetRow> rows = new ArrayList<>();
    rows.add(firstRow);
    rows.add(secondRow);

    // ... wrapped in a data set whose metadata points at that row metadata
    final DataSetMetadata dataSetMetadata = new DataSetMetadata();
    dataSetMetadata.setRowMetadata(sharedRowMetadata);
    final DataSet input = new DataSet();
    input.setMetadata(dataSetMetadata);
    input.setRecords(rows.stream());

    // when: the stop signal is sent before execution starts
    stoppedPipeline.signal(Signal.STOP);
    stoppedPipeline.execute(input);

    // then: the output saw exactly one row (the first one) and an end-of-stream signal
    assertThat(output.getCount(), is(1));
    assertThat(output.getRow(), is(firstRow));
    assertThat(output.getMetadata(), is(sharedRowMetadata));
    assertThat(output.getSignal(), is(END_OF_STREAM));
}
Also used : DataSet(org.talend.dataprep.api.dataset.DataSet) ArrayList(java.util.ArrayList) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Test(org.junit.Test)

Example 24 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

The class DataSetGetMetadata, method configureLimitedDataset.

/**
 * Configures this command to GET {@code <datasetServiceUrl>/datasets/<dataSetId>/metadata} and,
 * on an OK response, to parse the body as a {@link DataSet} and return its metadata.
 *
 * @param dataSetId the id of the data set whose metadata is requested.
 */
private void configureLimitedDataset(final String dataSetId) {
    // The request is built lazily, when the command is actually executed.
    execute(() -> new HttpGet(datasetServiceUrl + "/datasets/" + dataSetId + "/metadata"));
    on(HttpStatus.OK).then((request, response) -> {
        try {
            final DataSet parsed = objectMapper.readerFor(DataSet.class).readValue(response.getEntity().getContent());
            return parsed.getMetadata();
        } catch (IOException ioe) {
            // Reading or parsing the response body failed.
            throw new TDPException(CommonErrorCodes.UNEXPECTED_EXCEPTION, ioe);
        } finally {
            // Always release the connection, whether parsing succeeded or not.
            request.releaseConnection();
        }
    });
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) DataSet(org.talend.dataprep.api.dataset.DataSet) HttpGet(org.apache.http.client.methods.HttpGet) IOException(java.io.IOException)

Example 25 with DataSet

use of org.talend.dataprep.api.dataset.DataSet in project data-prep by Talend.

The class TypeDetectionNode, method signal.

/**
 * Handles a pipeline signal.
 *
 * <p>On {@code END_OF_STREAM}, {@code CANCEL} or {@code STOP}: closes the JSON generator, lets the
 * result analyzer (when present) finish so that the adapter can update the column metadata, then
 * reads the records back from the gzipped {@code reservoir} file and emits each of them to the
 * next link with the row metadata.
 *
 * <p>For every signal: the generator is closed, the reservoir file is deleted, the elapsed time is
 * added to {@code totalTime}, and the signal is forwarded to the parent implementation. Failures
 * during the delayed analysis are logged as warnings and do not prevent the signal from being
 * forwarded.
 *
 * @param signal the signal received by this node.
 */
@Override
public void signal(Signal signal) {
    final long start = System.currentTimeMillis();
    try {
        if (signal == Signal.END_OF_STREAM || signal == Signal.CANCEL || signal == Signal.STOP) {
            // End temporary output
            generator.writeEndArray();
            generator.writeEndObject();
            generator.flush();
            generator.close();
            if (rowMetadata != null && resultAnalyzer != null) {
                try {
                    // Adapt row metadata to infer type (adapter takes care of type-forced columns)
                    resultAnalyzer.end();
                    final List<ColumnMetadata> columns = rowMetadata.getColumns();
                    adapter.adapt(columns, resultAnalyzer.getResult(), (Predicate<ColumnMetadata>) filter);
                } finally {
                    // Close the analyzer even when ending it or adapting the columns fails.
                    resultAnalyzer.close();
                }
            }
            // Send stored records to next steps
            final ObjectMapper mapper = new ObjectMapper();
            try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(new GZIPInputStream(new FileInputStream(reservoir)), UTF_8))) {
                // readerFor(Class) replaces the deprecated reader(Class).
                final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
                dataSet.getRecords().forEach(r -> {
                    r.setRowMetadata(rowMetadata);
                    link.exec().emit(r, rowMetadata);
                });
            }
        }
    } catch (Exception e) {
        // Best effort: the signal below must still be propagated.
        LOGGER.warn("Unable to perform delayed analysis.", e);
    } finally {
        try {
            // The generator may already be closed above; closing again here covers the failure paths.
            generator.close();
        } catch (IOException e) {
            LOGGER.error("Unable to close JSON generator (causing potential temp file delete issues).", e);
        }
        FilesHelper.deleteQuietly(reservoir);
        totalTime += System.currentTimeMillis() - start;
    }
    super.signal(signal);
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) InputStreamReader(java.io.InputStreamReader) DataSet(org.talend.dataprep.api.dataset.DataSet) IOException(java.io.IOException) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FileInputStream(java.io.FileInputStream) TalendRuntimeException(org.talend.daikon.exception.TalendRuntimeException) IOException(java.io.IOException) JsonParser(com.fasterxml.jackson.core.JsonParser)

Aggregations

DataSet (org.talend.dataprep.api.dataset.DataSet)39 DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)18 Test (org.junit.Test)16 TDPException (org.talend.dataprep.exception.TDPException)15 JsonParser (com.fasterxml.jackson.core.JsonParser)13 InputStream (java.io.InputStream)13 RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)11 DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow)10 OutputStream (java.io.OutputStream)8 Logger (org.slf4j.Logger)8 DataSetGet (org.talend.dataprep.command.dataset.DataSetGet)8 Configuration (org.talend.dataprep.transformation.api.transformer.configuration.Configuration)8 ApiOperation (io.swagger.annotations.ApiOperation)7 IOException (java.io.IOException)7 ArrayList (java.util.ArrayList)7 LoggerFactory (org.slf4j.LoggerFactory)7 Autowired (org.springframework.beans.factory.annotation.Autowired)7 ServiceBaseTest (org.talend.ServiceBaseTest)7 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)7 ContentCache (org.talend.dataprep.cache.ContentCache)7