Search in sources :

Example 56 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class OptimizedExportStrategy method performOptimizedTransform.

/**
 * Exports a preparation by re-using the cached content of a previous step: the cached data set is read back, only
 * the actions/steps between the previous version and the requested version are applied, and the result is written
 * both to the given output stream and to the content cache.
 *
 * @param parameters the export parameters (preparation id, export type, arguments, filter, source type).
 * @param outputStream where to write the transformed content.
 * @throws IOException declared by the signature; exceptions raised while transforming are wrapped in a TDPException.
 * @throws IllegalStateException if the optimized preparation input could not be computed.
 * @throws TDPException if the transformation fails.
 */
private void performOptimizedTransform(ExportParameters parameters, OutputStream outputStream) throws IOException {
    // Initial check
    final OptimizedPreparationInput optimizedPreparationInput = new OptimizedPreparationInput(parameters).invoke();
    if (optimizedPreparationInput == null) {
        throw new IllegalStateException("Unable to use this strategy (call accept() before calling this).");
    }
    final String preparationId = parameters.getPreparationId();
    final String dataSetId = optimizedPreparationInput.getDataSetId();
    final TransformationCacheKey transformationCacheKey = optimizedPreparationInput.getTransformationCacheKey();
    final DataSetMetadata metadata = optimizedPreparationInput.getMetadata();
    final String previousVersion = optimizedPreparationInput.getPreviousVersion();
    final String version = optimizedPreparationInput.getVersion();
    final ExportFormat format = getFormat(parameters.getExportType());
    // Get content from previous step
    try (JsonParser parser = mapper.getFactory().createParser(new InputStreamReader(contentCache.get(transformationCacheKey), UTF_8))) {
        // Create dataset from the cached content and attach the metadata computed for the optimized input
        final DataSet dataSet = mapper.readerFor(DataSet.class).readValue(parser);
        dataSet.setMetadata(metadata);
        // get the actions to apply (no preparation ==> dataset export ==> no actions)
        final String actions = getActions(preparationId, previousVersion, version);
        final PreparationMessage preparation = getPreparation(preparationId);
        // only keep the steps selected by getMatchingSteps for the previous/requested version pair
        preparation.setSteps(getMatchingSteps(preparation.getSteps(), previousVersion, version));
        LOGGER.debug("Running optimized strategy for preparation {} @ step #{}", preparationId, version);
        // create tee to broadcast to cache + service output
        final TransformationCacheKey key = cacheKeyGenerator.generateContentKey(
                dataSetId,
                preparationId,
                version,
                parameters.getExportType(),
                parameters.getFrom(),
                parameters.getArguments(),
                parameters.getFilter());
        // parameterized logging: nothing is formatted when debug is disabled
        LOGGER.debug("Cache key: {}", key.getKey());
        LOGGER.debug("Cache key details: {}", key);
        try (final TeeOutputStream tee = new TeeOutputStream(outputStream, contentCache.put(key, ContentCache.TimeToLive.DEFAULT))) {
            final Configuration configuration = Configuration.builder()
                    .args(parameters.getArguments())
                    .outFilter(rm -> filterService.build(parameters.getFilter(), rm))
                    .sourceType(parameters.getFrom())
                    .format(format.getName())
                    .actions(actions)
                    .preparation(preparation)
                    .stepId(version)
                    .volume(Configuration.Volume.SMALL)
                    .output(tee)
                    .limit(limit)
                    .build();
            factory.get(configuration).buildExecutable(dataSet, configuration).execute();
            tee.flush();
        } catch (Throwable e) { // NOSONAR
            // whatever went wrong, do not leave a (possibly partial) entry in the cache
            contentCache.evict(key);
            throw e;
        }
    } catch (TDPException e) {
        // already a functional exception, do not wrap it again
        throw e;
    } catch (Exception e) {
        throw new TDPException(TransformationErrorCodes.UNABLE_TO_TRANSFORM_DATASET, e);
    }
}
Also used : ExportFormat(org.talend.dataprep.format.export.ExportFormat) StringUtils(org.apache.commons.lang.StringUtils) TDPException(org.talend.dataprep.exception.TDPException) TransformationErrorCodes(org.talend.dataprep.exception.error.TransformationErrorCodes) LoggerFactory(org.slf4j.LoggerFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) Configuration(org.talend.dataprep.transformation.api.transformer.configuration.Configuration) PreparationMessage(org.talend.dataprep.api.preparation.PreparationMessage) ArrayList(java.util.ArrayList) CacheKeyGenerator(org.talend.dataprep.cache.CacheKeyGenerator) TransformationMetadataCacheKey(org.talend.dataprep.cache.TransformationMetadataCacheKey) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) TeeOutputStream(org.apache.commons.io.output.TeeOutputStream) ExportUtils(org.talend.dataprep.transformation.service.ExportUtils) DataSet(org.talend.dataprep.api.dataset.DataSet) OutputStream(java.io.OutputStream) Preparation(org.talend.dataprep.api.preparation.Preparation) ExportParameters(org.talend.dataprep.api.export.ExportParameters) Logger(org.slf4j.Logger) BaseExportStrategy(org.talend.dataprep.transformation.service.BaseExportStrategy) JsonParser(com.fasterxml.jackson.core.JsonParser) UTF_8(java.nio.charset.StandardCharsets.UTF_8) StreamingResponseBody(org.springframework.web.servlet.mvc.method.annotation.StreamingResponseBody) IOException(java.io.IOException) Step(org.talend.dataprep.api.preparation.Step) InputStreamReader(java.io.InputStreamReader) Collectors(java.util.stream.Collectors) ContentCache(org.talend.dataprep.cache.ContentCache) CSVFormat(org.talend.dataprep.transformation.format.CSVFormat) List(java.util.List) Component(org.springframework.stereotype.Component) TransformationCacheKey(org.talend.dataprep.cache.TransformationCacheKey) InputStream(java.io.InputStream) TeeOutputStream(org.apache.commons.io.output.TeeOutputStream) InputStreamReader(java.io.InputStreamReader) 
Configuration(org.talend.dataprep.transformation.api.transformer.configuration.Configuration) DataSet(org.talend.dataprep.api.dataset.DataSet) ExportFormat(org.talend.dataprep.format.export.ExportFormat) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) TDPException(org.talend.dataprep.exception.TDPException) IOException(java.io.IOException) TransformationCacheKey(org.talend.dataprep.cache.TransformationCacheKey) TDPException(org.talend.dataprep.exception.TDPException) PreparationMessage(org.talend.dataprep.api.preparation.PreparationMessage) JsonParser(com.fasterxml.jackson.core.JsonParser)

Example 57 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class PreparationExportStrategyTest method setUp.

/**
 * Prepares the mocks used by the tests: format registration, data set commands, preparation actions command,
 * cache key generation, transformer factory and content cache.
 */
@Before
public void setUp() throws Exception {
    // Given
    mapper.registerModule(new Jdk8Module());
    // NOTE(review): the strategy receives a fresh ObjectMapper, not the one the module was just registered on — confirm this is intended
    strategy.setMapper(new ObjectMapper());
    when(formatRegistrationService.getByName(eq("JSON"))).thenReturn(new JsonFormat());

    // data set metadata command
    final DataSetGetMetadata dataSetGetMetadata = mock(DataSetGetMetadata.class);
    when(applicationContext.getBean(eq(DataSetGetMetadata.class), anyVararg())).thenReturn(dataSetGetMetadata);

    // data set content command: returns an empty data set serialized as JSON
    DataSetGet dataSetGet = mock(DataSetGet.class);
    final StringWriter dataSetAsString = new StringWriter();
    DataSet dataSet = new DataSet();
    final DataSetMetadata dataSetMetadata = new DataSetMetadata("ds-1234", "", "", 0L, 0L, new RowMetadata(), "");
    final DataSetContent content = new DataSetContent();
    dataSetMetadata.setContent(content);
    dataSet.setMetadata(dataSetMetadata);
    dataSet.setRecords(Stream.empty());
    mapper.writerFor(DataSet.class).writeValue(dataSetAsString, dataSet);
    // explicit UTF-8 so the produced bytes do not depend on the platform default charset
    when(dataSetGet.execute()).thenReturn(
            new ByteArrayInputStream(dataSetAsString.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    when(applicationContext.getBean(eq(DataSetGet.class), anyVararg())).thenReturn(dataSetGet);

    // preparation actions command: returns an empty JSON object
    final PreparationGetActions preparationGetActions = mock(PreparationGetActions.class);
    when(preparationGetActions.execute())
            .thenReturn(new ByteArrayInputStream("{}".getBytes(java.nio.charset.StandardCharsets.UTF_8)));
    when(applicationContext.getBean(eq(PreparationGetActions.class), eq("prep-1234"), anyString())).thenReturn(preparationGetActions);

    // cache key generation
    final TransformationCacheKey cacheKey = mock(TransformationCacheKey.class);
    when(cacheKey.getKey()).thenReturn("cache-1234");
    when(cacheKeyGenerator.generateContentKey(anyString(), anyString(), anyString(), anyString(), any(), any(), anyString())).thenReturn(cacheKey);

    // transformer: reset first so that stubbing/interactions from a previous test do not leak into this one
    final ExecutableTransformer executableTransformer = mock(ExecutableTransformer.class);
    reset(transformer);
    when(transformer.buildExecutable(any(), any())).thenReturn(executableTransformer);
    when(factory.get(any())).thenReturn(transformer);

    // content written to the cache is discarded
    when(contentCache.put(any(), any())).thenReturn(new NullOutputStream());
}
Also used : DataSetGet(org.talend.dataprep.command.dataset.DataSetGet) DataSet(org.talend.dataprep.api.dataset.DataSet) DataSetGetMetadata(org.talend.dataprep.command.dataset.DataSetGetMetadata) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) TransformationCacheKey(org.talend.dataprep.cache.TransformationCacheKey) Jdk8Module(com.fasterxml.jackson.datatype.jdk8.Jdk8Module) JsonFormat(org.talend.dataprep.transformation.format.JsonFormat) StringWriter(java.io.StringWriter) ByteArrayInputStream(java.io.ByteArrayInputStream) PreparationGetActions(org.talend.dataprep.command.preparation.PreparationGetActions) ExecutableTransformer(org.talend.dataprep.transformation.api.transformer.ExecutableTransformer) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) DataSetContent(org.talend.dataprep.api.dataset.DataSetContent) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) NullOutputStream(org.apache.commons.io.output.NullOutputStream) Before(org.junit.Before)

Example 58 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class RenameDataSetsWithFolderPath method run.

/**
 * Renames every dataset located in a folder other than "/" by appending the folder path (stripped of its
 * leading/trailing "/") to the dataset name. Datasets without a folder entry or located in "/" are left untouched.
 *
 * @see UpgradeTask#run()
 */
@Override
public void run() {
    // the repository stream may hold underlying resources: make sure it is closed once all datasets are processed
    try (Stream<DataSetMetadata> datasets = dataSetMetadataRepository.list()) {
        datasets.forEach(dataset -> {
            final Folder folder = folderRepository.locateEntry(dataset.getId(), FolderContentType.DATASET);
            // skip home folder
            if (folder != null && !StringUtils.equals("/", folder.getPath())) {
                String newName = dataset.getName() + " - " + StringUtils.strip(folder.getPath(), "/");
                dataset.setName(newName);
                dataSetMetadataRepository.save(dataset);
                LOG.debug("dataset #{} renamed to {}", dataset.getId(), newName);
            } else {
                LOG.debug("dataset #{} not renamed since it's in the user home folder", dataset.getId());
            }
        });
    }
}
Also used : Folder(org.talend.dataprep.api.folder.Folder) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Example 59 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class FileSystemDataSetMetadataRepository method get.

/**
 * Reads the metadata of the given dataset from its gzipped file, under the read lock associated with the id.
 *
 * @param id the dataset id.
 * @return the dataset metadata, or <code>null</code> if the file is hidden, does not exist or cannot be read.
 * @see DataSetMetadataRepository#get(String)
 */
@Override
public DataSetMetadata get(String id) {
    final File metadataFile = getFile(id);
    final String fileName = metadataFile.getName();

    // hidden files are never considered as datasets
    if (fileName.startsWith(".")) {
        LOG.info("Ignore hidden file {}", fileName);
        return null;
    }
    if (!metadataFile.exists()) {
        LOG.info("dataset #{} not found in file system", id);
        return null;
    }

    final ReentrantReadWriteLock.ReadLock readLock = locks.getLock(id).readLock();
    readLock.lock();
    try (FileInputStream rawInput = new FileInputStream(metadataFile);
        GZIPInputStream gzipInput = new GZIPInputStream(rawInput)) {
        return mapper.readerFor(DataSetMetadata.class).readValue(gzipInput);
    } catch (IOException e) {
        LOG.error("unable to load dataset {}", id, e);
        return null;
    } finally {
        // always release the read lock, whatever the outcome of the read
        readLock.unlock();
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Example 60 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

the class InMemoryDataSetMetadataRepository method get.

/**
 * Looks up the metadata stored for the given id and returns a copy of it.
 *
 * @param id the dataset id.
 * @return a copy of the stored metadata (built after resetTransientValues was applied to the stored instance),
 * or <code>null</code> if nothing is stored for this id.
 * @see DataSetMetadataRepository#get(String)
 */
@Override
public DataSetMetadata get(String id) {
    final DataSetMetadata stored = store.get(id);
    if (stored != null) {
        resetTransientValues(stored);
        // hand out a copy rather than the stored instance itself
        final DataSetMetadata copy = metadataBuilder.metadata().copy(stored).build();
        return copy;
    }
    LOG.info("data set metadata #{} not found in the system", id);
    return null;
}
Also used : DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Aggregations

DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)192 Test (org.junit.Test)126 DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest)63 ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata)48 InputStream (java.io.InputStream)45 Matchers.containsString (org.hamcrest.Matchers.containsString)28 Matchers.isEmptyString (org.hamcrest.Matchers.isEmptyString)28 TDPException (org.talend.dataprep.exception.TDPException)26 RowMetadata (org.talend.dataprep.api.dataset.RowMetadata)20 DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest)20 ApiOperation (io.swagger.annotations.ApiOperation)18 DataSet (org.talend.dataprep.api.dataset.DataSet)18 Type (org.talend.dataprep.api.type.Type)17 Timed (org.talend.dataprep.metrics.Timed)17 DistributedLock (org.talend.dataprep.lock.DistributedLock)16 Autowired (org.springframework.beans.factory.annotation.Autowired)14 DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow)14 IOException (java.io.IOException)13 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)13 ArrayList (java.util.ArrayList)12