Search in sources :

Example 91 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

The method copy of the class DataSetService.

/**
 * Copies the data set identified by {@code dataSetId} to a new data set and returns the id of the new data set as
 * plain text in the response.
 *
 * <p>
 * The metadata lock of the source data set is acquired before the original metadata is read and is held until the
 * copy has been saved, so that the copy is based on the metadata as it is once the lock is obtained.
 *
 * @param dataSetId the id of the data set to copy.
 * @param copyName the name of the copy. When blank, a default name built from the original name is used.
 * @return the id of the newly created data set, or an empty string if no data set matches {@code dataSetId}.
 * @throws IOException if the raw content stream of the original data set cannot be read or closed.
 * @throws TDPException if the size of the original data set exceeds the maximum data set size currently allowed.
 */
@RequestMapping(value = "/datasets/{id}/copy", method = POST, produces = TEXT_PLAIN_VALUE)
@ApiOperation(value = "Copy a data set", produces = TEXT_PLAIN_VALUE, notes = "Copy a new data set based on the given id. Returns the id of the newly created data set.")
@Timed
public String copy(@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to clone") String dataSetId, @ApiParam(value = "The name of the cloned dataset.") @RequestParam(required = false) String copyName) throws IOException {
    if (copyName != null) {
        checkDataSetName(copyName);
    }
    HttpResponseContext.contentType(TEXT_PLAIN_VALUE);
    final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    // Acquire the lock BEFORE entering the try block: if lock() fails, the finally clause must not call unlock()
    // on a lock that was never obtained. Holding the lock also ensures any asynchronous analysis is completed.
    lock.lock();
    try {
        // Read the original only once the lock is held, so that metadata saved by a previous holder of the lock
        // (presumably an asynchronous analysis -- to be confirmed) is what gets copied.
        final DataSetMetadata original = dataSetMetadataRepository.get(dataSetId);
        if (original == null) {
            return StringUtils.EMPTY;
        }
        // use a default name if empty (original name + " Copy" )
        final String newName = StringUtils.isBlank(copyName) //
                ? message("dataset.copy.newname", original.getName()) //
                : copyName;
        // check that the name is not already taken
        checkIfNameIsAvailable(newName);
        // check that there's enough space
        final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
        if (maxDataSetSizeAllowed < original.getDataSetSize()) {
            throw new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, build().put("limit", maxDataSetSizeAllowed));
        }
        // Create copy (based on original data set metadata)
        final String newId = UUID.randomUUID().toString();
        final Marker marker = Markers.dataset(newId);
        LOG.debug(marker, "Cloning...");
        final DataSetMetadata target = metadataBuilder.metadata()
                .copy(original)
                .id(newId)
                .name(newName)
                .author(security.getUserId())
                .location(original.getLocation())
                .created(System.currentTimeMillis())
                .build();
        // Save data set content
        LOG.debug(marker, "Storing content...");
        try (InputStream content = contentStore.getAsRaw(original)) {
            contentStore.storeAsRaw(target, content);
        }
        LOG.debug(marker, "Content stored.");
        // Create the new data set (metadata is saved only after the content has been stored)
        dataSetMetadataRepository.save(target);
        LOG.info(marker, "Copy done --> {}", newId);
        return newId;
    } finally {
        lock.unlock();
    }
}
Also used : TDPException(org.talend.dataprep.exception.TDPException) DistributedLock(org.talend.dataprep.lock.DistributedLock) PipedInputStream(java.io.PipedInputStream) StrictlyBoundedInputStream(org.talend.dataprep.dataset.store.content.StrictlyBoundedInputStream) InputStream(java.io.InputStream) Marker(org.slf4j.Marker) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Timed(org.talend.dataprep.metrics.Timed) ApiOperation(io.swagger.annotations.ApiOperation) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Example 92 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

The method delete of the class DataSetService.

/**
 * Deletes the data set with the provided id.
 *
 * <p>
 * The metadata lock of the data set is acquired before its metadata is looked up and is held until both the
 * metadata and the content have been removed.
 *
 * @param dataSetId A data set id. If no data set matches this id, nothing is deleted and the response status is set
 * to {@code NOT_FOUND}.
 */
@RequestMapping(value = "/datasets/{id}", method = RequestMethod.DELETE)
@ApiOperation(value = "Delete a data set by id", notes = "Delete a data set content based on provided id. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content.")
@Timed
public void delete(@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to delete") String dataSetId) {
    final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    // Acquire the lock BEFORE entering the try block: if lock() fails, the finally clause must not call unlock()
    // on a lock that was never obtained.
    lock.lock();
    try {
        // Look the metadata up under the lock so that the existence check and the removal see the same state.
        final DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
        if (metadata != null) {
            // first remove the metadata as there may be additional check
            dataSetMetadataRepository.remove(dataSetId);
            contentStore.delete(metadata);
        } else {
            HttpResponseContext.status(HttpStatus.NOT_FOUND);
        }
    } finally {
        lock.unlock();
    }
}
Also used : DistributedLock(org.talend.dataprep.lock.DistributedLock) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Timed(org.talend.dataprep.metrics.Timed) ApiOperation(io.swagger.annotations.ApiOperation) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Example 93 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

The method analyze of the class ContentAnalysis.

/**
 * Updates header/footer and limit information of the given data set and flags its content as indexed, all while
 * holding the metadata lock of the data set.
 *
 * @param dataSetId the id of the data set to analyze, must not be null nor empty.
 * @throws IllegalArgumentException if {@code dataSetId} is null or empty.
 * @see DataSetAnalyzer#analyze(String)
 */
@Override
public void analyze(String dataSetId) {
    // reject unusable ids before any lock is created
    if (StringUtils.isEmpty(dataSetId)) {
        throw new IllegalArgumentException("Data set id cannot be null or empty.");
    }
    final DistributedLock metadataLock = repository.createDatasetMetadataLock(dataSetId);
    metadataLock.lock();
    try {
        final DataSetMetadata dataSetMetadata = repository.get(dataSetId);
        if (dataSetMetadata == null) {
            // nothing to analyze: the data set is not in the repository
            LOG.info("Data set #{} no longer exists.", dataSetId); // $NON-NLS-1$
            return;
        }
        LOG.info("Indexing content of data set #{}...", dataSetMetadata.getId());
        updateHeaderAndFooter(dataSetMetadata);
        updateLimit(dataSetMetadata);
        dataSetMetadata.getLifecycle().contentIndexed(true);
        // persist the updated metadata before the lock is released
        repository.save(dataSetMetadata);
        LOG.info("Indexed content of data set #{}.", dataSetId);
    } finally {
        metadataLock.unlock();
    }
}
Also used : DistributedLock(org.talend.dataprep.lock.DistributedLock) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Example 94 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

The method run of the class StepRowMetadataMigration.

/**
 * Migrates the row metadata of every persistent step: the JSON stored in the step is parsed as a
 * {@link DataSetMetadata}, its row metadata is wrapped in a new {@link StepRowMetadata}, and the step is updated to
 * hold the id of that {@link StepRowMetadata} instead. Steps that cannot be migrated are logged and skipped.
 */
@Override
public void run() {
    // Allow non numeric value like NaN
    objectMapper.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
    LOGGER.info("Migration of step row metadata in preparations...");
    preparationRepository.list(PersistentStep.class).forEach(step -> {
        final String stepId = step.getId();
        LOGGER.info("Migration of step #{}", stepId);
        final String storedRowMetadata = step.getRowMetadata();
        if (storedRowMetadata == null) {
            // the rootstep has no metadata => avoid conversion
            return;
        }
        try {
            // Dirty patch to convert all histogram (2.0) to new one (2.1)
            final String patchedRowMetadata = storedRowMetadata
                    .replace("_class", "type")
                    .replace("org.talend.dataprep.api.dataset.statistics.number.NumberHistogram", "number")
                    .replace("org.talend.dataprep.api.dataset.statistics.date.DateHistogram", "date");
            final DataSetMetadata parsedMetadata = objectMapper.readerFor(DataSetMetadata.class).readValue(patchedRowMetadata);
            final StepRowMetadata migratedRowMetadata = new StepRowMetadata(parsedMetadata.getRowMetadata());
            // the step now only references the row metadata by id
            step.setRowMetadata(migratedRowMetadata.getId());
            preparationRepository.add(step);
            preparationRepository.add(migratedRowMetadata);
        } catch (Exception e) {
            // best effort: a step that fails to migrate must not stop the migration of the other steps
            LOGGER.info("Ignore migration of step #{} (enable debug for full log).", stepId);
            LOGGER.debug("Unable to migrate step", e);
        }
    });
    LOGGER.info("Migration of step metadata in preparations done.");
}
Also used : StepRowMetadata(org.talend.dataprep.api.preparation.StepRowMetadata) PersistentStep(org.talend.dataprep.preparation.store.PersistentStep) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Example 95 with DataSetMetadata

use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.

The method run of the class ToPEPersistentIdentifiable.

/**
 * Runs the upgrade in three passes:
 * <ol>
 * <li>every {@link Step} of the file system repository is removed from it and added, converted to a
 * {@link PersistentStep}, to the preparation repository;</li>
 * <li>every {@link Preparation} of the file system repository is removed from it and added, converted to a
 * {@link PersistentPreparation}, to the preparation repository;</li>
 * <li>every {@link PersistentPreparation} gets its list of step ids and its row metadata set (an empty
 * {@link RowMetadata} when the data set metadata cannot be found), then is stored again.</li>
 * </ol>
 */
@Override
public void run() {
    // Pass 1: steps
    LOGGER.debug("starting upgrade from {} to {}.", Step.class, PersistentStep.class);
    final AtomicLong processedSteps = new AtomicLong(0L);
    fileSystemPreparationRepository.list(Step.class).forEach(step -> {
        fileSystemPreparationRepository.remove(step);
        final PersistentStep converted = turnToPersistentStep(step);
        preparationRepository.add(converted);
        LOGGER.debug("step {} updated to {}", step, converted);
        processedSteps.incrementAndGet();
    });
    LOGGER.info("Upgrade from {} to {} done, {} steps processed.", Step.class, PersistentStep.class, processedSteps.get());

    // Pass 2: preparations
    LOGGER.debug("starting upgrade from {} to {}.", Preparation.class, PersistentPreparation.class);
    fileSystemPreparationRepository.list(Preparation.class).forEach(preparation -> {
        fileSystemPreparationRepository.remove(preparation);
        final PersistentPreparation converted = turnToPersistentPreparation(preparation);
        preparationRepository.add(converted);
    });
    LOGGER.info("Upgrade from {} to {} done.", Preparation.class, PersistentPreparation.class);

    // Pass 3: step ids and row metadata of the persistent preparations
    LOGGER.info("Migration of step ids in preparation...");
    preparationRepository.list(PersistentPreparation.class).forEach(preparation -> {
        LOGGER.info("Migration of preparation #{}", preparation.getId());
        final List<String> stepIds = preparationUtils.listStepsIds(preparation.getHeadId(), preparationRepository);
        preparation.setSteps(stepIds);
        final DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(preparation.getDataSetId());
        if (dataSetMetadata == null) {
            // no data set metadata available: fall back to an empty row metadata
            LOGGER.info("Metadata {} not found for preparation {}.", preparation.getDataSetId(), preparation.getId());
            preparation.setRowMetadata(new RowMetadata());
        } else {
            LOGGER.info("Set metadata {} in preparation {}.", preparation.getDataSetId(), preparation.getId());
            preparation.setRowMetadata(dataSetMetadata.getRowMetadata());
        }
        preparationRepository.add(preparation);
        LOGGER.info("Migration of preparation #{} done ({} steps)", preparation.getId(), stepIds.size());
    });
    LOGGER.info("Migration of step ids in preparation done.");
}
Also used : PersistentStep(org.talend.dataprep.preparation.store.PersistentStep) AtomicLong(java.util.concurrent.atomic.AtomicLong) Preparation(org.talend.dataprep.api.preparation.Preparation) PersistentPreparation(org.talend.dataprep.preparation.store.PersistentPreparation) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) PersistentStep(org.talend.dataprep.preparation.store.PersistentStep) Step(org.talend.dataprep.api.preparation.Step) PersistentPreparation(org.talend.dataprep.preparation.store.PersistentPreparation) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata)

Aggregations

DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata): 192, Test (org.junit.Test): 126, DataSetBaseTest (org.talend.dataprep.dataset.DataSetBaseTest): 63, ColumnMetadata (org.talend.dataprep.api.dataset.ColumnMetadata): 48, InputStream (java.io.InputStream): 45, Matchers.containsString (org.hamcrest.Matchers.containsString): 28, Matchers.isEmptyString (org.hamcrest.Matchers.isEmptyString): 28, TDPException (org.talend.dataprep.exception.TDPException): 26, RowMetadata (org.talend.dataprep.api.dataset.RowMetadata): 20, DataSetServiceTest (org.talend.dataprep.dataset.service.DataSetServiceTest): 20, ApiOperation (io.swagger.annotations.ApiOperation): 18, DataSet (org.talend.dataprep.api.dataset.DataSet): 18, Type (org.talend.dataprep.api.type.Type): 17, Timed (org.talend.dataprep.metrics.Timed): 17, DistributedLock (org.talend.dataprep.lock.DistributedLock): 16, Autowired (org.springframework.beans.factory.annotation.Autowired): 14, DataSetRow (org.talend.dataprep.api.dataset.row.DataSetRow): 14, IOException (java.io.IOException): 13, RequestMapping (org.springframework.web.bind.annotation.RequestMapping): 13, ArrayList (java.util.ArrayList): 12