
Example 1 with DataSetMetadataBuilder

Use of org.talend.dataprep.dataset.DataSetMetadataBuilder in project data-prep by Talend.

From class DataSetService, method updateRawDataSet.

/**
 * Updates a data set's content and metadata. If no data set exists for the given id, the data set is silently created.
 *
 * @param dataSetId The id of data set to be updated.
 * @param name The new name for the data set. An empty (or <code>null</code>) name does not update the data set name.
 * @param dataSetContent The new content for the data set. If empty, existing content will <b>not</b> be replaced.
 * For delete operation, look at {@link #delete(String)}.
 */
@RequestMapping(value = "/datasets/{id}/raw", method = PUT)
@ApiOperation(value = "Update a data set by id", notes = "Update a data set content based on provided id and PUT body. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content. For documentation purposes, body is typed as 'text/plain' but operation accepts binary content too.")
@Timed
@VolumeMetered
public String updateRawDataSet(// 
@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to update") String dataSetId, // 
@RequestParam(value = "name", required = false) @ApiParam(name = "name", value = "New value for the data set name") String name, // 
@RequestParam(value = "size", required = false) @ApiParam(name = "size", value = "The size of the dataSet") Long size, @ApiParam(value = "content") InputStream dataSetContent) {
    LOG.debug("updating dataset content #{}", dataSetId);
    if (name != null) {
        checkDataSetName(name);
    }
    DataSetMetadata currentDataSetMetadata = dataSetMetadataRepository.get(dataSetId);
    if (currentDataSetMetadata == null) {
        return create(name, null, size, TEXT_PLAIN_VALUE, dataSetContent);
    } else {
        // just like creation, make sure an invalid size forbids the update
        if (size != null && size < 0) {
            LOG.warn("invalid size provided {}", size);
            throw new TDPException(UNSUPPORTED_CONTENT);
        }
        final UpdateDataSetCacheKey cacheKey = new UpdateDataSetCacheKey(currentDataSetMetadata.getId());
        final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(currentDataSetMetadata.getId());
        try {
            lock.lock();
            // check the size if it's available (quick win)
            if (size != null && size > 0) {
                quotaService.checkIfAddingSizeExceedsAvailableStorage(Math.abs(size - currentDataSetMetadata.getDataSetSize()));
            }
            final DataSetMetadataBuilder datasetBuilder = metadataBuilder.metadata().id(currentDataSetMetadata.getId());
            datasetBuilder.copyNonContentRelated(currentDataSetMetadata);
            datasetBuilder.modified(System.currentTimeMillis());
            if (!StringUtils.isEmpty(name)) {
                datasetBuilder.name(name);
            }
            final DataSetMetadata updatedDataSetMetadata = datasetBuilder.build();
            // Save data set content into cache to make sure there's enough space in the content store
            final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
            final StrictlyBoundedInputStream sizeCalculator = new StrictlyBoundedInputStream(dataSetContent, maxDataSetSizeAllowed);
            try (OutputStream cacheEntry = cacheManager.put(cacheKey, TimeToLive.DEFAULT)) {
                IOUtils.copy(sizeCalculator, cacheEntry);
            }
            // once fully copied to the cache, we know for sure that the content store has enough space, so let's copy
            // from the cache to the content store
            PipedInputStream toContentStore = new PipedInputStream();
            PipedOutputStream fromCache = new PipedOutputStream(toContentStore);
            Runnable r = () -> {
                try (final InputStream input = cacheManager.get(cacheKey)) {
                    IOUtils.copy(input, fromCache);
                    // it's important to close this stream, otherwise the piped stream will never close
                    fromCache.close();
                } catch (IOException e) {
                    throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
                }
            };
            executor.execute(r);
            contentStore.storeAsRaw(updatedDataSetMetadata, toContentStore);
            // update the dataset metadata with its new size
            updatedDataSetMetadata.setDataSetSize(sizeCalculator.getTotal());
            dataSetMetadataRepository.save(updatedDataSetMetadata);
            // publishing update event
            publisher.publishEvent(new DatasetUpdatedEvent(updatedDataSetMetadata));
        } catch (StrictlyBoundedInputStream.InputStreamTooLargeException e) {
            LOG.warn("Dataset update {} cannot be done, new content is too big", currentDataSetMetadata.getId());
            throw new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, e, build().put("limit", e.getMaxSize()));
        } catch (IOException e) {
            LOG.error("Error updating the dataset", e);
            throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
        } finally {
            dataSetContentToNull(dataSetContent);
            // whatever the outcome the cache needs to be cleaned
            if (cacheManager.has(cacheKey)) {
                cacheManager.evict(cacheKey);
            }
            lock.unlock();
        }
        // Content was changed, so queue events (format analysis, content indexing for search...)
        analyzeDataSet(currentDataSetMetadata.getId(), true, emptyList());
        return currentDataSetMetadata.getId();
    }
}
Also used: DataSetMetadataBuilder(org.talend.dataprep.dataset.DataSetMetadataBuilder) PipedInputStream(java.io.PipedInputStream) StrictlyBoundedInputStream(org.talend.dataprep.dataset.store.content.StrictlyBoundedInputStream) InputStream(java.io.InputStream) PipedOutputStream(java.io.PipedOutputStream) NullOutputStream(org.apache.commons.io.output.NullOutputStream) OutputStream(java.io.OutputStream) IOException(java.io.IOException) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) TDPException(org.talend.dataprep.exception.TDPException) DistributedLock(org.talend.dataprep.lock.DistributedLock) DatasetUpdatedEvent(org.talend.dataprep.dataset.event.DatasetUpdatedEvent) UpdateDataSetCacheKey(org.talend.dataprep.dataset.service.cache.UpdateDataSetCacheKey) VolumeMetered(org.talend.dataprep.metrics.VolumeMetered) Timed(org.talend.dataprep.metrics.Timed) ApiOperation(io.swagger.annotations.ApiOperation) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)
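
The update path above streams the new content twice: once into the cache behind a StrictlyBoundedInputStream (which both enforces the size limit and counts bytes), then from the cache into the content store through a pair of piped streams whose writer runs on a separate thread. The standalone sketch below reproduces just the piped handoff with in-memory stand-ins for the cache and the content store; the class name and the stand-in streams are hypothetical, not part of data-prep.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.io.UncheckedIOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class PipedHandoffSketch {

    public static void main(String[] args) throws IOException {
        // stand-in for cacheManager.get(cacheKey)
        InputStream cached = new ByteArrayInputStream("dataset content".getBytes());
        PipedInputStream toContentStore = new PipedInputStream();
        PipedOutputStream fromCache = new PipedOutputStream(toContentStore);
        ExecutorService executor = Executors.newSingleThreadExecutor();
        // the writer must run on its own thread: a piped pair deadlocks once
        // the pipe buffer fills if a single thread plays both roles
        executor.execute(() -> {
            try (InputStream in = cached; PipedOutputStream out = fromCache) {
                in.transferTo(out);
                // closing 'out' (here via try-with-resources) delivers EOF to
                // the reader; without it the reader blocks forever, which is
                // why updateRawDataSet is careful to call fromCache.close()
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        });
        // reader side, standing in for contentStore.storeAsRaw(metadata, toContentStore)
        ByteArrayOutputStream store = new ByteArrayOutputStream();
        try (InputStream in = toContentStore) {
            in.transferTo(store);
        }
        executor.shutdown();
        System.out.println(store); // prints "dataset content"
    }
}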

Example 2 with DataSetMetadataBuilder

Use of org.talend.dataprep.dataset.DataSetMetadataBuilder in project data-prep by Talend.

From class DataSetServiceTest, method getMetadata.

@Test
public void getMetadata() throws Exception {
    DataSetMetadataBuilder builder = metadataBuilder.metadata().id("1234");
    builder.row(ColumnMetadata.Builder.column()
                    .id(1234)
                    .name("id")
                    .empty(0)
                    .invalid(0)
                    .valid(0)
                    .type(Type.STRING))
            .created(0)
            .name("name")
            .author("author")
            .footerSize(0)
            .headerSize(1)
            .qualityAnalyzed(true)
            .schemaAnalyzed(true)
            .formatFamilyId(new CSVFormatFamily().getBeanId())
            .mediaType("text/csv");
    DataSetMetadata metadata = builder.build();
    metadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";");
    dataSetMetadataRepository.save(metadata);
    String contentAsString = when().get("/datasets/{id}/metadata", "1234").asString();
    InputStream expected = this.getClass().getResourceAsStream("../metadata1.json");
    assertThat(contentAsString, sameJSONAsFile(expected));
    Boolean isFavorites = from(contentAsString).get("metadata.favorite");
    assertFalse(isFavorites);
    // add favorite
    UserData userData = new UserData(security.getUserId(), versionService.version().getVersionId());
    HashSet<String> favorites = new HashSet<>();
    favorites.add("1234");
    userData.setFavoritesDatasets(favorites);
    userDataRepository.save(userData);
    contentAsString = when().get("/datasets/{id}/metadata", "1234").asString();
    isFavorites = from(contentAsString).get("metadata.favorite");
    assertTrue(isFavorites);
}
Also used: DataSetMetadataBuilder(org.talend.dataprep.dataset.DataSetMetadataBuilder) UserData(org.talend.dataprep.api.user.UserData) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.isEmptyString(org.hamcrest.Matchers.isEmptyString) CSVFormatFamily(org.talend.dataprep.schema.csv.CSVFormatFamily) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) HashSet(java.util.HashSet) DataSetBaseTest(org.talend.dataprep.dataset.DataSetBaseTest) Test(org.junit.Test)
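
The test reads the favorite flag with REST Assured's JsonPath (from(contentAsString).get("metadata.favorite")). For the same lookup outside a REST Assured test, a plain-Jackson version might look like the sketch below; the hard-coded body is a hypothetical stand-in for the /datasets/{id}/metadata response, reduced to the one field being checked.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class FavoriteFlagSketch {

    public static void main(String[] args) throws Exception {
        // minimal stand-in for the metadata endpoint's JSON response
        String contentAsString = "{\"metadata\":{\"id\":\"1234\",\"favorite\":true}}";
        JsonNode root = new ObjectMapper().readTree(contentAsString);
        // path() returns a missing node rather than null, so an absent flag
        // simply reads as false
        boolean favorite = root.path("metadata").path("favorite").asBoolean();
        System.out.println(favorite); // true
    }
}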

Example 3 with DataSetMetadataBuilder

Use of org.talend.dataprep.dataset.DataSetMetadataBuilder in project data-prep by Talend.

From class DataSetMetadataRepositoryTestUtils, method ensureThatOnlyCompatibleDataSetsAreReturned.

public static void ensureThatOnlyCompatibleDataSetsAreReturned(DataSetMetadataRepository repository, DataSetMetadataBuilder builder) {
    // given
    final DataSetMetadata metadata1 = builder.metadata().id("0001")
            .row(column().type(Type.STRING).name("first"), column().type(Type.STRING).name("last"))
            .build();
    final DataSetMetadata metadata2 = builder.metadata().id("0002")
            .row(column().type(Type.STRING).name("last"), column().type(Type.STRING).name("first"))
            .build();
    final DataSetMetadata metadata3 = builder.metadata().id("0003")
            .row(column().type(Type.STRING).name("first"), column().type(Type.INTEGER).name("last"))
            .build();
    List<DataSetMetadata> metadatas = Arrays.asList(metadata1, metadata2, metadata3);
    // expected: data sets other than metadata1 whose schema is compatible with metadata1
    List<DataSetMetadata> expected = metadatas.stream().filter(m -> (!metadata1.equals(m) && metadata1.compatible(m))).sorted((m1, m2) -> m1.getId().compareTo(m2.getId())).collect(Collectors.toList());
    // when
    metadatas.forEach(repository::save);
    Iterable<DataSetMetadata> iterable = repository.listCompatible(metadata1.getId());
    List<DataSetMetadata> actual = StreamSupport.stream(iterable.spliterator(), false).sorted((m1, m2) -> m1.getId().compareTo(m2.getId())).collect(Collectors.toList());
    // then
    assertEquals(expected, actual);
}
Also used: Arrays(java.util.Arrays) List(java.util.List) Type(org.talend.dataprep.api.type.Type) DataSetMetadataBuilder(org.talend.dataprep.dataset.DataSetMetadataBuilder) Builder.column(org.talend.dataprep.api.dataset.ColumnMetadata.Builder.column) StreamSupport(java.util.stream.StreamSupport) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) Collectors(java.util.stream.Collectors) Assert.assertEquals(org.junit.Assert.assertEquals)
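
The test never spells out what compatible(...) means, but the fixtures imply the rule: metadata2 (the same columns with the order swapped) is expected in the result, while metadata3 (same names, but 'last' retyped to INTEGER) is not. Below is a minimal sketch of that inferred rule, using a hypothetical Column record in place of the real ColumnMetadata; treat it as an illustration of the test data, not as data-prep's actual implementation.

import java.util.HashSet;
import java.util.List;

public class CompatibilitySketch {

    // hypothetical stand-in for ColumnMetadata: just a name and a type
    record Column(String name, String type) {}

    // inferred semantics: same (name, type) pairs, column order ignored
    static boolean compatible(List<Column> a, List<Column> b) {
        return a.size() == b.size() && new HashSet<>(a).equals(new HashSet<>(b));
    }

    public static void main(String[] args) {
        List<Column> m1 = List.of(new Column("first", "STRING"), new Column("last", "STRING"));
        List<Column> m2 = List.of(new Column("last", "STRING"), new Column("first", "STRING"));
        List<Column> m3 = List.of(new Column("first", "STRING"), new Column("last", "INTEGER"));
        System.out.println(compatible(m1, m2)); // true: order does not matter
        System.out.println(compatible(m1, m3)); // false: 'last' changed type
    }
}

Note that the test sorts both expected and actual results by id before comparing, presumably because listCompatible makes no ordering guarantee.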

Aggregations

DataSetMetadata (org.talend.dataprep.api.dataset.DataSetMetadata)3 DataSetMetadataBuilder (org.talend.dataprep.dataset.DataSetMetadataBuilder)3 InputStream (java.io.InputStream)2 ApiOperation (io.swagger.annotations.ApiOperation)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 IOException (java.io.IOException)1 OutputStream (java.io.OutputStream)1 PipedInputStream (java.io.PipedInputStream)1 PipedOutputStream (java.io.PipedOutputStream)1 Arrays (java.util.Arrays)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 StreamSupport (java.util.stream.StreamSupport)1 NullOutputStream (org.apache.commons.io.output.NullOutputStream)1 Matchers.containsString (org.hamcrest.Matchers.containsString)1 Matchers.isEmptyString (org.hamcrest.Matchers.isEmptyString)1 Assert.assertEquals (org.junit.Assert.assertEquals)1 Test (org.junit.Test)1 RequestMapping (org.springframework.web.bind.annotation.RequestMapping)1