Search in sources :

Example 1 with FormatFamily

use of org.talend.dataprep.schema.FormatFamily in project data-prep by Talend.

the class DataSetService method updateDataSet.

/**
 * Updates a data set metadata. If no data set exists for given id, a {@link TDPException} is thrown.
 *
 * @param dataSetId The id of data set to be updated.
 * @param dataSetMetadata The new content for the data set. If empty, existing content will <b>not</b> be replaced.
 * For delete operation, look at {@link #delete(String)}.
 */
@RequestMapping(value = "/datasets/{id}", method = PUT)
@ApiOperation(value = "Update a data set metadata by id", notes = "Update a data set metadata according to the content of the PUT body. Id should be a UUID returned by the list operation. Not valid or non existing data set id return an error response.")
@Timed
public void updateDataSet(@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to update") String dataSetId, @RequestBody DataSetMetadata dataSetMetadata) {
    if (dataSetMetadata != null && dataSetMetadata.getName() != null) {
        checkDataSetName(dataSetMetadata.getName());
    }
    final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    lock.lock();
    try {
        DataSetMetadata metadataForUpdate = dataSetMetadataRepository.get(dataSetId);
        if (metadataForUpdate == null) {
            // No need to silently create the data set metadata: associated content will most likely not exist.
            throw new TDPException(DataSetErrorCodes.DATASET_DOES_NOT_EXIST, build().put("id", dataSetId));
        }
        LOG.debug("updateDataSet: {}", dataSetMetadata);
        publisher.publishEvent(new DatasetUpdatedEvent(dataSetMetadata));
        // 
        // Only part of the metadata can be updated, so the original dataset metadata is loaded and updated
        // 
        DataSetMetadata original = metadataBuilder.metadata().copy(metadataForUpdate).build();
        try {
            // update the name
            metadataForUpdate.setName(dataSetMetadata.getName());
            // update the sheet content (in case of a multi-sheet excel file)
            if (metadataForUpdate.getSchemaParserResult() != null) {
                Optional<Schema.SheetContent> sheetContentFound = metadataForUpdate.getSchemaParserResult().getSheetContents().stream().filter(sheetContent -> dataSetMetadata.getSheetName().equals(sheetContent.getName())).findFirst();
                if (sheetContentFound.isPresent()) {
                    List<ColumnMetadata> columnMetadatas = sheetContentFound.get().getColumnMetadatas();
                    if (metadataForUpdate.getRowMetadata() == null) {
                        metadataForUpdate.setRowMetadata(new RowMetadata(emptyList()));
                    }
                    metadataForUpdate.getRowMetadata().setColumns(columnMetadatas);
                }
                metadataForUpdate.setSheetName(dataSetMetadata.getSheetName());
                metadataForUpdate.setSchemaParserResult(null);
            }
            // Location updates
            metadataForUpdate.setLocation(dataSetMetadata.getLocation());
            // update parameters & encoding (so that user can change import parameters for CSV)
            metadataForUpdate.getContent().setParameters(dataSetMetadata.getContent().getParameters());
            metadataForUpdate.setEncoding(dataSetMetadata.getEncoding());
            // update limit
            final Optional<Long> newLimit = dataSetMetadata.getContent().getLimit();
            newLimit.ifPresent(limit -> metadataForUpdate.getContent().setLimit(limit));
            // Validate that the new data set metadata and removes the draft status
            final String formatFamilyId = dataSetMetadata.getContent().getFormatFamilyId();
            if (formatFamilyFactory.hasFormatFamily(formatFamilyId)) {
                FormatFamily format = formatFamilyFactory.getFormatFamily(formatFamilyId);
                try {
                    DraftValidator draftValidator = format.getDraftValidator();
                    DraftValidator.Result result = draftValidator.validate(dataSetMetadata);
                    if (result.isDraft()) {
                        // This is not an exception case: data set may remain a draft after update (although rather
                        // unusual)
                        LOG.warn("Data set #{} is still a draft after update.", dataSetId);
                        return;
                    }
                    // Data set metadata to update is no longer a draft
                    metadataForUpdate.setDraft(false);
                } catch (UnsupportedOperationException e) {
                // no need to validate draft here
                }
            }
            // update schema
            formatAnalyzer.update(original, metadataForUpdate);
            // save the result
            dataSetMetadataRepository.save(metadataForUpdate);
            // all good mate!! so send that to jms
            // Asks for a in depth schema analysis (for column type information).
            analyzeDataSet(dataSetId, true, singletonList(FormatAnalysis.class));
        } catch (TDPException e) {
            throw e;
        } catch (Exception e) {
            throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
        }
    } finally {
        lock.unlock();
    }
}
Also used : VolumeMetered(org.talend.dataprep.metrics.VolumeMetered) RequestParam(org.springframework.web.bind.annotation.RequestParam) ImportBuilder(org.talend.dataprep.api.dataset.Import.ImportBuilder) FormatFamilyFactory(org.talend.dataprep.schema.FormatFamilyFactory) Autowired(org.springframework.beans.factory.annotation.Autowired) ApiParam(io.swagger.annotations.ApiParam) StringUtils(org.apache.commons.lang3.StringUtils) TEXT_PLAIN_VALUE(org.springframework.http.MediaType.TEXT_PLAIN_VALUE) SortAndOrderHelper.getDataSetMetadataComparator(org.talend.dataprep.util.SortAndOrderHelper.getDataSetMetadataComparator) Collections.singletonList(java.util.Collections.singletonList) SemanticDomain(org.talend.dataprep.api.dataset.statistics.SemanticDomain) BeanConversionService(org.talend.dataprep.conversions.BeanConversionService) PipedInputStream(java.io.PipedInputStream) DistributedLock(org.talend.dataprep.lock.DistributedLock) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) DataprepBundle.message(org.talend.dataprep.i18n.DataprepBundle.message) UserData(org.talend.dataprep.api.user.UserData) TaskExecutor(org.springframework.core.task.TaskExecutor) MAX_STORAGE_MAY_BE_EXCEEDED(org.talend.dataprep.exception.error.DataSetErrorCodes.MAX_STORAGE_MAY_BE_EXCEEDED) DataSet(org.talend.dataprep.api.dataset.DataSet) LocalStoreLocation(org.talend.dataprep.api.dataset.location.LocalStoreLocation) FormatFamily(org.talend.dataprep.schema.FormatFamily) Resource(javax.annotation.Resource) Set(java.util.Set) DatasetUpdatedEvent(org.talend.dataprep.dataset.event.DatasetUpdatedEvent) RestController(org.springframework.web.bind.annotation.RestController) QuotaService(org.talend.dataprep.dataset.store.QuotaService) Stream(java.util.stream.Stream) StreamSupport.stream(java.util.stream.StreamSupport.stream) FlagNames(org.talend.dataprep.api.dataset.row.FlagNames) UNEXPECTED_CONTENT(org.talend.dataprep.exception.error.CommonErrorCodes.UNEXPECTED_CONTENT) Analyzers(org.talend.dataquality.common.inference.Analyzers) DataSetLocatorService(org.talend.dataprep.api.dataset.location.locator.DataSetLocatorService) Callable(java.util.concurrent.Callable) Schema(org.talend.dataprep.schema.Schema) ArrayList(java.util.ArrayList) Value(org.springframework.beans.factory.annotation.Value) RequestBody(org.springframework.web.bind.annotation.RequestBody) DataSetLocationService(org.talend.dataprep.api.dataset.location.DataSetLocationService) AnalyzerService(org.talend.dataprep.quality.AnalyzerService) UserDataRepository(org.talend.dataprep.user.store.UserDataRepository) Markers(org.talend.dataprep.log.Markers) Api(io.swagger.annotations.Api) DraftValidator(org.talend.dataprep.schema.DraftValidator) HttpResponseContext(org.talend.dataprep.http.HttpResponseContext) Sort(org.talend.dataprep.util.SortAndOrderHelper.Sort) IOException(java.io.IOException) PipedOutputStream(java.io.PipedOutputStream) FormatAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.FormatAnalysis) ContentAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.ContentAnalysis) SchemaAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.SchemaAnalysis) HttpStatus(org.springframework.http.HttpStatus) FilterService(org.talend.dataprep.api.filter.FilterService) Marker(org.slf4j.Marker) NullOutputStream(org.apache.commons.io.output.NullOutputStream) StatisticsAdapter(org.talend.dataprep.dataset.StatisticsAdapter) Timed(org.talend.dataprep.metrics.Timed) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) PathVariable(org.springframework.web.bind.annotation.PathVariable) DataSetMetadataBuilder(org.talend.dataprep.dataset.DataSetMetadataBuilder) URLDecoder(java.net.URLDecoder) DataSetErrorCodes(org.talend.dataprep.exception.error.DataSetErrorCodes) PUT(org.springframework.web.bind.annotation.RequestMethod.PUT) LoggerFactory(org.slf4j.LoggerFactory) SEMANTIC(org.talend.dataprep.quality.AnalyzerService.Analysis.SEMANTIC) ApiOperation(io.swagger.annotations.ApiOperation) UNABLE_TO_CREATE_OR_UPDATE_DATASET(org.talend.dataprep.exception.error.DataSetErrorCodes.UNABLE_TO_CREATE_OR_UPDATE_DATASET) DataSetRow(org.talend.dataprep.api.dataset.row.DataSetRow) StrictlyBoundedInputStream(org.talend.dataprep.dataset.store.content.StrictlyBoundedInputStream) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) UNSUPPORTED_CONTENT(org.talend.dataprep.exception.error.DataSetErrorCodes.UNSUPPORTED_CONTENT) TimeToLive(org.talend.dataprep.cache.ContentCache.TimeToLive) Order(org.talend.dataprep.util.SortAndOrderHelper.Order) Collections.emptyList(java.util.Collections.emptyList) PublicAPI(org.talend.dataprep.security.PublicAPI) RequestMethod(org.springframework.web.bind.annotation.RequestMethod) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) ContentCache(org.talend.dataprep.cache.ContentCache) INVALID_DATASET_NAME(org.talend.dataprep.exception.error.DataSetErrorCodes.INVALID_DATASET_NAME) List(java.util.List) Optional(java.util.Optional) Analyzer(org.talend.dataquality.common.inference.Analyzer) RequestHeader(org.springframework.web.bind.annotation.RequestHeader) Pattern(java.util.regex.Pattern) Security(org.talend.dataprep.security.Security) Spliterator(java.util.Spliterator) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) ComponentProperties(org.talend.dataprep.parameters.jsonschema.ComponentProperties) TDPException(org.talend.dataprep.exception.TDPException) JsonErrorCodeDescription(org.talend.dataprep.exception.json.JsonErrorCodeDescription) RequestMapping(org.springframework.web.bind.annotation.RequestMapping) UNABLE_CREATE_DATASET(org.talend.dataprep.exception.error.DataSetErrorCodes.UNABLE_CREATE_DATASET) HashMap(java.util.HashMap) GET(org.springframework.web.bind.annotation.RequestMethod.GET) Import(org.talend.dataprep.api.dataset.Import) ExceptionContext.build(org.talend.daikon.exception.ExceptionContext.build) ExceptionContext(org.talend.daikon.exception.ExceptionContext) Charset(java.nio.charset.Charset) UpdateColumnParameters(org.talend.dataprep.dataset.service.api.UpdateColumnParameters) VersionService(org.talend.dataprep.api.service.info.VersionService) POST(org.springframework.web.bind.annotation.RequestMethod.POST) OutputStream(java.io.OutputStream) DataSetLocation(org.talend.dataprep.api.dataset.DataSetLocation) Logger(org.slf4j.Logger) LocaleContextHolder.getLocale(org.springframework.context.i18n.LocaleContextHolder.getLocale) UpdateDataSetCacheKey(org.talend.dataprep.dataset.service.cache.UpdateDataSetCacheKey) IOUtils(org.apache.commons.compress.utils.IOUtils) APPLICATION_JSON_VALUE(org.springframework.http.MediaType.APPLICATION_JSON_VALUE) ResponseBody(org.springframework.web.bind.annotation.ResponseBody) Certification(org.talend.dataprep.api.dataset.DataSetGovernance.Certification) EncodingSupport(org.talend.dataprep.configuration.EncodingSupport) Comparator(java.util.Comparator) InputStream(java.io.InputStream) ColumnMetadata(org.talend.dataprep.api.dataset.ColumnMetadata) FormatAnalysis(org.talend.dataprep.dataset.service.analysis.synchronous.FormatAnalysis) FormatFamily(org.talend.dataprep.schema.FormatFamily) DataSetMetadata(org.talend.dataprep.api.dataset.DataSetMetadata) IOException(java.io.IOException) TDPException(org.talend.dataprep.exception.TDPException) TDPException(org.talend.dataprep.exception.TDPException) DistributedLock(org.talend.dataprep.lock.DistributedLock) DatasetUpdatedEvent(org.talend.dataprep.dataset.event.DatasetUpdatedEvent) DraftValidator(org.talend.dataprep.schema.DraftValidator) RowMetadata(org.talend.dataprep.api.dataset.RowMetadata) Timed(org.talend.dataprep.metrics.Timed) ApiOperation(io.swagger.annotations.ApiOperation) RequestMapping(org.springframework.web.bind.annotation.RequestMapping)

Aggregations

Api (io.swagger.annotations.Api)1 ApiOperation (io.swagger.annotations.ApiOperation)1 ApiParam (io.swagger.annotations.ApiParam)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 PipedInputStream (java.io.PipedInputStream)1 PipedOutputStream (java.io.PipedOutputStream)1 URLDecoder (java.net.URLDecoder)1 Charset (java.nio.charset.Charset)1 ArrayList (java.util.ArrayList)1 Arrays.asList (java.util.Arrays.asList)1 Collections.emptyList (java.util.Collections.emptyList)1 Collections.singletonList (java.util.Collections.singletonList)1 Comparator (java.util.Comparator)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 Set (java.util.Set)1