Use of org.talend.dataprep.dataset.DataSetMetadataBuilder in project data-prep by Talend.
From the class DataSetService, method updateRawDataSet.
/**
 * Updates a data set's content and metadata. If no data set exists for the given id, the data set is silently created.
 *
 * @param dataSetId The id of the data set to be updated.
 * @param name The new name for the data set. An empty name (or <code>null</code>) leaves the data set name unchanged.
 * @param size The expected size of the new content, if known (used for an early quota check).
 * @param dataSetContent The new content for the data set. If empty, the existing content will <b>not</b> be replaced.
 * For delete operations, see {@link #delete(String)}.
 * @return The id of the updated (or newly created) data set.
 */
@RequestMapping(value = "/datasets/{id}/raw", method = PUT)
@ApiOperation(value = "Update a data set by id", notes = "Update a data set content based on provided id and PUT body. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content. For documentation purposes, body is typed as 'text/plain' but operation accepts binary content too.")
@Timed
@VolumeMetered
public String updateRawDataSet(//
        @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to update") String dataSetId, //
        @RequestParam(value = "name", required = false) @ApiParam(name = "name", value = "New value for the data set name") String name, //
        @RequestParam(value = "size", required = false) @ApiParam(name = "size", value = "The size of the dataSet") Long size, //
        @ApiParam(value = "content") InputStream dataSetContent) {

    LOG.debug("updating dataset content #{}", dataSetId);

    if (name != null) {
        checkDataSetName(name);
    }

    DataSetMetadata currentDataSetMetadata = dataSetMetadataRepository.get(dataSetId);
    if (currentDataSetMetadata == null) {
        return create(name, null, size, TEXT_PLAIN_VALUE, dataSetContent);
    } else {
        // just like the creation, make sure an invalid size forbids the dataset update
        if (size != null && size < 0) {
            LOG.warn("invalid size provided {}", size);
            throw new TDPException(UNSUPPORTED_CONTENT);
        }

        final UpdateDataSetCacheKey cacheKey = new UpdateDataSetCacheKey(currentDataSetMetadata.getId());
        final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(currentDataSetMetadata.getId());
        try {
            lock.lock();

            // check the size if it's available (quick win)
            if (size != null && size > 0) {
                quotaService.checkIfAddingSizeExceedsAvailableStorage(Math.abs(size - currentDataSetMetadata.getDataSetSize()));
            }

            final DataSetMetadataBuilder datasetBuilder = metadataBuilder.metadata().id(currentDataSetMetadata.getId());
            datasetBuilder.copyNonContentRelated(currentDataSetMetadata);
            datasetBuilder.modified(System.currentTimeMillis());
            if (!StringUtils.isEmpty(name)) {
                datasetBuilder.name(name);
            }
            final DataSetMetadata updatedDataSetMetadata = datasetBuilder.build();

            // Save data set content into cache to make sure there's enough space in the content store
            final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
            final StrictlyBoundedInputStream sizeCalculator = new StrictlyBoundedInputStream(dataSetContent, maxDataSetSizeAllowed);
            try (OutputStream cacheEntry = cacheManager.put(cacheKey, TimeToLive.DEFAULT)) {
                IOUtils.copy(sizeCalculator, cacheEntry);
            }

            // once fully copied to the cache, we know for sure that the content store has enough space, so let's copy
            // from the cache to the content store
            PipedInputStream toContentStore = new PipedInputStream();
            PipedOutputStream fromCache = new PipedOutputStream(toContentStore);
            Runnable r = () -> {
                try (final InputStream input = cacheManager.get(cacheKey)) {
                    IOUtils.copy(input, fromCache);
                    // it's important to close this stream, otherwise the piped stream will never close
                    fromCache.close();
                } catch (IOException e) {
                    throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
                }
            };
            executor.execute(r);
            contentStore.storeAsRaw(updatedDataSetMetadata, toContentStore);

            // update the dataset metadata with its new size
            updatedDataSetMetadata.setDataSetSize(sizeCalculator.getTotal());
            dataSetMetadataRepository.save(updatedDataSetMetadata);

            // publish the update event
            publisher.publishEvent(new DatasetUpdatedEvent(updatedDataSetMetadata));
        } catch (StrictlyBoundedInputStream.InputStreamTooLargeException e) {
            LOG.warn("Dataset update {} cannot be done, new content is too big", currentDataSetMetadata.getId());
            throw new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, e, build().put("limit", e.getMaxSize()));
        } catch (IOException e) {
            LOG.error("Error updating the dataset", e);
            throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
        } finally {
            dataSetContentToNull(dataSetContent);
            // whatever the outcome, the cache needs to be cleaned
            if (cacheManager.has(cacheKey)) {
                cacheManager.evict(cacheKey);
            }
            lock.unlock();
        }
        // Content was changed, so queue events (format analysis, content indexing for search...)
        analyzeDataSet(currentDataSetMetadata.getId(), true, emptyList());
        return currentDataSetMetadata.getId();
    }
}
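For orientation, here is a minimal sketch of how a caller (for example an integration test written in the same REST Assured style used by DataSetServiceTest below) might exercise this endpoint. The CSV payload, the query parameter values, and the pre-3.x REST Assured import coordinates are illustrative assumptions, not code taken from the project.

import static com.jayway.restassured.RestAssured.given; // assumption: older (pre-3.x) REST Assured coordinates

// Hypothetical update of data set "1234" with a small CSV payload.
byte[] csvBytes = "id;name\n1;Alice\n2;Bob\n".getBytes();
String updatedId = given() //
        .queryParam("name", "customers") // optional: renames the data set
        .queryParam("size", csvBytes.length) // optional: enables the early quota check
        .body(csvBytes) //
        .when() //
        .put("/datasets/{id}/raw", "1234") //
        .asString(); // the service returns the data set id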
Use of org.talend.dataprep.dataset.DataSetMetadataBuilder in project data-prep by Talend.
From the class DataSetServiceTest, method getMetadata.
@Test
public void getMetadata() throws Exception {
    DataSetMetadataBuilder builder = metadataBuilder.metadata().id("1234");
    builder.row(ColumnMetadata.Builder.column() //
            .id(1234).name("id").empty(0).invalid(0).valid(0).type(Type.STRING)) //
            .created(0) //
            .name("name") //
            .author("author") //
            .footerSize(0) //
            .headerSize(1) //
            .qualityAnalyzed(true) //
            .schemaAnalyzed(true) //
            .formatFamilyId(new CSVFormatFamily().getBeanId()) //
            .mediaType("text/csv");
    DataSetMetadata metadata = builder.build();
    metadata.getContent().addParameter(CSVFormatFamily.SEPARATOR_PARAMETER, ";");
    dataSetMetadataRepository.save(metadata);

    String contentAsString = when().get("/datasets/{id}/metadata", "1234").asString();
    InputStream expected = this.getClass().getResourceAsStream("../metadata1.json");
    assertThat(contentAsString, sameJSONAsFile(expected));

    Boolean isFavorites = from(contentAsString).get("metadata.favorite");
    assertFalse(isFavorites);

    // add favorite
    UserData userData = new UserData(security.getUserId(), versionService.version().getVersionId());
    HashSet<String> favorites = new HashSet<>();
    favorites.add("1234");
    userData.setFavoritesDatasets(favorites);
    userDataRepository.save(userData);

    contentAsString = when().get("/datasets/{id}/metadata", "1234").asString();
    isFavorites = from(contentAsString).get("metadata.favorite");
    assertTrue(isFavorites);
}
Use of org.talend.dataprep.dataset.DataSetMetadataBuilder in project data-prep by Talend.
From the class DataSetMetadataRepositoryTestUtils, method ensureThatOnlyCompatibleDataSetsAreReturned.
public static void ensureThatOnlyCompatibleDataSetsAreReturned(DataSetMetadataRepository repository, DataSetMetadataBuilder builder) {
    // given
    final DataSetMetadata metadata1 = builder.metadata().id("0001") //
            .row(column().type(Type.STRING).name("first"), column().type(Type.STRING).name("last")) //
            .build();
    final DataSetMetadata metadata2 = builder.metadata().id("0002") //
            .row(column().type(Type.STRING).name("last"), column().type(Type.STRING).name("first")) //
            .build();
    final DataSetMetadata metadata3 = builder.metadata().id("0003") //
            .row(column().type(Type.STRING).name("first"), column().type(Type.INTEGER).name("last")) //
            .build();
    List<DataSetMetadata> metadatas = Arrays.asList(metadata1, metadata2, metadata3);

    // retrieve the set of data sets that differ from metadata1 but have a compatible schema
    List<DataSetMetadata> expected = metadatas.stream() //
            .filter(m -> !metadata1.equals(m) && metadata1.compatible(m)) //
            .sorted((m1, m2) -> m1.getId().compareTo(m2.getId())) //
            .collect(Collectors.toList());

    // when
    metadatas.stream().forEach(m -> repository.save(m));
    Iterable<DataSetMetadata> iterable = repository.listCompatible(metadata1.getId());
    List<DataSetMetadata> actual = StreamSupport.stream(iterable.spliterator(), false) //
            .sorted((m1, m2) -> m1.getId().compareTo(m2.getId())) //
            .collect(Collectors.toList());

    // then
    assertEquals(expected, actual);
}
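With these fixtures, "0002" declares the same two STRING columns as "0001" (only in reverse order), while "0003" changes the type of the "last" column to INTEGER. Assuming compatible() compares column names and types, the expected list therefore reduces to the single entry "0002"; a hypothetical direct assertion of that outcome would look like:

// Assumption: schema compatibility is based on column names and types, so only "0002" qualifies.
assertEquals(1, actual.size());
assertEquals("0002", actual.get(0).getId());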