Use of org.talend.dataprep.lock.DistributedLock in project data-prep by Talend: class DataSetServiceTest, method updateDatasetColumn_should_update_domain.
@Test
public void updateDatasetColumn_should_update_domain() throws Exception {
    // given
    final String dataSetId = given() //
            .body(IOUtils.toString(this.getClass().getResourceAsStream(TAGADA_CSV), UTF_8)) //
            .queryParam(CONTENT_TYPE, "text/csv") //
            .when() //
            .post("/datasets") //
            .asString();

    final ColumnMetadata column;

    // update the metadata in the repository (lock mechanism is needed, otherwise the semantic domain
    // will be erased by analysis)
    final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    DataSetMetadata dataSetMetadata;
    RowMetadata row;
    lock.lock();
    try {
        dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        assertNotNull(dataSetMetadata);
        row = dataSetMetadata.getRowMetadata();
        assertNotNull(row);
        column = row.getById("0002");
        final SemanticDomain jsoDomain = new SemanticDomain("JSO", "JSO label", 1.0F);
        column.getSemanticDomains().add(jsoDomain);
        dataSetMetadataRepository.save(dataSetMetadata);
    } finally {
        lock.unlock();
    }

    assertThat(column.getDomain(), is("FIRST_NAME"));
    assertThat(column.getDomainLabel(), is("First Name"));
    assertThat(column.getDomainFrequency(), is(100.0F));

    // when
    final Response res = given() //
            .body("{\"domain\": \"JSO\"}") //
            .when() //
            .contentType(JSON) //
            .post("/datasets/{dataSetId}/column/{columnId}", dataSetId, "0002");

    // then
    res.then().statusCode(200);
    dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
    assertNotNull(dataSetMetadata);
    row = dataSetMetadata.getRowMetadata();
    assertNotNull(row);
    final ColumnMetadata actual = row.getById("0002");
    assertThat(actual.getDomain(), is("JSO"));
    assertThat(actual.getDomainLabel(), is("JSO label"));
    assertThat(actual.getDomainFrequency(), is(1.0F));
}
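Every snippet on this page follows the same idiom: create a per-dataset lock from the metadata repository, acquire it, mutate the metadata inside a try block, and release the lock in a finally block so asynchronous analysis cannot overwrite the change. A minimal sketch of that idiom, assuming a dataSetMetadataRepository field exposing createDatasetMetadataLock(String) as in the snippets here (the body of the try block is only a placeholder):

// Minimal sketch of the shared locking idiom, not an exact copy of any method above.
final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
lock.lock();
try {
    final DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
    if (metadata != null) {
        // mutate the metadata while holding the lock, then persist it
        dataSetMetadataRepository.save(metadata);
    }
} finally {
    // always release the lock, even if the update fails
    lock.unlock();
}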
Use of org.talend.dataprep.lock.DistributedLock in project data-prep by Talend: class SynchronousAnalysisEnd, method analyze.
@Override
public void analyze(String dataSetId) {
    if (StringUtils.isEmpty(dataSetId)) {
        throw new IllegalArgumentException("Data set id cannot be null or empty.");
    }
    DistributedLock datasetLock = repository.createDatasetMetadataLock(dataSetId);
    datasetLock.lock();
    try {
        DataSetMetadata metadata = repository.get(dataSetId);
        if (metadata != null) {
            metadata.getLifecycle().setImporting(false);
            LOG.info("Finished content import of data set #{}.", dataSetId);
            repository.save(metadata);
        } else {
            LOG.info("Data set #{} no longer exists.", dataSetId); //$NON-NLS-1$
        }
    } finally {
        datasetLock.unlock();
    }
}
Use of org.talend.dataprep.lock.DistributedLock in project data-prep by Talend: class DataSetService, method updateRawDataSet.
/**
 * Updates a data set's content and metadata. If no data set exists for the given id, the data set is silently created.
 *
 * @param dataSetId The id of the data set to be updated.
 * @param name The new name for the data set. An empty (or <code>null</code>) name does not update the data set name.
 * @param size The size of the data set content, if known.
 * @param dataSetContent The new content for the data set. If empty, the existing content will <b>not</b> be replaced.
 * For a delete operation, see {@link #delete(String)}.
 */
@RequestMapping(value = "/datasets/{id}/raw", method = PUT)
@ApiOperation(value = "Update a data set by id", notes = "Update a data set content based on provided id and PUT body. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content. For documentation purposes, body is typed as 'text/plain' but operation accepts binary content too.")
@Timed
@VolumeMetered
public String updateRawDataSet( //
        @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to update") String dataSetId, //
        @RequestParam(value = "name", required = false) @ApiParam(name = "name", value = "New value for the data set name") String name, //
        @RequestParam(value = "size", required = false) @ApiParam(name = "size", value = "The size of the dataSet") Long size, //
        @ApiParam(value = "content") InputStream dataSetContent) {
    LOG.debug("updating dataset content #{}", dataSetId);

    if (name != null) {
        checkDataSetName(name);
    }

    DataSetMetadata currentDataSetMetadata = dataSetMetadataRepository.get(dataSetId);
    if (currentDataSetMetadata == null) {
        return create(name, null, size, TEXT_PLAIN_VALUE, dataSetContent);
    } else {
        // just like the creation, make sure an invalid size forbids the dataset update
        if (size != null && size < 0) {
            LOG.warn("invalid size provided {}", size);
            throw new TDPException(UNSUPPORTED_CONTENT);
        }

        final UpdateDataSetCacheKey cacheKey = new UpdateDataSetCacheKey(currentDataSetMetadata.getId());
        final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(currentDataSetMetadata.getId());
        try {
            lock.lock();

            // check the size if it's available (quick win)
            if (size != null && size > 0) {
                quotaService.checkIfAddingSizeExceedsAvailableStorage(Math.abs(size - currentDataSetMetadata.getDataSetSize()));
            }

            final DataSetMetadataBuilder datasetBuilder = metadataBuilder.metadata().id(currentDataSetMetadata.getId());
            datasetBuilder.copyNonContentRelated(currentDataSetMetadata);
            datasetBuilder.modified(System.currentTimeMillis());
            if (!StringUtils.isEmpty(name)) {
                datasetBuilder.name(name);
            }
            final DataSetMetadata updatedDataSetMetadata = datasetBuilder.build();

            // Save data set content into cache to make sure there's enough space in the content store
            final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
            final StrictlyBoundedInputStream sizeCalculator = new StrictlyBoundedInputStream(dataSetContent, maxDataSetSizeAllowed);
            try (OutputStream cacheEntry = cacheManager.put(cacheKey, TimeToLive.DEFAULT)) {
                IOUtils.copy(sizeCalculator, cacheEntry);
            }

            // once fully copied to the cache, we know for sure that the content store has enough space,
            // so let's copy from the cache to the content store
            PipedInputStream toContentStore = new PipedInputStream();
            PipedOutputStream fromCache = new PipedOutputStream(toContentStore);
            Runnable r = () -> {
                try (final InputStream input = cacheManager.get(cacheKey)) {
                    IOUtils.copy(input, fromCache);
                    // it's important to close this stream, otherwise the piped stream will never close
                    fromCache.close();
                } catch (IOException e) {
                    throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
                }
            };
            executor.execute(r);
            contentStore.storeAsRaw(updatedDataSetMetadata, toContentStore);

            // update the dataset metadata with its new size
            updatedDataSetMetadata.setDataSetSize(sizeCalculator.getTotal());
            dataSetMetadataRepository.save(updatedDataSetMetadata);

            // publishing update event
            publisher.publishEvent(new DatasetUpdatedEvent(updatedDataSetMetadata));
        } catch (StrictlyBoundedInputStream.InputStreamTooLargeException e) {
            LOG.warn("Dataset update {} cannot be done, new content is too big", currentDataSetMetadata.getId());
            throw new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, e, build().put("limit", e.getMaxSize()));
        } catch (IOException e) {
            LOG.error("Error updating the dataset", e);
            throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
        } finally {
            dataSetContentToNull(dataSetContent);
            // whatever the outcome, the cache needs to be cleaned
            if (cacheManager.has(cacheKey)) {
                cacheManager.evict(cacheKey);
            }
            lock.unlock();
        }

        // Content was changed, so queue events (format analysis, content indexing for search...)
        analyzeDataSet(currentDataSetMetadata.getId(), true, emptyList());
        return currentDataSetMetadata.getId();
    }
}
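The cache-to-content-store handoff above relies on a pair of piped streams: a background task pumps the cached bytes into a PipedOutputStream while storeAsRaw consumes the connected PipedInputStream on the calling thread. A stripped-down sketch of that pattern, where the executor, the in-memory source and the final read are placeholders for illustration only:

// Stripped-down sketch of the piped-stream handoff; not the actual service code.
ExecutorService executor = Executors.newSingleThreadExecutor();
PipedInputStream toConsumer = new PipedInputStream();
PipedOutputStream fromProducer = new PipedOutputStream(toConsumer);
executor.execute(() -> {
    try (InputStream source = new ByteArrayInputStream("dataset content".getBytes(UTF_8))) {
        IOUtils.copy(source, fromProducer);
        // closing the producer side is what lets the consumer side reach end-of-stream
        fromProducer.close();
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
});
// the consumer reads on the current thread while the producer writes on the executor thread
byte[] stored = IOUtils.toByteArray(toConsumer);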
Use of org.talend.dataprep.lock.DistributedLock in project data-prep by Talend: class DataSetService, method copy.
/**
 * Copies this data set to a new one and returns the new data set id as text in the response.
 *
 * @param dataSetId the id of the data set to copy
 * @param copyName the name of the copy
 * @return The new data set id.
 */
@RequestMapping(value = "/datasets/{id}/copy", method = POST, produces = TEXT_PLAIN_VALUE)
@ApiOperation(value = "Copy a data set", produces = TEXT_PLAIN_VALUE, notes = "Copy a new data set based on the given id. Returns the id of the newly created data set.")
@Timed
public String copy( //
        @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to clone") String dataSetId, //
        @ApiParam(value = "The name of the cloned dataset.") @RequestParam(required = false) String copyName) throws IOException {
    if (copyName != null) {
        checkDataSetName(copyName);
    }

    HttpResponseContext.contentType(TEXT_PLAIN_VALUE);

    DataSetMetadata original = dataSetMetadataRepository.get(dataSetId);
    if (original == null) {
        return StringUtils.EMPTY;
    }

    // use a default name if empty (original name + " Copy")
    final String newName;
    if (StringUtils.isBlank(copyName)) {
        newName = message("dataset.copy.newname", original.getName());
    } else {
        newName = copyName;
    }

    final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    try {
        // lock to ensure any asynchronous analysis is completed.
        lock.lock();

        // check that the name is not already taken
        checkIfNameIsAvailable(newName);

        // check that there's enough space
        final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
        if (maxDataSetSizeAllowed < original.getDataSetSize()) {
            throw new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, build().put("limit", maxDataSetSizeAllowed));
        }

        // Create copy (based on original data set metadata)
        final String newId = UUID.randomUUID().toString();
        final Marker marker = Markers.dataset(newId);
        LOG.debug(marker, "Cloning...");
        DataSetMetadata target = metadataBuilder.metadata() //
                .copy(original) //
                .id(newId) //
                .name(newName) //
                .author(security.getUserId()) //
                .location(original.getLocation()) //
                .created(System.currentTimeMillis()) //
                .build();

        // Save data set content
        LOG.debug(marker, "Storing content...");
        try (InputStream content = contentStore.getAsRaw(original)) {
            contentStore.storeAsRaw(target, content);
        }
        LOG.debug(marker, "Content stored.");

        // Create the new data set
        dataSetMetadataRepository.save(target);
        LOG.info(marker, "Copy done --> {}", newId);
        return newId;
    } finally {
        lock.unlock();
    }
}
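For context, calling this endpoint looks much like the REST-assured calls in the test snippet at the top of this page. The sketch below is hypothetical: the copy name is an example value and the base URI configuration is assumed to be in place.

// Hypothetical client call to the copy endpoint, in the REST-assured style of the test above.
String copiedId = given() //
        .queryParam("copyName", "My dataset Copy") // example name, not taken from the source
        .when() //
        .post("/datasets/{id}/copy", dataSetId) //
        .asString();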
Use of org.talend.dataprep.lock.DistributedLock in project data-prep by Talend: class DataSetService, method delete.
/**
 * Deletes the data set with the provided id.
 *
 * @param dataSetId A data set id. If the data set id is unknown, no exception is thrown; the response status is set to 404 (NOT_FOUND).
 */
@RequestMapping(value = "/datasets/{id}", method = RequestMethod.DELETE)
@ApiOperation(value = "Delete a data set by id", notes = "Delete a data set content based on provided id. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content.")
@Timed
public void delete(@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to delete") String dataSetId) {
    DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
    final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    try {
        lock.lock();
        if (metadata != null) {
            // first remove the metadata as there may be additional check
            dataSetMetadataRepository.remove(dataSetId);
            contentStore.delete(metadata);
        } else {
            HttpResponseContext.status(HttpStatus.NOT_FOUND);
        }
    } finally {
        lock.unlock();
    }
}
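A matching client-side call, again sketched in the REST-assured style used by the test above; the expected status codes follow from the method body (200 on success, 404 when the id is unknown), while the call itself is only an illustration.

// Hypothetical client call to the delete endpoint.
given().when().delete("/datasets/{id}", dataSetId).then().statusCode(200);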