Use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
Class SynchronousAnalysisEnd, method analyze.
    @Override
    public void analyze(String dataSetId) {
        if (StringUtils.isEmpty(dataSetId)) {
            throw new IllegalArgumentException("Data set id cannot be null or empty.");
        }
        DistributedLock datasetLock = repository.createDatasetMetadataLock(dataSetId);
        datasetLock.lock();
        try {
            DataSetMetadata metadata = repository.get(dataSetId);
            if (metadata != null) {
                metadata.getLifecycle().setImporting(false);
                LOG.info("Finished content import of data set #{}.", dataSetId);
                repository.save(metadata);
            } else {
                LOG.info("Data set #{} no longer exists.", dataSetId); // $NON-NLS-1$
            }
        } finally {
            datasetLock.unlock();
        }
    }
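The method above illustrates a pattern that recurs throughout data-prep: acquire a per-dataset lock, read and mutate the metadata, save it, and release the lock in a finally block so it is freed even when the update fails. The following is a minimal, self-contained sketch of that lock-guarded update, using plain JDK locks and maps as stand-ins for the Talend DistributedLock and metadata repository (the names here are illustrative assumptions, not the actual APIs).

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantLock;

// Simplified stand-ins for DistributedLock and the metadata repository (illustrative, not Talend APIs).
class LockGuardedUpdateSketch {

    private final Map<String, ReentrantLock> locks = new ConcurrentHashMap<>();
    private final Map<String, String> metadataStore = new ConcurrentHashMap<>();

    // Update a dataset entry only while holding its per-dataset lock.
    void updateWithLock(String dataSetId, String newValue) {
        ReentrantLock lock = locks.computeIfAbsent(dataSetId, id -> new ReentrantLock());
        lock.lock();
        try {
            // The read-modify-write is safe because concurrent writers block on the same lock.
            metadataStore.put(dataSetId, newValue);
        } finally {
            lock.unlock(); // always release, even if the update above throws
        }
    }

    public static void main(String[] args) {
        LockGuardedUpdateSketch sketch = new LockGuardedUpdateSketch();
        sketch.updateWithLock("dataset-42", "importing=false");
        System.out.println(sketch.metadataStore);
    }
}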
Use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
Class DataSetService, method updateRawDataSet.
    /**
     * Updates a data set content and metadata. If no data set exists for the given id, the data set is silently created.
     *
     * @param dataSetId The id of the data set to be updated.
     * @param name The new name for the data set. An empty (or <code>null</code>) name does not update the dataset name.
     * @param dataSetContent The new content for the data set. If empty, the existing content will <b>not</b> be replaced.
     * For a delete operation, see {@link #delete(String)}.
     */
@RequestMapping(value = "/datasets/{id}/raw", method = PUT)
@ApiOperation(value = "Update a data set by id", notes = "Update a data set content based on provided id and PUT body. Id should be a UUID returned by the list operation. Not valid or non existing data set id returns empty content. For documentation purposes, body is typed as 'text/plain' but operation accepts binary content too.")
@Timed
@VolumeMetered
public String updateRawDataSet(//
@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to update") String dataSetId, //
@RequestParam(value = "name", required = false) @ApiParam(name = "name", value = "New value for the data set name") String name, //
@RequestParam(value = "size", required = false) @ApiParam(name = "size", value = "The size of the dataSet") Long size, @ApiParam(value = "content") InputStream dataSetContent) {
LOG.debug("updating dataset content #{}", dataSetId);
if (name != null) {
checkDataSetName(name);
}
DataSetMetadata currentDataSetMetadata = dataSetMetadataRepository.get(dataSetId);
if (currentDataSetMetadata == null) {
return create(name, null, size, TEXT_PLAIN_VALUE, dataSetContent);
} else {
// just like the creation, let's make sure invalid size forbids dataset creation
if (size != null && size < 0) {
LOG.warn("invalid size provided {}", size);
throw new TDPException(UNSUPPORTED_CONTENT);
}
final UpdateDataSetCacheKey cacheKey = new UpdateDataSetCacheKey(currentDataSetMetadata.getId());
final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(currentDataSetMetadata.getId());
try {
lock.lock();
// check the size if it's available (quick win)
if (size != null && size > 0) {
quotaService.checkIfAddingSizeExceedsAvailableStorage(Math.abs(size - currentDataSetMetadata.getDataSetSize()));
}
final DataSetMetadataBuilder datasetBuilder = metadataBuilder.metadata().id(currentDataSetMetadata.getId());
datasetBuilder.copyNonContentRelated(currentDataSetMetadata);
datasetBuilder.modified(System.currentTimeMillis());
if (!StringUtils.isEmpty(name)) {
datasetBuilder.name(name);
}
final DataSetMetadata updatedDataSetMetadata = datasetBuilder.build();
// Save data set content into cache to make sure there's enough space in the content store
final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
final StrictlyBoundedInputStream sizeCalculator = new StrictlyBoundedInputStream(dataSetContent, maxDataSetSizeAllowed);
try (OutputStream cacheEntry = cacheManager.put(cacheKey, TimeToLive.DEFAULT)) {
IOUtils.copy(sizeCalculator, cacheEntry);
}
// once fully copied to the cache, we know for sure that the content store has enough space, so let's copy
// from the cache to the content store
PipedInputStream toContentStore = new PipedInputStream();
PipedOutputStream fromCache = new PipedOutputStream(toContentStore);
Runnable r = () -> {
try (final InputStream input = cacheManager.get(cacheKey)) {
IOUtils.copy(input, fromCache);
// it's important to close this stream, otherwise the piped stream will never close
fromCache.close();
} catch (IOException e) {
throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
}
};
executor.execute(r);
contentStore.storeAsRaw(updatedDataSetMetadata, toContentStore);
// update the dataset metadata with its new size
updatedDataSetMetadata.setDataSetSize(sizeCalculator.getTotal());
dataSetMetadataRepository.save(updatedDataSetMetadata);
// publishing update event
publisher.publishEvent(new DatasetUpdatedEvent(updatedDataSetMetadata));
} catch (StrictlyBoundedInputStream.InputStreamTooLargeException e) {
LOG.warn("Dataset update {} cannot be done, new content is too big", currentDataSetMetadata.getId());
throw new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, e, build().put("limit", e.getMaxSize()));
} catch (IOException e) {
LOG.error("Error updating the dataset", e);
throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
} finally {
dataSetContentToNull(dataSetContent);
// whatever the outcome the cache needs to be cleaned
if (cacheManager.has(cacheKey)) {
cacheManager.evict(cacheKey);
}
lock.unlock();
}
// Content was changed, so queue events (format analysis, content indexing for search...)
analyzeDataSet(currentDataSetMetadata.getId(), true, emptyList());
return currentDataSetMetadata.getId();
}
}
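Two details of this update path are worth isolating. The uploaded bytes are first copied into a cache entry through a size-bounded stream, which enforces the maximum allowed dataset size and yields the final byte count; only then are they replayed into the content store through a PipedOutputStream/PipedInputStream pair, with the producer running on a separate thread and closing its end so the consumer can finish. The sketch below reproduces just that piping technique with JDK-only types; the source and sink streams are placeholders rather than the Talend cache manager or content store.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.io.UncheckedIOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// JDK-only sketch of the producer/consumer piping used above; the streams are placeholders.
class PipedCopySketch {

    public static void main(String[] args) throws IOException {
        InputStream fromCacheSource = new ByteArrayInputStream("cached dataset content".getBytes());
        ByteArrayOutputStream contentStoreSink = new ByteArrayOutputStream(); // stands in for the content store

        PipedInputStream toContentStore = new PipedInputStream();
        PipedOutputStream fromCache = new PipedOutputStream(toContentStore);

        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.execute(() -> {
            try (InputStream input = fromCacheSource; PipedOutputStream out = fromCache) {
                input.transferTo(out); // closing 'out' is what lets the reading side reach end-of-stream
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        });

        // The consumer reads on the current thread while the producer writes on the executor thread.
        toContentStore.transferTo(contentStoreSink);
        toContentStore.close();
        executor.shutdown();
        System.out.println(contentStoreSink);
    }
}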
Use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
Class DataSetService, method getDataStoreParameters.
@RequestMapping(value = "/datasets/{id}/datastore/properties", method = GET)
@ApiOperation(value = "Get the dataset import parameters", notes = "This list can be used by user to change dataset encoding.")
@Timed
public // ComponentProperties
Object getDataStoreParameters(@PathVariable("id") final String dataSetId) {
DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
Object parametersToReturn = null;
if (dataSetMetadata != null) {
DataSetLocation matchingDatasetLocation = locationsService.findLocation(dataSetMetadata.getLocation().getLocationType());
if (matchingDatasetLocation == null) {
parametersToReturn = emptyList();
} else {
if (matchingDatasetLocation.isSchemaOriented()) {
ComponentProperties parametersAsSchema = matchingDatasetLocation.getParametersAsSchema(getLocale());
parametersAsSchema.setProperties(dataSetMetadata.getLocation().getParametersAsSchema(getLocale()).getProperties());
parametersToReturn = parametersAsSchema;
} else {
parametersToReturn = matchingDatasetLocation.getParameters(getLocale());
}
}
}
return parametersToReturn;
}
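Only the dataset id is needed to fetch these import parameters. A hedged client-side sketch using Spring's RestTemplate is shown below; the base URL and dataset id are placeholders, and the body is read as a raw string since the endpoint may return either a ComponentProperties payload or a plain parameter list.

import org.springframework.web.client.RestTemplate;

// Hypothetical client call for the endpoint above; the base URL and dataset id are placeholders.
class DataStoreParametersClientSketch {

    public static void main(String[] args) {
        RestTemplate restTemplate = new RestTemplate();
        // The endpoint returns either a ComponentProperties payload or a plain parameter list,
        // so the body is read here as a raw string for illustration.
        String body = restTemplate.getForObject(
                "http://localhost:8080/datasets/{id}/datastore/properties", String.class, "some-dataset-id");
        System.out.println(body);
    }
}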
Use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
Class DataSetService, method create.
    /**
     * Creates a new data set and returns the new data set id as text in the response.
     *
     * @param name An optional name for the new data set (might be <code>null</code>).
     * @param size An optional size for the newly created data set.
     * @param contentType the request content type.
     * @param content The raw content of the data set (might be a CSV, XLS...) or the connection parameters in case of a
     * remote CSV.
     * @return The new data set id.
     * @see DataSetService#get(boolean, boolean, String, String)
     */
    // @formatter:off
    @RequestMapping(value = "/datasets", method = POST, produces = TEXT_PLAIN_VALUE)
    @ApiOperation(value = "Create a data set", produces = TEXT_PLAIN_VALUE, notes = "Create a new data set based on content provided in POST body. For documentation purposes, body is typed as 'text/plain' but operation accepts binary content too. Returns the id of the newly created data set.")
    @Timed
    @VolumeMetered
    public String create(@ApiParam(value = "User readable name of the data set (e.g. 'Finance Report 2015', 'Test Data Set').") @RequestParam(defaultValue = "") String name,
            @ApiParam(value = "An optional tag to be added in data set metadata once created.") @RequestParam(defaultValue = "") String tag,
            @ApiParam(value = "Size of the data set, in bytes.") @RequestParam(required = false) Long size,
            @RequestHeader(CONTENT_TYPE) String contentType,
            @ApiParam(value = "content") InputStream content) {
        // @formatter:on
        checkDataSetName(name);
        final String id = UUID.randomUUID().toString();
        final Marker marker = Markers.dataset(id);
        LOG.debug(marker, "Creating...");
        // sanity check
        if (size != null && size < 0) {
            LOG.warn("invalid size provided {}", size);
            throw new TDPException(UNEXPECTED_CONTENT, build().put("size", size));
        }
        // check that the name is not already taken
        checkIfNameIsAvailable(name);
        // get the location out of the content type and the request body
        final DataSetLocation location;
        try {
            location = datasetLocator.getDataSetLocation(contentType, content);
        } catch (IOException e) {
            throw new TDPException(DataSetErrorCodes.UNABLE_TO_READ_DATASET_LOCATION, e);
        }
        DataSetMetadata dataSetMetadata = null;
        final TDPException hypotheticalException;
        try {
            // if the size is provided, let's check if the quota will not be exceeded
            if (size != null && size > 0) {
                quotaService.checkIfAddingSizeExceedsAvailableStorage(size);
            }
            dataSetMetadata = metadataBuilder.metadata() //
                    .id(id) //
                    .name(name) //
                    .author(security.getUserId()) //
                    .location(location) //
                    .created(System.currentTimeMillis()) //
                    .tag(tag) //
                    .build();
            // Indicate data set is being imported
            dataSetMetadata.getLifecycle().setImporting(true);
            // Save data set content
            LOG.debug(marker, "Storing content...");
            final long maxDataSetSizeAllowed = getMaxDataSetSizeAllowed();
            final StrictlyBoundedInputStream sizeCalculator = new StrictlyBoundedInputStream(content, maxDataSetSizeAllowed);
            contentStore.storeAsRaw(dataSetMetadata, sizeCalculator);
            dataSetMetadata.setDataSetSize(sizeCalculator.getTotal());
            LOG.debug(marker, "Content stored.");
            // Create the new data set
            dataSetMetadataRepository.save(dataSetMetadata);
            LOG.debug(marker, "dataset metadata stored {}", dataSetMetadata);
            // Queue events (format analysis, content indexing for search...)
            analyzeDataSet(id, true, emptyList());
            LOG.debug(marker, "Created!");
            return id;
        } catch (StrictlyBoundedInputStream.InputStreamTooLargeException e) {
            hypotheticalException = new TDPException(MAX_STORAGE_MAY_BE_EXCEEDED, e, build().put("limit", e.getMaxSize()));
        } catch (TDPException e) {
            hypotheticalException = e;
        } catch (Exception e) {
            hypotheticalException = new TDPException(UNABLE_CREATE_DATASET, e);
        } finally {
            // because the client might still be writing the request content, closing the connection right now
            // might end up in a 'connection reset' or a 'broken pipe' error in the API.
            //
            // So, let's read the request content fully before closing the connection.
            dataSetContentToNull(content);
        }
        dataSetMetadataRepository.remove(id);
        if (dataSetMetadata != null) {
            try {
                contentStore.delete(dataSetMetadata);
            } catch (Exception e) {
                LOG.error("Unable to delete uploaded data.", e);
            }
        }
        throw hypotheticalException;
    }
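create depends on StrictlyBoundedInputStream both to count the bytes actually written to the content store and to abort the upload as soon as the configured maximum is exceeded. That class is not part of this listing, so the sketch below is an assumed, simplified equivalent built on the JDK's FilterInputStream; the real Talend implementation throws its own InputStreamTooLargeException rather than a plain IOException.

import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

// Assumed, simplified stand-in for StrictlyBoundedInputStream: counts bytes and fails past a limit.
class BoundedCountingInputStream extends FilterInputStream {

    private final long maxSize;
    private long total;

    BoundedCountingInputStream(InputStream in, long maxSize) {
        super(in);
        this.maxSize = maxSize;
    }

    long getTotal() {
        return total;
    }

    @Override
    public int read() throws IOException {
        int b = super.read();
        if (b >= 0) {
            count(1);
        }
        return b;
    }

    @Override
    public int read(byte[] buffer, int offset, int length) throws IOException {
        int read = super.read(buffer, offset, length);
        if (read > 0) {
            count(read);
        }
        return read;
    }

    private void count(long read) throws IOException {
        total += read;
        if (total > maxSize) {
            // The real class throws its own InputStreamTooLargeException; IOException keeps this sketch JDK-only.
            throw new IOException("Upload exceeds the allowed maximum of " + maxSize + " bytes");
        }
    }

    public static void main(String[] args) throws IOException {
        byte[] payload = new byte[64];
        try (InputStream in = new BoundedCountingInputStream(new ByteArrayInputStream(payload), 32)) {
            in.transferTo(java.io.OutputStream.nullOutputStream()); // triggers the limit check while copying
        } catch (IOException e) {
            System.out.println("Rejected: " + e.getMessage());
        }
    }
}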
Use of org.talend.dataprep.api.dataset.DataSetMetadata in project data-prep by Talend.
Class DataSetService, method preview.
    /**
     * Returns a preview of the data set content for the given id (first 100 rows). The service might return
     * {@link org.apache.http.HttpStatus#SC_ACCEPTED} if the data set exists but analysis is not yet fully
     * completed, so content is not yet ready to be served.
     *
     * @param metadata If <code>true</code>, includes data set metadata information.
     * @param sheetName the sheet name to preview
     * @param dataSetId A data set id.
     */
    @RequestMapping(value = "/datasets/{id}/preview", method = RequestMethod.GET)
    @ApiOperation(value = "Get a data set preview by id", notes = "Get a data set preview content based on provided id. Not valid or non existing data set id returns empty content. Data set not in draft status will return a 301 redirect.")
    @Timed
    @ResponseBody
    public DataSet preview(
            @RequestParam(defaultValue = "true") @ApiParam(name = "metadata", value = "Include metadata information in the response") boolean metadata, //
            @RequestParam(defaultValue = "") @ApiParam(name = "sheetName", value = "Sheet name to preview") String sheetName, //
            @PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the requested data set") String dataSetId) {
        DataSetMetadata dataSetMetadata = dataSetMetadataRepository.get(dataSetId);
        if (dataSetMetadata == null) {
            HttpResponseContext.status(HttpStatus.NO_CONTENT);
            // No data set, returns empty content.
            return DataSet.empty();
        }
        if (!dataSetMetadata.isDraft()) {
            // Moved to get data set content operation
            HttpResponseContext.status(HttpStatus.MOVED_PERMANENTLY);
            HttpResponseContext.header("Location", "/datasets/" + dataSetId + "/content");
            // dataset not anymore a draft so preview doesn't make sense.
            return DataSet.empty();
        }
        if (StringUtils.isNotEmpty(sheetName)) {
            dataSetMetadata.setSheetName(sheetName);
        }
        // take care of previous data without schema parser result
        if (dataSetMetadata.getSchemaParserResult() != null) {
            // sheet not yet set correctly so use the first one
            if (StringUtils.isEmpty(dataSetMetadata.getSheetName())) {
                String theSheetName = dataSetMetadata.getSchemaParserResult().getSheetContents().get(0).getName();
                LOG.debug("preview for dataSetMetadata: {} with sheetName: {}", dataSetId, theSheetName);
                dataSetMetadata.setSheetName(theSheetName);
            }
            String theSheetName = dataSetMetadata.getSheetName();
            Optional<Schema.SheetContent> sheetContentFound = dataSetMetadata.getSchemaParserResult().getSheetContents().stream()
                    .filter(sheetContent -> theSheetName.equals(sheetContent.getName())).findFirst();
            if (!sheetContentFound.isPresent()) {
                HttpResponseContext.status(HttpStatus.NO_CONTENT);
                // No sheet found, returns empty content.
                return DataSet.empty();
            }
            List<ColumnMetadata> columnMetadatas = sheetContentFound.get().getColumnMetadatas();
            if (dataSetMetadata.getRowMetadata() == null) {
                dataSetMetadata.setRowMetadata(new RowMetadata(emptyList()));
            }
            dataSetMetadata.getRowMetadata().setColumns(columnMetadatas);
        } else {
LOG.warn("dataset#{} has draft status but any SchemaParserResult");
        }
        // Build the result
        DataSet dataSet = new DataSet();
        if (metadata) {
            dataSet.setMetadata(conversionService.convert(dataSetMetadata, UserDataSetMetadata.class));
        }
        dataSet.setRecords(contentStore.stream(dataSetMetadata).limit(100));
        return dataSet;
    }
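A caller of this preview endpoint has three outcomes to handle: 204 when the dataset or sheet is missing, 301 when the dataset is no longer a draft (the Location header points to the full content endpoint), and 200 with the first 100 rows otherwise. Below is a hypothetical client sketch with Spring's RestTemplate; the URL, dataset id and sheet name are placeholders, and the default request factory may follow the 301 redirect automatically unless configured otherwise.

import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.client.RestTemplate;

// Hypothetical caller of the preview endpoint above; URL, dataset id and sheet name are placeholders.
class PreviewClientSketch {

    public static void main(String[] args) {
        RestTemplate restTemplate = new RestTemplate();
        ResponseEntity<String> response = restTemplate.getForEntity(
                "http://localhost:8080/datasets/{id}/preview?metadata=true&sheetName={sheet}",
                String.class, "some-dataset-id", "Sheet1");

        if (response.getStatusCode() == HttpStatus.NO_CONTENT) {
            System.out.println("Dataset or sheet not found: empty preview.");
        } else if (response.getStatusCode() == HttpStatus.MOVED_PERMANENTLY) {
            // Dataset is no longer a draft; its content lives at the Location header target.
            System.out.println("Preview moved to: " + response.getHeaders().getLocation());
        } else {
            System.out.println(response.getBody());
        }
    }
}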