Use of org.talend.dataprep.metrics.Timed in the Talend data-prep project:
the DataSetService class, method list().
/**
 * Lists the data sets the current user is allowed to see, optionally filtered on name, certification
 * or favorite status, and optionally truncated to the configured list limit.
 * <p>
 * The returned stream is lazy: it must be consumed (and closed) by the caller. Closing the returned
 * stream propagates to the underlying repository stream.
 *
 * @param sort the sort key (name, creation or modification date).
 * @param order the sort order (asc, desc or modif).
 * @param name optional name filter; {@code nameStrict} toggles exact vs. contains matching.
 * @param certified when {@code true}, only certified data sets are returned.
 * @param favorite when {@code true}, only the current user's favorite data sets are returned.
 * @param limit when {@code true}, at most {@code datasetListLimit} entries are returned.
 */
@RequestMapping(value = "/datasets", method = RequestMethod.GET)
@ApiOperation(value = "List all data sets and filters on certified, or favorite or a limited number when asked", notes = "Returns the list of data sets (and filters) the current user is allowed to see. Creation date is a Epoch time value (in UTC time zone).")
@Timed
public Stream<UserDataSetMetadata> list(@ApiParam(value = "Sort key (by name, creation or modification date)") @RequestParam(defaultValue = "creationDate") Sort sort, @ApiParam(value = "Order for sort key (desc or asc or modif)") @RequestParam(defaultValue = "desc") Order order, @ApiParam(value = "Filter on name containing the specified name") @RequestParam(required = false) String name, @ApiParam(value = "Filter on name containing the specified name strictness") @RequestParam(defaultValue = "false") boolean nameStrict, @ApiParam(value = "Filter on certified data sets") @RequestParam(defaultValue = "false") boolean certified, @ApiParam(value = "Filter on favorite data sets") @RequestParam(defaultValue = "false") boolean favorite, @ApiParam(value = "Only return a limited number of data sets") @RequestParam(defaultValue = "false") boolean limit) {
    // Build the TQL filter for data sets from the requested criteria.
    final String userId = security.getUserId();
    final UserData userData = userDataRepository.get(userId);
    final List<String> predicates = new ArrayList<>();
    predicates.add("lifecycle.importing = false");
    if (favorite) {
        if (userData != null && !userData.getFavoritesDatasets().isEmpty()) {
            predicates.add("id in [" + userData.getFavoritesDatasets().stream().map(ds -> '\'' + ds + '\'').collect(Collectors.joining(",")) + "]");
        } else {
            // Favorites were requested but the user has none: nothing can match.
            return Stream.empty();
        }
    }
    if (certified) {
        predicates.add("governance.certificationStep = '" + Certification.CERTIFIED + "'");
    }
    if (StringUtils.isNotEmpty(name)) {
        // Quote the user-supplied name so regex metacharacters are matched literally.
        final String regex = "(?i)" + Pattern.quote(name);
        final String filter;
        if (nameStrict) {
            filter = "name ~ '^" + regex + "$'";
        } else {
            filter = "name ~ '.*" + regex + ".*'";
        }
        predicates.add(filter);
    }
    final String tqlFilter = String.join(" and ", predicates);
    LOG.debug("TQL Filter in use: {}", tqlFilter);
    // Get all data sets according to the filter.
    // Do NOT wrap the repository stream in try-with-resources here: the returned stream is consumed
    // lazily by the caller, and closing the source before the terminal operation would make the
    // pipeline unusable. Closing the returned (derived) stream runs the source's close handlers.
    final Stream<DataSetMetadata> stream = dataSetMetadataRepository.list(tqlFilter, sort, order);
    Stream<UserDataSetMetadata> userDataSetMetadataStream = stream.map(m -> conversionService.convert(m, UserDataSetMetadata.class));
    if (sort == Sort.AUTHOR || sort == Sort.NAME) {
        // These sort keys are not well handled by the mongo repository, sort in memory instead.
        userDataSetMetadataStream = userDataSetMetadataStream.sorted(getDataSetMetadataComparator(sort, order));
    }
    return userDataSetMetadataStream.limit(limit ? datasetListLimit : Long.MAX_VALUE);
}
Use of org.talend.dataprep.metrics.Timed in the Talend data-prep project:
the DataSetService class, method updateDataSet().
/**
 * Updates a data set metadata. If no data set exists for given id, a {@link TDPException} is thrown.
 * <p>
 * Only a subset of the metadata is updatable (name, sheet name, location, content parameters,
 * encoding, limit): the stored metadata is loaded, selectively merged with the incoming values,
 * re-analyzed and saved, all under a distributed per-dataset lock.
 *
 * @param dataSetId The id of data set to be updated.
 * @param dataSetMetadata The new content for the data set. If empty, existing content will <b>not</b> be replaced.
 * For delete operation, look at {@link #delete(String)}.
 * @throws TDPException if the data set does not exist or the update fails.
 */
@RequestMapping(value = "/datasets/{id}", method = PUT)
@ApiOperation(value = "Update a data set metadata by id", notes = "Update a data set metadata according to the content of the PUT body. Id should be a UUID returned by the list operation. Not valid or non existing data set id return an error response.")
@Timed
public void updateDataSet(@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set to update") String dataSetId, @RequestBody DataSetMetadata dataSetMetadata) {
// Validate the new name early, before taking the distributed lock.
if (dataSetMetadata != null && dataSetMetadata.getName() != null) {
checkDataSetName(dataSetMetadata.getName());
}
// Serialize concurrent updates of the same data set.
final DistributedLock lock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
lock.lock();
try {
DataSetMetadata metadataForUpdate = dataSetMetadataRepository.get(dataSetId);
if (metadataForUpdate == null) {
// No need to silently create the data set metadata: associated content will most likely not exist.
throw new TDPException(DataSetErrorCodes.DATASET_DOES_NOT_EXIST, build().put("id", dataSetId));
}
LOG.debug("updateDataSet: {}", dataSetMetadata);
// NOTE(review): the update event is published BEFORE the merge/save below, and carries the
// incoming (partial) metadata rather than the persisted result — confirm listeners expect
// this ordering and payload.
publisher.publishEvent(new DatasetUpdatedEvent(dataSetMetadata));
//
// Only part of the metadata can be updated, so the original dataset metadata is loaded and updated
//
// Keep a copy of the stored state: the schema analyzer below needs the "before" metadata.
DataSetMetadata original = metadataBuilder.metadata().copy(metadataForUpdate).build();
try {
// update the name
metadataForUpdate.setName(dataSetMetadata.getName());
// update the sheet content (in case of a multi-sheet excel file)
// NOTE(review): dataSetMetadata.getSheetName() may be null here, which would NPE inside the
// filter — confirm callers always send a sheet name when a schema parser result is present.
if (metadataForUpdate.getSchemaParserResult() != null) {
Optional<Schema.SheetContent> sheetContentFound = metadataForUpdate.getSchemaParserResult().getSheetContents().stream().filter(sheetContent -> dataSetMetadata.getSheetName().equals(sheetContent.getName())).findFirst();
if (sheetContentFound.isPresent()) {
List<ColumnMetadata> columnMetadatas = sheetContentFound.get().getColumnMetadatas();
if (metadataForUpdate.getRowMetadata() == null) {
metadataForUpdate.setRowMetadata(new RowMetadata(emptyList()));
}
metadataForUpdate.getRowMetadata().setColumns(columnMetadatas);
}
metadataForUpdate.setSheetName(dataSetMetadata.getSheetName());
// The sheet has been selected: the parser result is no longer needed.
metadataForUpdate.setSchemaParserResult(null);
}
// Location updates
metadataForUpdate.setLocation(dataSetMetadata.getLocation());
// update parameters & encoding (so that user can change import parameters for CSV)
metadataForUpdate.getContent().setParameters(dataSetMetadata.getContent().getParameters());
metadataForUpdate.setEncoding(dataSetMetadata.getEncoding());
// update limit
final Optional<Long> newLimit = dataSetMetadata.getContent().getLimit();
newLimit.ifPresent(limit -> metadataForUpdate.getContent().setLimit(limit));
// Validate that the new data set metadata and removes the draft status
final String formatFamilyId = dataSetMetadata.getContent().getFormatFamilyId();
if (formatFamilyFactory.hasFormatFamily(formatFamilyId)) {
FormatFamily format = formatFamilyFactory.getFormatFamily(formatFamilyId);
try {
DraftValidator draftValidator = format.getDraftValidator();
// NOTE(review): validation runs on the incoming metadata, not on the merged
// metadataForUpdate — confirm this is intended.
DraftValidator.Result result = draftValidator.validate(dataSetMetadata);
if (result.isDraft()) {
// This is not an exception case: data set may remain a draft after update (although rather
// unusual)
LOG.warn("Data set #{} is still a draft after update.", dataSetId);
return;
}
// Data set metadata to update is no longer a draft
metadataForUpdate.setDraft(false);
} catch (UnsupportedOperationException e) {
// no need to validate draft here: this format family has no draft validator
}
}
// update schema
formatAnalyzer.update(original, metadataForUpdate);
// save the result
dataSetMetadataRepository.save(metadataForUpdate);
// all good mate!! so send that to jms
// Asks for a in depth schema analysis (for column type information).
analyzeDataSet(dataSetId, true, singletonList(FormatAnalysis.class));
} catch (TDPException e) {
// Domain errors are already meaningful: rethrow untouched.
throw e;
} catch (Exception e) {
// Wrap any unexpected failure, keeping the cause.
throw new TDPException(UNABLE_TO_CREATE_OR_UPDATE_DATASET, e);
}
} finally {
lock.unlock();
}
}
Use of org.talend.dataprep.metrics.Timed in the Talend data-prep project:
the DataSetService class, method updateDatasetColumn().
/**
 * Updates the type and/or the semantic domain of a single column of a data set, then triggers a
 * partial re-analysis of the data set.
 *
 * @param dataSetId the dataset id.
 * @param columnId the column id.
 * @param parameters the new type and domain.
 * @throws TDPException if the data set or the column does not exist.
 */
@RequestMapping(value = "/datasets/{datasetId}/column/{columnId}", method = POST)
@ApiOperation(value = "Update a column type and/or domain")
@Timed
public void updateDatasetColumn(@PathVariable(value = "datasetId") @ApiParam(name = "datasetId", value = "Id of the dataset") final String dataSetId, @PathVariable(value = "columnId") @ApiParam(name = "columnId", value = "Id of the column") final String columnId, @RequestBody final UpdateColumnParameters parameters) {
    // Serialize concurrent updates of the same data set.
    final DistributedLock datasetLock = dataSetMetadataRepository.createDatasetMetadataLock(dataSetId);
    datasetLock.lock();
    try {
        // The data set must exist before anything can be updated.
        final DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
        if (metadata == null) {
            throw new TDPException(DataSetErrorCodes.DATASET_DOES_NOT_EXIST, build().put("id", dataSetId));
        }
        LOG.debug("update dataset column for #{} with type {} and/or domain {}", dataSetId, parameters.getType(), parameters.getDomain());
        // Locate the targeted column inside the data set's row metadata.
        final ColumnMetadata targetColumn = metadata.getRowMetadata().getById(columnId);
        if (targetColumn == null) {
            throw new TDPException(DataSetErrorCodes.COLUMN_DOES_NOT_EXIST, build().put("id", dataSetId).put("columnid", columnId));
        }
        // Apply the requested type change, if any.
        if (parameters.getType() != null) {
            targetColumn.setType(parameters.getType());
        }
        final String requestedDomain = parameters.getDomain();
        if (requestedDomain != null) {
            if (requestedDomain.isEmpty()) {
                // An empty domain means "clear the domain, keep only the type".
                targetColumn.setDomain("");
                targetColumn.setDomainLabel("");
                targetColumn.setDomainFrequency(0);
            } else {
                // Switch to the requested domain, but only when it is one of the known candidates.
                targetColumn.getSemanticDomains().stream() //
                        .filter(candidate -> StringUtils.equals(candidate.getId(), requestedDomain)) //
                        .findFirst() //
                        .ifPresent(candidate -> {
                            targetColumn.setDomain(candidate.getId());
                            targetColumn.setDomainLabel(candidate.getLabel());
                            targetColumn.setDomainFrequency(candidate.getScore());
                        });
            }
        }
        // Persist the modified metadata.
        dataSetMetadataRepository.save(metadata);
        // Trigger a partial re-analysis of the updated data set (not all analyses are performed).
        analyzeDataSet(dataSetId, false, asList(ContentAnalysis.class, FormatAnalysis.class, SchemaAnalysis.class));
    } finally {
        datasetLock.unlock();
    }
}
Use of org.talend.dataprep.metrics.Timed in the Talend data-prep project:
the DataSetService class, method getMetadata().
/**
 * Returns the data set {@link DataSetMetadata metadata} for given <code>dataSetId</code>.
 *
 * @param dataSetId A data set id. If <code>null</code>, the operation returns
 * {@link org.apache.http.HttpStatus#SC_NO_CONTENT}. If no data set with the provided id exists, a
 * {@link TDPException} is thrown. If the schema analysis of the data set is not yet complete, the
 * operation returns {@link org.apache.http.HttpStatus#SC_ACCEPTED} with an empty data set.
 */
@RequestMapping(value = "/datasets/{id}/metadata", method = RequestMethod.GET)
@ApiOperation(value = "Get metadata information of a data set by id", notes = "Get metadata information of a data set by id. Not valid or non existing data set id returns empty content.")
@Timed
@ResponseBody
public DataSet getMetadata(@PathVariable(value = "id") @ApiParam(name = "id", value = "Id of the data set metadata") String dataSetId) {
    // A null id is not an error: answer 204 with no body.
    if (dataSetId == null) {
        HttpResponseContext.status(HttpStatus.NO_CONTENT);
        return null;
    }
    LOG.debug("get dataset metadata for {}", dataSetId);
    DataSetMetadata metadata = dataSetMetadataRepository.get(dataSetId);
    if (metadata == null) {
        throw new TDPException(DataSetErrorCodes.DATASET_DOES_NOT_EXIST, build().put("id", dataSetId));
    }
    // Schema analysis still in progress: answer 202 with an empty data set so callers can retry.
    if (!metadata.getLifecycle().schemaAnalyzed()) {
        HttpResponseContext.status(HttpStatus.ACCEPTED);
        return DataSet.empty();
    }
    DataSet dataSet = new DataSet();
    dataSet.setMetadata(conversionService.convert(metadata, UserDataSetMetadata.class));
    // debug (not info): this is a frequently-hit read path, consistent with the other endpoints.
    LOG.debug("found dataset {} for #{}", dataSet.getMetadata().getName(), dataSetId);
    return dataSet;
}
Use of org.talend.dataprep.metrics.Timed in the Talend data-prep project:
the DataSetService class, method favorites().
/**
 * Lists the ids of all data sets the current user has marked as favorite.
 *
 * @return the favorite data set ids for the current user, or an empty list when the user has no
 * stored user data.
 */
@RequestMapping(value = "/datasets/favorites", method = RequestMethod.GET)
@ApiOperation(value = "return all favorites datasets of the current user", notes = "Returns the list of favorites datasets.")
@Timed
public Iterable<String> favorites() {
    // Look up the per-user record that holds the favorite data set ids.
    final UserData currentUserData = userDataRepository.get(security.getUserId());
    if (currentUserData == null) {
        return emptyList();
    }
    return currentUserData.getFavoritesDatasets();
}
Aggregations