Use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
From the class DatasetFieldServiceApi, method showControlledVocabularyForSubject.
/**
 * See also http://irclog.greptilian.com/rest/2015-02-07#i_95635
 *
 * @todo is our convention camelCase? Or lisp-case? Or snake_case?
 */
@GET
@Path("controlledVocabulary/subject")
public Response showControlledVocabularyForSubject() {
    DatasetFieldType subjectDatasetField = datasetFieldService.findByName(DatasetFieldConstant.subject);
    JsonArrayBuilder possibleSubjects = Json.createArrayBuilder();
    for (ControlledVocabularyValue subjectValue : controlledVocabularyValueService.findByDatasetFieldTypeId(subjectDatasetField.getId())) {
        String subject = subjectValue.getStrValue();
        if (subject != null) {
            possibleSubjects.add(subject);
        }
    }
    return ok(possibleSubjects);
}
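For orientation, here is a minimal client-side sketch of consuming this endpoint with java.net.http. The host and the /api/admin/datasetfield prefix are assumptions for illustration; the snippet above only defines the relative path controlledVocabulary/subject.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class SubjectVocabularyClient {
    public static void main(String[] args) throws Exception {
        // Assumed base path; the endpoint above only declares the relative
        // @Path("controlledVocabulary/subject"), so the full URL is a guess.
        String url = "http://localhost:8080/api/admin/datasetfield/controlledVocabulary/subject";
        HttpRequest request = HttpRequest.newBuilder(URI.create(url)).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // Expected shape (per the JsonArrayBuilder above): a JSON array of
        // subject strings, wrapped in the API's standard OK envelope.
        System.out.println(response.body());
    }
}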
Use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
From the class IndexServiceBean, method addOrUpdateDataset.
private String addOrUpdateDataset(IndexableDataset indexableDataset) {
    IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
    Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
    logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
    Collection<SolrInputDocument> docs = new ArrayList<>();
    List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
    List<String> dataverseSegments = new ArrayList<>();
    try {
        dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
    } catch (Exception ex) {
        logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
    }
    List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
    // Add paths for linking dataverses.
    for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
        List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
        List<String> linkingDataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
        List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingDataverseSegments);
        dataversePaths.addAll(linkingDataversePaths);
    }
    SolrInputDocument solrInputDocument = new SolrInputDocument();
    String datasetSolrDocId = indexableDataset.getSolrDocId();
    solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
    solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
    String dataverseVersion = systemConfig.getVersion();
    solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
    solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
    solrInputDocument.addField(SearchFields.TYPE, "datasets");
    Date datasetSortByDate = new Date();
    Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
    if (majorVersionReleaseDate != null) {
        logger.fine("major release date found: " + majorVersionReleaseDate.toString());
        datasetSortByDate = majorVersionReleaseDate;
    } else {
        if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
        } else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
        }
        Date createDate = dataset.getCreateDate();
        if (createDate != null) {
            logger.fine("can't find major release date, using create date: " + createDate);
            datasetSortByDate = createDate;
        } else {
            logger.info("can't find major release date or create date, using \"now\"");
            datasetSortByDate = new Date();
        }
    }
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
    if (state.equals(IndexableDataset.DatasetState.PUBLISHED)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
        // solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate());
    } else if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
    }
    addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
    if (dataset.isHarvested()) {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
    } else {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
    }
    DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
    String parentDatasetTitle = "TBD";
    if (datasetVersion != null) {
        solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
        solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
        solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
        if (datasetVersion.isInReview()) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
        }
        for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
            DatasetFieldType dsfType = dsf.getDatasetFieldType();
            String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
            String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
            if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
                logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
                if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
                    // no-op: we want to keep email addresses out of Solr per https://github.com/IQSS/dataverse/issues/759
                } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
                    String dateAsString = dsf.getValues().get(0);
                    logger.fine("date as string: " + dateAsString);
                    if (dateAsString != null && !dateAsString.isEmpty()) {
                        SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
                        try {
                            /**
                             * @todo when bean validation is working we
                             * won't have to convert strings into dates
                             */
                            logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
                            Date dateAsDate = inputDateyyyy.parse(dateAsString);
                            SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy");
                            String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate);
                            logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
                            solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
                            if (dsfType.getSolrField().isFacetable()) {
                                solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
                            }
                        } catch (Exception ex) {
                            logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
                        }
                    }
                } else {
                    if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
                        /**
                         * @todo think about how to tie the fact that this
                         * needs to be multivalued (_ss) because a
                         * multivalued facet (authorAffiliation_ss) is being
                         * collapsed into here at index time. The business
                         * logic to determine whether a data-driven metadata
                         * field should be indexed into Solr as a single or
                         * multiple value lives in the getSolrField() method
                         * of DatasetField.java
                         */
                        solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
                    } else if (dsf.getDatasetFieldType().getName().equals("title")) {
                        // Datasets have titles, not names, but we index the title under name as
                        // well so that datasets can be sorted by name alongside dataverses and files.
                        List<String> possibleTitles = dsf.getValues();
                        String firstTitle = possibleTitles.get(0);
                        if (firstTitle != null) {
                            parentDatasetTitle = firstTitle;
                        }
                        solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
                    }
                    if (dsfType.isControlledVocabulary()) {
                        for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
                            if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
                                continue;
                            }
                            solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
                            if (dsfType.getSolrField().isFacetable()) {
                                solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
                            }
                        }
                    } else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
                        // Strip HTML.
                        List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
                        solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
                        if (dsfType.getSolrField().isFacetable()) {
                            solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
                        }
                    } else {
                        // Do not strip HTML.
                        solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
                        if (dsfType.getSolrField().isFacetable()) {
                            if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
                                String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
                                if (topicClassificationTerm != null) {
                                    logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
                                    solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
                                }
                            } else {
                                solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
                            }
                        }
                    }
                }
            }
        }
    }
    solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
    // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName());
    solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
    solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
    if (state.equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
        String deaccessionNote = datasetVersion.getVersionNote();
        if (deaccessionNote != null) {
            solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
        }
    }
    docs.add(solrInputDocument);
    List<String> filesIndexed = new ArrayList<>();
    if (datasetVersion != null) {
        List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
        boolean checkForDuplicateMetadata = false;
        if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
            checkForDuplicateMetadata = true;
            logger.fine("We are indexing a draft version of a dataset that has a released version. We'll check whether each file's metadata is an exact clone of the released version's.");
        }
        for (FileMetadata fileMetadata : fileMetadatas) {
            boolean indexThisMetadata = true;
            if (checkForDuplicateMetadata) {
                logger.fine("Checking if this file metadata is a duplicate.");
                for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
                    if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
                        if (fileMetadata.contentEquals(releasedFileMetadata)) {
                            indexThisMetadata = false;
                            logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
                        } else {
                            logger.fine("This file metadata has changed since the released version; we want to index it!");
                        }
                        break;
                    }
                }
            }
            if (indexThisMetadata) {
                SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
                Long fileEntityId = fileMetadata.getDataFile().getId();
                datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
                datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
                datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
                String filenameCompleteFinal = "";
                if (fileMetadata != null) {
                    String filenameComplete = fileMetadata.getLabel();
                    if (filenameComplete != null) {
                        String filenameWithoutExtension = "";
                        // String extension = "";
                        int i = filenameComplete.lastIndexOf('.');
                        if (i > 0) {
                            // extension = filenameComplete.substring(i + 1);
                            try {
                                filenameWithoutExtension = filenameComplete.substring(0, i);
                                datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
                                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
                            } catch (IndexOutOfBoundsException ex) {
                                filenameWithoutExtension = "";
                            }
                        } else {
                            logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
                            filenameWithoutExtension = filenameComplete;
                        }
                        filenameCompleteFinal = filenameComplete;
                    }
                    for (String tag : fileMetadata.getCategoriesByName()) {
                        datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
                        datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
                /**
                 * For rules on sorting files see
                 * https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
                 * via https://redmine.hmdc.harvard.edu/issues/3701
                 */
                Date fileSortByDate = new Date();
                DataFile datafile = fileMetadata.getDataFile();
                if (datafile != null) {
                    boolean fileHasBeenReleased = datafile.isReleased();
                    if (fileHasBeenReleased) {
                        logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp filePublicationTimestamp = datafile.getPublicationDate();
                        if (filePublicationTimestamp != null) {
                            fileSortByDate = filePublicationTimestamp;
                        } else {
                            logger.info("filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        }
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    } else {
                        logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp fileCreateTimestamp = datafile.getCreateDate();
                        if (fileCreateTimestamp != null) {
                            fileSortByDate = fileCreateTimestamp;
                        } else {
                            logger.info("fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        }
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    }
                    if (datafile.isHarvested()) {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
                    } else {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
                    }
                }
                if (fileSortByDate == null) {
                    if (datasetSortByDate != null) {
                        logger.info("fileSortByDate was null, assigning datasetSortByDate");
                        fileSortByDate = datasetSortByDate;
                    } else {
                        logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'");
                        fileSortByDate = new Date();
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
                if (majorVersionReleaseDate == null && !datafile.isHarvested()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
                }
                if (datasetVersion.isInReview()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
                }
                String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
                if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.PUBLISHED)) {
                    fileSolrDocId = solrDocIdentifierFile + fileEntityId;
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
                    // datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString);
                    addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
                } else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
                    fileSolrDocId = solrDocIdentifierFile + fileEntityId + indexableDataset.getDatasetState().getSuffix();
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
                }
                datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
                datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
                // For the file type facets, we have a properties file that maps MIME types
                // to facet-friendly names; "application/fits" should become "FITS", etc.:
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
                if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
                    /**
                     * @todo Someday we should probably deprecate this
                     * FILE_MD5 in favor of a combination of
                     * FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
                     */
                    datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
                }
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
                datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
                datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
                // datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName());
                // datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle());
                datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
                datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
                // Variable names and labels:
                if (fileMetadata.getDataFile().isTabularData()) {
                    List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
                    for (DataVariable var : variables) {
                        if (var.getName() != null && !var.getName().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
                        }
                        if (var.getLabel() != null && !var.getLabel().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
                        }
                    }
                    // Tabular data tags (not to be confused with the file categories indexed above!):
                    for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
                        String tagLabel = tag.getTypeLabel();
                        datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
                    }
                }
                if (indexableDataset.isFilesShouldBeIndexed()) {
                    filesIndexed.add(fileSolrDocId);
                    docs.add(datafileSolrInputDocument);
                }
            }
        }
    }
    try {
        solrServer.add(docs);
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    try {
        solrServer.commit();
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    Long dsId = dataset.getId();
    // Dataset updatedDataset = (Dataset) dvObjectService.updateContentIndexTime(dataset);
    // updatedDataset = null;
    // Instead of making a call to dvObjectService, let's try to
    // modify the index timestamp using the local EntityManager:
    DvObject dvObjectToModify = em.find(DvObject.class, dsId);
    dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
    dvObjectToModify = em.merge(dvObjectToModify);
    dvObjectToModify = null;
    // return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
    return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
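The method above follows a common SolrJ pattern: accumulate SolrInputDocuments in a collection, then add and commit them in one batch. A minimal, self-contained sketch of that pattern follows; the Solr URL, core name, and field names are hypothetical stand-ins for illustration, not Dataverse's actual configuration.

import java.util.ArrayList;
import java.util.Collection;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrInputDocument;

public class BatchIndexSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical Solr URL and core; Dataverse's actual values come
        // from configuration, not from the snippet above.
        SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build();
        Collection<SolrInputDocument> docs = new ArrayList<>();
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "dataset_42");           // hypothetical doc id
        doc.addField("entityId", 42L);              // hypothetical field names
        doc.addField("dvObjectType", "datasets");
        docs.add(doc);
        // One round trip for the whole batch, then an explicit commit,
        // mirroring solrServer.add(docs) / solrServer.commit() above.
        solr.add(docs);
        solr.commit();
        solr.close();
    }
}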
Use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
From the class DDIExporterTest, method setUp.
@Before
public void setUp() {
    datasetFieldTypeSvc = new MockDatasetFieldSvc();
    DatasetFieldType titleType = datasetFieldTypeSvc.add(new DatasetFieldType("title", FieldType.TEXTBOX, false));
    DatasetFieldType authorType = datasetFieldTypeSvc.add(new DatasetFieldType("author", FieldType.TEXT, true));
    Set<DatasetFieldType> authorChildTypes = new HashSet<>();
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", FieldType.TEXT, false)));
    DatasetFieldType authorIdentifierSchemeType = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", FieldType.TEXT, false));
    authorIdentifierSchemeType.setAllowControlledVocabulary(true);
    // Why aren't these enforced? Should be ORCID, etc.
    authorIdentifierSchemeType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", authorIdentifierSchemeType),
            new ControlledVocabularyValue(2L, "doi", authorIdentifierSchemeType),
            new ControlledVocabularyValue(3L, "url", authorIdentifierSchemeType)));
    authorChildTypes.add(datasetFieldTypeSvc.add(authorIdentifierSchemeType));
    for (DatasetFieldType t : authorChildTypes) {
        t.setParentDatasetFieldType(authorType);
    }
    authorType.setChildDatasetFieldTypes(authorChildTypes);
    DatasetFieldType datasetContactType = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", FieldType.TEXT, true));
    Set<DatasetFieldType> datasetContactTypes = new HashSet<>();
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactEmail", FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", FieldType.TEXT, false)));
    for (DatasetFieldType t : datasetContactTypes) {
        t.setParentDatasetFieldType(datasetContactType);
    }
    datasetContactType.setChildDatasetFieldTypes(datasetContactTypes);
    DatasetFieldType dsDescriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("dsDescription", FieldType.TEXT, true));
    Set<DatasetFieldType> dsDescriptionTypes = new HashSet<>();
    dsDescriptionTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("dsDescriptionValue", FieldType.TEXT, false)));
    for (DatasetFieldType t : dsDescriptionTypes) {
        t.setParentDatasetFieldType(dsDescriptionType);
    }
    dsDescriptionType.setChildDatasetFieldTypes(dsDescriptionTypes);
    DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true));
    DatasetFieldType descriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("description", DatasetFieldType.FieldType.TEXTBOX, false));
    DatasetFieldType subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", DatasetFieldType.FieldType.TEXT, true));
    subjectType.setAllowControlledVocabulary(true);
    subjectType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "mgmt", subjectType),
            new ControlledVocabularyValue(2L, "law", subjectType),
            new ControlledVocabularyValue(3L, "cs", subjectType)));
    DatasetFieldType pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", DatasetFieldType.FieldType.TEXT, false));
    pubIdType.setAllowControlledVocabulary(true);
    pubIdType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", pubIdType),
            new ControlledVocabularyValue(2L, "doi", pubIdType),
            new ControlledVocabularyValue(3L, "url", pubIdType)));
    DatasetFieldType compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> childTypes = new HashSet<>();
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", DatasetFieldType.FieldType.TEXT, false)));
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", DatasetFieldType.FieldType.TEXT, false)));
    for (DatasetFieldType t : childTypes) {
        t.setParentDatasetFieldType(compoundSingleType);
    }
    compoundSingleType.setChildDatasetFieldTypes(childTypes);
}
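For context, a brief sketch of how a fixture like this is typically consumed: a field whose type allows a controlled vocabulary carries ControlledVocabularyValue objects rather than free-text values. This is illustrative only; it assumes DatasetFieldType exposes a getControlledVocabularyValues() getter matching the setter used above, and the helper class is hypothetical.

import java.util.Arrays;

import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldType;

public class FixtureUsageSketch {

    /**
     * Build a DatasetField holding one of the controlled vocabulary terms
     * configured on the given type (e.g. "law" on subjectType above).
     * Returns null if the term is not part of the type's vocabulary.
     * Assumes getControlledVocabularyValues() exists as the counterpart of
     * the setControlledVocabularyValues(...) calls in the fixture.
     */
    static DatasetField fieldWithVocabTerm(DatasetFieldType type, String term) {
        for (ControlledVocabularyValue cvv : type.getControlledVocabularyValues()) {
            if (cvv.getStrValue().equals(term)) {
                DatasetField field = new DatasetField();
                field.setDatasetFieldType(type);
                field.setControlledVocabularyValues(Arrays.asList(cvv));
                return field;
            }
        }
        return null;
    }
}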
Use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
From the class SchemaDotOrgExporterTest, method setUp (a fixture that mirrors the DDIExporterTest setup above, using the fully qualified DatasetFieldType.FieldType).
@Before
public void setUp() {
    datasetFieldTypeSvc = new DDIExporterTest.MockDatasetFieldSvc();
    DatasetFieldType titleType = datasetFieldTypeSvc.add(new DatasetFieldType("title", DatasetFieldType.FieldType.TEXTBOX, false));
    DatasetFieldType authorType = datasetFieldTypeSvc.add(new DatasetFieldType("author", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> authorChildTypes = new HashSet<>();
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", DatasetFieldType.FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", DatasetFieldType.FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", DatasetFieldType.FieldType.TEXT, false)));
    DatasetFieldType authorIdentifierSchemeType = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", DatasetFieldType.FieldType.TEXT, false));
    authorIdentifierSchemeType.setAllowControlledVocabulary(true);
    // Why aren't these enforced? Should be ORCID, etc.
    authorIdentifierSchemeType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", authorIdentifierSchemeType),
            new ControlledVocabularyValue(2L, "doi", authorIdentifierSchemeType),
            new ControlledVocabularyValue(3L, "url", authorIdentifierSchemeType)));
    authorChildTypes.add(datasetFieldTypeSvc.add(authorIdentifierSchemeType));
    for (DatasetFieldType t : authorChildTypes) {
        t.setParentDatasetFieldType(authorType);
    }
    authorType.setChildDatasetFieldTypes(authorChildTypes);
    DatasetFieldType datasetContactType = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> datasetContactTypes = new HashSet<>();
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactEmail", DatasetFieldType.FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", DatasetFieldType.FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", DatasetFieldType.FieldType.TEXT, false)));
    for (DatasetFieldType t : datasetContactTypes) {
        t.setParentDatasetFieldType(datasetContactType);
    }
    datasetContactType.setChildDatasetFieldTypes(datasetContactTypes);
    DatasetFieldType dsDescriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("dsDescription", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> dsDescriptionTypes = new HashSet<>();
    dsDescriptionTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("dsDescriptionValue", DatasetFieldType.FieldType.TEXT, false)));
    for (DatasetFieldType t : dsDescriptionTypes) {
        t.setParentDatasetFieldType(dsDescriptionType);
    }
    dsDescriptionType.setChildDatasetFieldTypes(dsDescriptionTypes);
    DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true));
    DatasetFieldType descriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("description", DatasetFieldType.FieldType.TEXTBOX, false));
    DatasetFieldType subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", DatasetFieldType.FieldType.TEXT, true));
    subjectType.setAllowControlledVocabulary(true);
    subjectType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "mgmt", subjectType),
            new ControlledVocabularyValue(2L, "law", subjectType),
            new ControlledVocabularyValue(3L, "cs", subjectType)));
    DatasetFieldType pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", DatasetFieldType.FieldType.TEXT, false));
    pubIdType.setAllowControlledVocabulary(true);
    pubIdType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", pubIdType),
            new ControlledVocabularyValue(2L, "doi", pubIdType),
            new ControlledVocabularyValue(3L, "url", pubIdType)));
    DatasetFieldType compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> childTypes = new HashSet<>();
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", DatasetFieldType.FieldType.TEXT, false)));
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", DatasetFieldType.FieldType.TEXT, false)));
    for (DatasetFieldType t : childTypes) {
        t.setParentDatasetFieldType(compoundSingleType);
    }
    compoundSingleType.setChildDatasetFieldTypes(childTypes);
}
Use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
From the class JsonParser, method parseField.
public DatasetField parseField(JsonObject json) throws JsonParseException {
    if (json == null) {
        return null;
    }
    DatasetField ret = new DatasetField();
    DatasetFieldType type = datasetFieldSvc.findByNameOpt(json.getString("typeName", ""));
    if (type == null) {
        throw new JsonParseException("Can't find type '" + json.getString("typeName", "") + "'");
    }
    if (type.isAllowMultiples() != json.getBoolean("multiple")) {
        throw new JsonParseException("incorrect multiple for field " + json.getString("typeName", ""));
    }
    if (type.isCompound() && !json.getString("typeClass").equals("compound")) {
        throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be compound");
    }
    if (!type.isControlledVocabulary() && type.isPrimitive() && !json.getString("typeClass").equals("primitive")) {
        throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be primitive");
    }
    if (type.isControlledVocabulary() && !json.getString("typeClass").equals("controlledVocabulary")) {
        throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be controlledVocabulary");
    }
    ret.setDatasetFieldType(type);
    if (type.isCompound()) {
        List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json);
        for (DatasetFieldCompoundValue dsfcv : vals) {
            dsfcv.setParentDatasetField(ret);
        }
        ret.setDatasetFieldCompoundValues(vals);
    } else if (type.isControlledVocabulary()) {
        List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json);
        for (ControlledVocabularyValue cvv : vals) {
            cvv.setDatasetFieldType(type);
        }
        ret.setControlledVocabularyValues(vals);
    } else {
        // primitive
        List<DatasetFieldValue> values = parsePrimitiveValue(json);
        for (DatasetFieldValue val : values) {
            val.setDatasetField(ret);
        }
        ret.setDatasetFieldValues(values);
    }
    return ret;
}
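To make the expected input concrete, here is a hedged sketch of the JSON shape this parser validates, built with javax.json. The typeName, multiple, and typeClass keys are all checked explicitly above; the "value" key for the vocabulary terms is an assumption, since parseControlledVocabularyValue (which defines the real payload shape) is not shown on this page.

import javax.json.Json;
import javax.json.JsonObject;

public class ParseFieldInputSketch {
    public static void main(String[] args) {
        // A controlled-vocabulary field as parseField() expects it:
        // typeName must resolve via findByNameOpt, multiple must match
        // isAllowMultiples(), and typeClass must be "controlledVocabulary".
        // The "value" key is an assumption about the payload shape.
        JsonObject subjectField = Json.createObjectBuilder()
                .add("typeName", "subject")
                .add("multiple", true)
                .add("typeClass", "controlledVocabulary")
                .add("value", Json.createArrayBuilder().add("law").add("cs"))
                .build();
        System.out.println(subjectField);
        // Passing this object to parseField(subjectField) would resolve
        // "law" and "cs" against the subject type's controlled vocabulary.
    }
}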