Search in sources :

Example 11 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

the class DatasetFieldServiceApi method showControlledVocabularyForSubject.

/**
 * Lists the string values of the controlled vocabulary attached to the
 * "subject" dataset field type as a JSON array. Vocabulary entries whose
 * string value is {@code null} are left out of the array.
 *
 * See also http://irclog.greptilian.com/rest/2015-02-07#i_95635
 *
 * @return the response produced by {@code ok(...)} for the JSON array of subjects
 * @todo is our convention camelCase? Or lisp-case? Or snake_case?
 */
@GET
@Path("controlledVocabulary/subject")
public Response showControlledVocabularyForSubject() {
    DatasetFieldType subjectType = datasetFieldService.findByName(DatasetFieldConstant.subject);
    JsonArrayBuilder subjectsJson = Json.createArrayBuilder();
    for (ControlledVocabularyValue vocabularyEntry : controlledVocabularyValueService.findByDatasetFieldTypeId(subjectType.getId())) {
        String label = vocabularyEntry.getStrValue();
        if (label == null) {
            // nothing to report for an entry without a string value
            continue;
        }
        subjectsJson.add(label);
    }
    return ok(subjectsJson);
}
Also used : JsonArrayBuilder(javax.json.JsonArrayBuilder) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 12 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

the class IndexServiceBean method addOrUpdateDataset.

/**
 * Builds the Solr documents for one version of a dataset and sends them to
 * Solr.
 *
 * One document is created for the dataset itself (identifiers, dates,
 * publication status, metadata fields, dataverse paths) and one for each
 * file metadata of the version. File documents are only submitted when
 * {@code indexableDataset.isFilesShouldBeIndexed()} is true. When a draft of
 * an already released dataset is indexed, file metadatas whose content
 * equals that of the released version are skipped.
 *
 * After a successful add and commit, the index time of the dataset's
 * {@code DvObject} is updated through the local EntityManager.
 *
 * @param indexableDataset wrapper supplying the dataset version to index,
 * its state and its Solr document id
 * @return a status message naming the dataset id, its Solr document id and
 * the file document ids that were indexed; if the Solr add or commit throws
 * a SolrServerException or IOException, the exception's {@code toString()}
 * is returned instead
 */
private String addOrUpdateDataset(IndexableDataset indexableDataset) {
    IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
    Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
    logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
    Collection<SolrInputDocument> docs = new ArrayList<>();
    List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
    List<String> dataverseSegments = new ArrayList<>();
    try {
        dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
    } catch (Exception ex) {
        // best effort: the dataset is still indexed, just without its own subtree paths
        logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
    }
    List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
    // Add Paths for linking dataverses
    for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
        List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
        List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
        List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
        dataversePaths.addAll(linkingDataversePaths);
    }
    SolrInputDocument solrInputDocument = new SolrInputDocument();
    String datasetSolrDocId = indexableDataset.getSolrDocId();
    solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
    solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
    String dataverseVersion = systemConfig.getVersion();
    solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
    solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
    solrInputDocument.addField(SearchFields.TYPE, "datasets");
    // Sort date: most recent major release date, else create date, else "now".
    Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
    Date datasetSortByDate;
    if (majorVersionReleaseDate != null) {
        logger.fine("major release date found: " + majorVersionReleaseDate.toString());
        datasetSortByDate = majorVersionReleaseDate;
    } else {
        if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
        } else if (state.equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
        }
        Date createDate = dataset.getCreateDate();
        if (createDate != null) {
            logger.fine("can't find major release date, using create date: " + createDate);
            datasetSortByDate = createDate;
        } else {
            logger.info("can't find major release date or create date, using \"now\"");
            datasetSortByDate = new Date();
        }
    }
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
    if (state.equals(IndexableDataset.DatasetState.PUBLISHED)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
    } else if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
    }
    addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
    if (dataset.isHarvested()) {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
    } else {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
    }
    DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
    // placeholder used as PARENT_NAME of the file documents when no title field is found
    String parentDatasetTitle = "TBD";
    if (datasetVersion != null) {
        solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
        solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
        solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
        if (datasetVersion.isInReview()) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
        }
        for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
            DatasetFieldType dsfType = dsf.getDatasetFieldType();
            String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
            String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
            if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
                logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
                if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
                    // no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759
                } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
                    String dateAsString = dsf.getValues().get(0);
                    logger.fine("date as string: " + dateAsString);
                    if (dateAsString != null && !dateAsString.isEmpty()) {
                        SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
                        try {
                            /**
                             * @todo when bean validation is working we
                             * won't have to convert strings into dates
                             */
                            logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
                            Date dateAsDate = inputDateyyyy.parse(dateAsString);
                            // Format with the same fixed-locale formatter used for parsing, so the
                            // indexed year does not depend on the JVM's default locale/calendar.
                            String datasetFieldFlaggedAsDate = inputDateyyyy.format(dateAsDate);
                            logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
                            solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
                            if (dsfType.getSolrField().isFacetable()) {
                                solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
                            }
                        } catch (Exception ex) {
                            logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
                        }
                    }
                } else {
                    if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
                        /**
                         * @todo think about how to tie the fact that this
                         * needs to be multivalued (_ss) because a
                         * multivalued facet (authorAffilition_ss) is being
                         * collapsed into here at index time. The business
                         * logic to determine if a data-driven metadata
                         * field should be indexed into Solr as a single or
                         * multiple value lives in the getSolrField() method
                         * of DatasetField.java
                         */
                        solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
                    } else if (dsf.getDatasetFieldType().getName().equals("title")) {
                        // datasets have titles not names but index title under name as well so we can sort datasets by name along dataverses and files
                        List<String> possibleTitles = dsf.getValues();
                        String firstTitle = possibleTitles.get(0);
                        if (firstTitle != null) {
                            parentDatasetTitle = firstTitle;
                        }
                        solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
                    }
                    if (dsfType.isControlledVocabulary()) {
                        for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
                            String vocabularyValue = controlledVocabularyValue.getStrValue();
                            // skip entries without a string value as well as the N/A marker
                            if (vocabularyValue == null || DatasetField.NA_VALUE.equals(vocabularyValue)) {
                                continue;
                            }
                            solrInputDocument.addField(solrFieldSearchable, vocabularyValue);
                            if (dsfType.getSolrField().isFacetable()) {
                                solrInputDocument.addField(solrFieldFacetable, vocabularyValue);
                            }
                        }
                    } else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
                        // strip HTML
                        List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
                        solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
                        if (dsfType.getSolrField().isFacetable()) {
                            solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
                        }
                    } else {
                        // do not strip HTML
                        solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
                        if (dsfType.getSolrField().isFacetable()) {
                            if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
                                String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
                                if (topicClassificationTerm != null) {
                                    logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
                                    solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
                                }
                            } else {
                                solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
                            }
                        }
                    }
                }
            }
        }
    }
    solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
    solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
    solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
    if (state.equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
        String deaccessionNote = datasetVersion.getVersionNote();
        if (deaccessionNote != null) {
            solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
        }
    }
    docs.add(solrInputDocument);
    List<String> filesIndexed = new ArrayList<>();
    if (datasetVersion != null) {
        List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
        boolean checkForDuplicateMetadata = false;
        if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
            checkForDuplicateMetadata = true;
            logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
        }
        for (FileMetadata fileMetadata : fileMetadatas) {
            boolean indexThisMetadata = true;
            if (checkForDuplicateMetadata) {
                logger.fine("Checking if this file metadata is a duplicate.");
                for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
                    if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
                        if (fileMetadata.contentEquals(releasedFileMetadata)) {
                            indexThisMetadata = false;
                            logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
                        } else {
                            logger.fine("This file metadata has changed since the released version; we want to index it!");
                        }
                        // only the first released metadata for the same data file is compared
                        break;
                    }
                }
            }
            if (indexThisMetadata) {
                SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
                Long fileEntityId = fileMetadata.getDataFile().getId();
                datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
                datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
                datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
                String filenameCompleteFinal = "";
                String filenameComplete = fileMetadata.getLabel();
                if (filenameComplete != null) {
                    int i = filenameComplete.lastIndexOf('.');
                    if (i > 0) {
                        // index the name without its extension in addition to the full name below
                        String filenameWithoutExtension = filenameComplete.substring(0, i);
                        datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
                        datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
                    } else {
                        logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
                    }
                    filenameCompleteFinal = filenameComplete;
                }
                for (String tag : fileMetadata.getCategoriesByName()) {
                    datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
                    datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
                }
                datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
                /**
                 * for rules on sorting files see
                 * https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
                 * via https://redmine.hmdc.harvard.edu/issues/3701
                 */
                // Defaults to "now"; replaced by the publication or create timestamp when one exists.
                Date fileSortByDate = new Date();
                DataFile datafile = fileMetadata.getDataFile();
                // NOTE(review): getDataFile() was already dereferenced above for fileEntityId,
                // so this null check cannot be the first place a missing data file shows up.
                if (datafile != null) {
                    boolean fileHasBeenReleased = datafile.isReleased();
                    if (fileHasBeenReleased) {
                        logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp filePublicationTimestamp = datafile.getPublicationDate();
                        if (filePublicationTimestamp != null) {
                            fileSortByDate = filePublicationTimestamp;
                        } else {
                            String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
                            logger.info(msg);
                        }
                        // released files take their access flag from the data file itself
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    } else {
                        logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp fileCreateTimestamp = datafile.getCreateDate();
                        if (fileCreateTimestamp != null) {
                            fileSortByDate = fileCreateTimestamp;
                        } else {
                            String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
                            logger.info(msg);
                        }
                        // unreleased files take their access flag from this version's file metadata
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    }
                    if (datafile.isHarvested()) {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
                    } else {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
                if (majorVersionReleaseDate == null && !datafile.isHarvested()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
                }
                if (datasetVersion.isInReview()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
                }
                // Working copies get the state's suffix appended to the file document id.
                String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
                if (state.equals(IndexableDataset.DatasetState.PUBLISHED)) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
                    addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
                } else if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
                    fileSolrDocId = solrDocIdentifierFile + fileEntityId + state.getSuffix();
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
                }
                datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
                datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
                // For the file type facets, we have a property file that maps mime types
                // to facet-friendly names; "application/fits" should become "FITS", etc.:
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
                if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
                    /**
                     * @todo Someday we should probably deprecate this
                     * FILE_MD5 in favor of a combination of
                     * FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
                     */
                    datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
                }
                // A file without a checksum type must not abort indexing of the whole dataset.
                if (fileMetadata.getDataFile().getChecksumType() != null) {
                    datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
                }
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
                datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
                datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
                datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
                datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
                // names and labels:
                if (fileMetadata.getDataFile().isTabularData()) {
                    List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
                    for (DataVariable var : variables) {
                        if (var.getName() != null && !var.getName().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
                        }
                        if (var.getLabel() != null && !var.getLabel().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
                        }
                    }
                    // (not to be confused with the file categories, indexed above!)
                    for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
                        String tagLabel = tag.getTypeLabel();
                        datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
                    }
                }
                if (indexableDataset.isFilesShouldBeIndexed()) {
                    filesIndexed.add(fileSolrDocId);
                    docs.add(datafileSolrInputDocument);
                }
            }
        }
    }
    // Failures talking to Solr are reported through the return value, not thrown.
    try {
        solrServer.add(docs);
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    try {
        solrServer.commit();
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    Long dsId = dataset.getId();
    // instead of making a call to dvObjectService, let's try and
    // modify the index time stamp using the local EntityManager:
    DvObject dvObjectToModify = em.find(DvObject.class, dsId);
    dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
    em.merge(dvObjectToModify);
    return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DvObject(edu.harvard.iq.dataverse.DvObject) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) Timestamp(java.sql.Timestamp) DataFile(edu.harvard.iq.dataverse.DataFile) SolrInputDocument(org.apache.solr.common.SolrInputDocument) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Dataset(edu.harvard.iq.dataverse.Dataset) IOException(java.io.IOException) Dataverse(edu.harvard.iq.dataverse.Dataverse) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) SolrServerException(org.apache.solr.client.solrj.SolrServerException) EJBException(javax.ejb.EJBException) IOException(java.io.IOException) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DataFileTag(edu.harvard.iq.dataverse.DataFileTag)

Example 13 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

the class DDIExporterTest method setUp.

/**
 * Registers the dataset field types used by the tests with a fresh
 * {@code MockDatasetFieldSvc}: a few simple fields, four compound fields
 * (author, datasetContact, dsDescription, coordinate) wired to their
 * children, and three fields that carry controlled vocabulary values.
 */
@Before
public void setUp() {
    datasetFieldTypeSvc = new MockDatasetFieldSvc();

    // simple field, only needs to be known to the service
    datasetFieldTypeSvc.add(new DatasetFieldType("title", FieldType.TEXTBOX, false));

    // compound "author" field and its children
    DatasetFieldType author = datasetFieldTypeSvc.add(new DatasetFieldType("author", FieldType.TEXT, true));
    Set<DatasetFieldType> authorChildren = new HashSet<>();
    authorChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", FieldType.TEXT, false)));
    authorChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", FieldType.TEXT, false)));
    authorChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", FieldType.TEXT, false)));
    DatasetFieldType identifierScheme = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", FieldType.TEXT, false));
    identifierScheme.setAllowControlledVocabulary(true);
    // Why aren't these enforced? Should be ORCID, etc.
    identifierScheme.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", identifierScheme),
            new ControlledVocabularyValue(2L, "doi", identifierScheme),
            new ControlledVocabularyValue(3L, "url", identifierScheme)));
    authorChildren.add(datasetFieldTypeSvc.add(identifierScheme));
    for (DatasetFieldType child : authorChildren) {
        child.setParentDatasetFieldType(author);
    }
    author.setChildDatasetFieldTypes(authorChildren);

    // compound "datasetContact" field and its children
    DatasetFieldType contact = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", FieldType.TEXT, true));
    Set<DatasetFieldType> contactChildren = new HashSet<>();
    contactChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactEmail", FieldType.TEXT, false)));
    contactChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", FieldType.TEXT, false)));
    contactChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", FieldType.TEXT, false)));
    for (DatasetFieldType child : contactChildren) {
        child.setParentDatasetFieldType(contact);
    }
    contact.setChildDatasetFieldTypes(contactChildren);

    // compound "dsDescription" field with a single child
    DatasetFieldType dsDescription = datasetFieldTypeSvc.add(new DatasetFieldType("dsDescription", FieldType.TEXT, true));
    Set<DatasetFieldType> dsDescriptionChildren = new HashSet<>();
    dsDescriptionChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("dsDescriptionValue", FieldType.TEXT, false)));
    for (DatasetFieldType child : dsDescriptionChildren) {
        child.setParentDatasetFieldType(dsDescription);
    }
    dsDescription.setChildDatasetFieldTypes(dsDescriptionChildren);

    // more simple fields
    datasetFieldTypeSvc.add(new DatasetFieldType("keyword", FieldType.TEXT, true));
    datasetFieldTypeSvc.add(new DatasetFieldType("description", FieldType.TEXTBOX, false));

    // controlled vocabulary field "subject"
    DatasetFieldType subject = datasetFieldTypeSvc.add(new DatasetFieldType("subject", FieldType.TEXT, true));
    subject.setAllowControlledVocabulary(true);
    subject.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "mgmt", subject),
            new ControlledVocabularyValue(2L, "law", subject),
            new ControlledVocabularyValue(3L, "cs", subject)));

    // controlled vocabulary field "publicationIdType"
    DatasetFieldType publicationIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", FieldType.TEXT, false));
    publicationIdType.setAllowControlledVocabulary(true);
    publicationIdType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", publicationIdType),
            new ControlledVocabularyValue(2L, "doi", publicationIdType),
            new ControlledVocabularyValue(3L, "url", publicationIdType)));

    // compound "coordinate" field with lat/lon children
    DatasetFieldType coordinate = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", FieldType.TEXT, true));
    Set<DatasetFieldType> coordinateChildren = new HashSet<>();
    coordinateChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", FieldType.TEXT, false)));
    coordinateChildren.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", FieldType.TEXT, false)));
    for (DatasetFieldType child : coordinateChildren) {
        child.setParentDatasetFieldType(coordinate);
    }
    coordinate.setChildDatasetFieldTypes(coordinateChildren);
}
Also used : DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) HashSet(java.util.HashSet) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Before(org.junit.Before)

Example 14 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

the class SchemaDotOrgExporterTest method setUp.

/**
 * Populates a fresh {@code DDIExporterTest.MockDatasetFieldSvc} with the dataset field
 * types this test class works with: a standalone "title" and "description", the compound
 * parents "author", "datasetContact", "dsDescription" and "coordinate" together with their
 * children, and the "keyword", "subject" and "publicationIdType" types (the last two, plus
 * "authorIdentifierScheme", carry small controlled vocabularies).
 */
@Before
public void setUp() {
    datasetFieldTypeSvc = new DDIExporterTest.MockDatasetFieldSvc();

    // Added to the service only; nothing below needs a local handle on it.
    datasetFieldTypeSvc.add(new DatasetFieldType("title", DatasetFieldType.FieldType.TEXTBOX, false));

    // author and its children
    DatasetFieldType authorType = datasetFieldTypeSvc.add(new DatasetFieldType("author", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> authorChildTypes = new HashSet<>();
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", DatasetFieldType.FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", DatasetFieldType.FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", DatasetFieldType.FieldType.TEXT, false)));
    DatasetFieldType authorIdentifierSchemeType = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", DatasetFieldType.FieldType.TEXT, false));
    authorIdentifierSchemeType.setAllowControlledVocabulary(true);
    authorIdentifierSchemeType.setControlledVocabularyValues(Arrays.asList(
            // Why aren't these enforced? Should be ORCID, etc.
            new ControlledVocabularyValue(1L, "ark", authorIdentifierSchemeType),
            new ControlledVocabularyValue(2L, "doi", authorIdentifierSchemeType),
            new ControlledVocabularyValue(3L, "url", authorIdentifierSchemeType)));
    // NOTE(review): this type was already passed to add() above; the second add() is kept
    // so the sequence of calls on the mock service is unchanged - confirm whether it is needed.
    authorChildTypes.add(datasetFieldTypeSvc.add(authorIdentifierSchemeType));
    linkChildren(authorType, authorChildTypes);

    // datasetContact and its children
    DatasetFieldType datasetContactType = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> datasetContactTypes = new HashSet<>();
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactEmail", DatasetFieldType.FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", DatasetFieldType.FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", DatasetFieldType.FieldType.TEXT, false)));
    linkChildren(datasetContactType, datasetContactTypes);

    // dsDescription and its single child
    DatasetFieldType dsDescriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("dsDescription", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> dsDescriptionTypes = new HashSet<>();
    dsDescriptionTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("dsDescriptionValue", DatasetFieldType.FieldType.TEXT, false)));
    linkChildren(dsDescriptionType, dsDescriptionTypes);

    // Added to the service only; no local handles are needed.
    datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true));
    datasetFieldTypeSvc.add(new DatasetFieldType("description", DatasetFieldType.FieldType.TEXTBOX, false));

    // subject, with a three-entry controlled vocabulary
    DatasetFieldType subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", DatasetFieldType.FieldType.TEXT, true));
    subjectType.setAllowControlledVocabulary(true);
    subjectType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "mgmt", subjectType),
            new ControlledVocabularyValue(2L, "law", subjectType),
            new ControlledVocabularyValue(3L, "cs", subjectType)));

    // publicationIdType, with a three-entry controlled vocabulary
    DatasetFieldType pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", DatasetFieldType.FieldType.TEXT, false));
    pubIdType.setAllowControlledVocabulary(true);
    pubIdType.setControlledVocabularyValues(Arrays.asList(
            new ControlledVocabularyValue(1L, "ark", pubIdType),
            new ControlledVocabularyValue(2L, "doi", pubIdType),
            new ControlledVocabularyValue(3L, "url", pubIdType)));

    // coordinate with lat/lon children
    DatasetFieldType compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> childTypes = new HashSet<>();
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", DatasetFieldType.FieldType.TEXT, false)));
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", DatasetFieldType.FieldType.TEXT, false)));
    linkChildren(compoundSingleType, childTypes);
}

/**
 * Wires a parent field type to its children in both directions: every child gets
 * {@code parent} as its parent type, then {@code parent} gets {@code children} as its
 * child types.
 *
 * @param parent   the field type that owns the children
 * @param children the child field types to attach to {@code parent}
 */
private static void linkChildren(DatasetFieldType parent, Set<DatasetFieldType> children) {
    for (DatasetFieldType child : children) {
        child.setParentDatasetFieldType(parent);
    }
    parent.setChildDatasetFieldTypes(children);
}
Also used : DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) HashSet(java.util.HashSet) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Before(org.junit.Before)

Example 15 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

the class JsonParser method parseField.

/**
 * Parses one JSON field description into a {@link DatasetField}.
 *
 * <p>The object's {@code typeName} is resolved through {@code datasetFieldSvc}; its
 * {@code multiple} flag and {@code typeClass} are then checked against the resolved type
 * before the values are parsed as compound, controlled-vocabulary or primitive values.
 *
 * <p>All three keys are read with the defaulting accessors, so a key that is absent or
 * holds a value of the wrong JSON type is reported as a {@link JsonParseException} by the
 * checks below rather than surfacing as an unchecked exception from the JSON accessor.
 *
 * @param json the JSON object describing the field; may be {@code null}
 * @return the populated field, or {@code null} when {@code json} is {@code null}
 * @throws JsonParseException if the type name is unknown, or if {@code multiple} or
 *         {@code typeClass} is missing or does not match the resolved type
 */
public DatasetField parseField(JsonObject json) throws JsonParseException {
    if (json == null) {
        return null;
    }
    DatasetField ret = new DatasetField();
    String typeName = json.getString("typeName", "");
    DatasetFieldType type = datasetFieldSvc.findByNameOpt(typeName);
    if (type == null) {
        throw new JsonParseException("Can't find type '" + typeName + "'");
    }
    // Default to the opposite of what the type expects: a missing or non-boolean
    // "multiple" can then never match and is rejected by the check below.
    boolean declaredMultiple = json.getBoolean("multiple", !type.isAllowMultiples());
    if (type.isAllowMultiples() != declaredMultiple) {
        throw new JsonParseException("incorrect multiple   for field " + typeName);
    }
    // An empty default equals none of the expected class names, so a missing or
    // non-string "typeClass" fails whichever check applies to this type.
    String typeClass = json.getString("typeClass", "");
    if (type.isCompound() && !typeClass.equals("compound")) {
        throw new JsonParseException("incorrect  typeClass for field " + typeName + ", should be compound.");
    }
    if (!type.isControlledVocabulary() && type.isPrimitive() && !typeClass.equals("primitive")) {
        throw new JsonParseException("incorrect  typeClass for field: " + typeName + ", should be primitive");
    }
    if (type.isControlledVocabulary() && !typeClass.equals("controlledVocabulary")) {
        throw new JsonParseException("incorrect  typeClass for field " + typeName + ", should be controlledVocabulary");
    }
    ret.setDatasetFieldType(type);
    if (type.isCompound()) {
        List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json);
        // Point each compound value back at the field being built.
        for (DatasetFieldCompoundValue dsfcv : vals) {
            dsfcv.setParentDatasetField(ret);
        }
        ret.setDatasetFieldCompoundValues(vals);
    } else if (type.isControlledVocabulary()) {
        List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json);
        for (ControlledVocabularyValue cvv : vals) {
            cvv.setDatasetFieldType(type);
        }
        ret.setControlledVocabularyValues(vals);
    } else {
        // primitive
        List<DatasetFieldValue> values = parsePrimitiveValue(json);
        // Point each primitive value back at the field being built.
        for (DatasetFieldValue val : values) {
            val.setDatasetField(ret);
        }
        ret.setDatasetFieldValues(values);
    }
    return ret;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) DatasetFieldCompoundValue(edu.harvard.iq.dataverse.DatasetFieldCompoundValue) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue)

Aggregations

ControlledVocabularyValue (edu.harvard.iq.dataverse.ControlledVocabularyValue)16 DatasetFieldType (edu.harvard.iq.dataverse.DatasetFieldType)12 DatasetField (edu.harvard.iq.dataverse.DatasetField)7 DatasetFieldCompoundValue (edu.harvard.iq.dataverse.DatasetFieldCompoundValue)5 DatasetFieldValue (edu.harvard.iq.dataverse.DatasetFieldValue)4 ArrayList (java.util.ArrayList)4 HashSet (java.util.HashSet)4 Before (org.junit.Before)4 LinkedList (java.util.LinkedList)3 Dataverse (edu.harvard.iq.dataverse.Dataverse)2 IOException (java.io.IOException)2 List (java.util.List)2 GET (javax.ws.rs.GET)2 Path (javax.ws.rs.Path)2 SolrServerException (org.apache.solr.client.solrj.SolrServerException)2 SolrInputDocument (org.apache.solr.common.SolrInputDocument)2 ControlledVocabAlternate (edu.harvard.iq.dataverse.ControlledVocabAlternate)1 DataFile (edu.harvard.iq.dataverse.DataFile)1 DataFileTag (edu.harvard.iq.dataverse.DataFileTag)1 Dataset (edu.harvard.iq.dataverse.Dataset)1