Search in sources:

Example 1 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

From the class IndexServiceBean, the method indexDataverse.

/**
 * Asynchronously indexes a single dataverse into Solr, unless it has no id yet
 * or is the root dataverse (which is intentionally never indexed).
 *
 * @param dataverse the dataverse to index; must have a non-null id
 * @return an AsyncResult wrapping a human-readable status message (success,
 *         skip reason, or the Solr exception text on failure)
 */
public Future<String> indexDataverse(Dataverse dataverse) {
    logger.fine("indexDataverse called on dataverse id " + dataverse.getId() + "(" + dataverse.getAlias() + ")");
    if (dataverse.getId() == null) {
        String msg = "unable to index dataverse. id was null (alias: " + dataverse.getAlias() + ")";
        logger.info(msg);
        return new AsyncResult<>(msg);
    }
    Dataverse rootDataverse = findRootDataverseCached();
    if (rootDataverse == null) {
        String msg = "Could not find root dataverse and the root dataverse should not be indexed. Returning.";
        return new AsyncResult<>(msg);
    } else if (rootDataverse.getId().equals(dataverse.getId())) {
        // BUGFIX: was `dataverse.getId() == rootDataverse.getId()`, a reference
        // comparison of boxed Longs that only holds inside the Long cache range
        // (-128..127); use equals() so large ids compare correctly.
        String msg = "The root dataverse should not be indexed. Returning.";
        return new AsyncResult<>(msg);
    }
    Collection<SolrInputDocument> docs = new ArrayList<>();
    SolrInputDocument solrInputDocument = new SolrInputDocument();
    solrInputDocument.addField(SearchFields.ID, solrDocIdentifierDataverse + dataverse.getId());
    solrInputDocument.addField(SearchFields.ENTITY_ID, dataverse.getId());
    solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, systemConfig.getVersion());
    solrInputDocument.addField(SearchFields.IDENTIFIER, dataverse.getAlias());
    solrInputDocument.addField(SearchFields.TYPE, "dataverses");
    solrInputDocument.addField(SearchFields.NAME, dataverse.getName());
    solrInputDocument.addField(SearchFields.NAME_SORT, dataverse.getName());
    solrInputDocument.addField(SearchFields.DATAVERSE_NAME, dataverse.getName());
    solrInputDocument.addField(SearchFields.DATAVERSE_CATEGORY, dataverse.getIndexableCategoryName());
    // Released dataverses surface their publication date; unpublished ones fall
    // back to the creation date so every document has a sortable/searchable date.
    if (dataverse.isReleased()) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
        solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getPublicationDate());
        solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(dataverse.getPublicationDate()));
    } else {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
        solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getCreateDate());
        solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(dataverse.getCreateDate()));
    }
    /* We don't really have harvested dataverses yet; 
           (I have in fact just removed the isHarvested() method from the Dataverse object) -- L.A.
        if (dataverse.isHarvested()) {
            solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
            solrInputDocument.addField(SearchFields.SOURCE, HARVESTED);
        } else { (this means that all dataverses are "local" - should this be removed? */
    solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
    // Reuse the already-fetched root dataverse rather than calling
    // findRootDataverseCached() a second time.
    solrInputDocument.addField(SearchFields.METADATA_SOURCE, rootDataverse.getName());
    /*}*/
    addDataverseReleaseDateToSolrDoc(solrInputDocument, dataverse);
    solrInputDocument.addField(SearchFields.DESCRIPTION, StringUtil.html2text(dataverse.getDescription()));
    solrInputDocument.addField(SearchFields.DATAVERSE_DESCRIPTION, StringUtil.html2text(dataverse.getDescription()));
    if (dataverse.getAffiliation() != null && !dataverse.getAffiliation().isEmpty()) {
        /**
         * @todo: stop using affiliation as category
         */
        solrInputDocument.addField(SearchFields.AFFILIATION, dataverse.getAffiliation());
        solrInputDocument.addField(SearchFields.DATAVERSE_AFFILIATION, dataverse.getAffiliation());
    }
    for (ControlledVocabularyValue dataverseSubject : dataverse.getDataverseSubjects()) {
        String subject = dataverseSubject.getStrValue();
        if (!subject.equals(DatasetField.NA_VALUE)) {
            solrInputDocument.addField(SearchFields.DATAVERSE_SUBJECT, subject);
            // collapse into shared "subject" field used as a facet
            solrInputDocument.addField(SearchFields.SUBJECT, subject);
        }
    }
    // rootDataverse is guaranteed non-null here (we returned above otherwise),
    // and this dataverse is not the root, so only the owner null-check remains
    // (important when creating a dataverse before its owner link exists).
    if (dataverse.getOwner() != null) {
        solrInputDocument.addField(SearchFields.PARENT_ID, dataverse.getOwner().getId());
        solrInputDocument.addField(SearchFields.PARENT_NAME, dataverse.getOwner().getName());
    }
    List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
    List<String> dataverseSegments = findPathSegments(dataverse, dataversePathSegmentsAccumulator);
    List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
    if (!dataversePaths.isEmpty()) {
        // don't show yourself while indexing or in search results: https://redmine.hmdc.harvard.edu/issues/3613
        dataversePaths.remove(dataversePaths.size() - 1);
    }
    // Add paths for linking dataverses so this dataverse is discoverable under them.
    for (Dataverse linkingDataverse : dvLinkingService.findLinkingDataverses(dataverse.getId())) {
        List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
        List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
        List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
        dataversePaths.addAll(linkingDataversePaths);
    }
    solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
    docs.add(solrInputDocument);
    String status;
    // The id was verified non-null at the top of this method, so the document
    // can be added unconditionally.
    try {
        solrServer.add(docs);
    } catch (SolrServerException | IOException ex) {
        status = ex.toString();
        logger.info(status);
        return new AsyncResult<>(status);
    }
    try {
        solrServer.commit();
    } catch (SolrServerException | IOException ex) {
        status = ex.toString();
        logger.info(status);
        return new AsyncResult<>(status);
    }
    dvObjectService.updateContentIndexTime(dataverse);
    IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse);
    String msg = "indexed dataverse " + dataverse.getId() + ":" + dataverse.getAlias() + ". Response from permission indexing: " + indexResponse.getMessage();
    return new AsyncResult<>(msg);
}
Also used : SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Dataverse(edu.harvard.iq.dataverse.Dataverse) SolrInputDocument(org.apache.solr.common.SolrInputDocument) AsyncResult(javax.ejb.AsyncResult) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue)

Example 2 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

From the class DatasetFieldServiceApi, the method loadNAControlledVocabularyValue.

// TODO consider replacing with a @Startup method on the datasetFieldServiceBean
@GET
@Path("loadNAControlledVocabularyValue")
public Response loadNAControlledVocabularyValue() {
    // Look up the "N/A" controlled vocabulary value (the one with no dataset
    // field type) directly with a query, rather than via
    // datasetFieldService.findNAControlledVocabularyValue(), because that find
    // throws javax.persistence.NoResultException when no such value exists yet.
    TypedQuery<ControlledVocabularyValue> naValueQuery = em.createQuery("SELECT OBJECT(o) FROM ControlledVocabularyValue AS o WHERE o.datasetFieldType is null AND o.strValue = :strvalue", ControlledVocabularyValue.class);
    naValueQuery.setParameter("strvalue", DatasetField.NA_VALUE);
    boolean alreadyPresent = !naValueQuery.getResultList().isEmpty();
    if (alreadyPresent) {
        return ok("NA value exists.");
    }
    // Seed the database with the sentinel value.
    ControlledVocabularyValue naValue = new ControlledVocabularyValue();
    naValue.setStrValue(DatasetField.NA_VALUE);
    datasetFieldService.save(naValue);
    return ok("NA value created.");
}
Also used : ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 3 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

From the class DatasetFieldServiceApi, the method parseControlledVocabulary.

/**
 * Creates or updates a controlled vocabulary value from one parsed TSV row.
 * Row layout: [1] field type name, [2] display value, [3] identifier,
 * [4] display order, [5..] alternate values (new values only).
 *
 * @param values one row of the controlled vocabulary load file
 * @return the display (string) value of the saved vocabulary value
 */
private String parseControlledVocabulary(String[] values) {
    DatasetFieldType fieldType = datasetFieldService.findByName(values[1]);
    /*
         Matching relies on assumption that only one cv value will exist for a given identifier or display value
        If the lookup queries return multiple matches then retval is null 
        */
    // First try to match an existing value by its display name...
    ControlledVocabularyValue match = datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(fieldType, values[2], true);
    // ...then by identifier; an identifier match takes precedence.
    ControlledVocabularyValue identifierMatch = null;
    if (values[3] != null && !values[3].trim().isEmpty()) {
        identifierMatch = datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndIdentifier(fieldType, values[3]);
    }
    if (identifierMatch != null) {
        match = identifierMatch;
    }
    if (match == null) {
        // Nothing matched: build a brand-new value. Alternates are only for
        // data load, so they are attached to new values exclusively.
        match = new ControlledVocabularyValue();
        match.setDatasetFieldType(fieldType);
        for (int i = 5; i < values.length; i++) {
            ControlledVocabAlternate alternate = new ControlledVocabAlternate();
            alternate.setDatasetFieldType(fieldType);
            alternate.setControlledVocabularyValue(match);
            alternate.setStrValue(values[i]);
            match.getControlledVocabAlternates().add(alternate);
        }
    }
    // Whether matched or newly created, refresh the mutable attributes from the row.
    match.setStrValue(values[2]);
    match.setIdentifier(values[3]);
    match.setDisplayOrder(Integer.parseInt(values[4]));
    datasetFieldService.save(match);
    return match.getStrValue();
}
Also used : ControlledVocabAlternate(edu.harvard.iq.dataverse.ControlledVocabAlternate) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue)

Example 4 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

From the class JsonPrinterTest, the method setUp.

@Before
public void setUp() {
    // Populate a mock dataset-field-type service with the metadata block
    // structure (title, author, contact, subject, etc.) that JsonPrinter
    // tests render against.
    datasetFieldTypeSvc = new JsonParserTest.MockDatasetFieldSvc();
    DatasetFieldType titleType = datasetFieldTypeSvc.add(new DatasetFieldType("title", FieldType.TEXTBOX, false));
    DatasetFieldType authorType = datasetFieldTypeSvc.add(new DatasetFieldType("author", FieldType.TEXT, true));
    Set<DatasetFieldType> authorChildTypes = new HashSet<>();
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", FieldType.TEXT, false)));
    authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", FieldType.TEXT, false)));
    DatasetFieldType authorIdentifierSchemeType = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", FieldType.TEXT, false));
    authorIdentifierSchemeType.setAllowControlledVocabulary(true);
    // Long literals use uppercase L (lowercase l reads as the digit 1).
    authorIdentifierSchemeType.setControlledVocabularyValues(Arrays.asList(// FIXME: Why aren't these enforced? Should be ORCID, etc.
    new ControlledVocabularyValue(1L, "foo", authorIdentifierSchemeType), new ControlledVocabularyValue(2L, "bar", authorIdentifierSchemeType), new ControlledVocabularyValue(3L, "baz", authorIdentifierSchemeType)));
    authorChildTypes.add(datasetFieldTypeSvc.add(authorIdentifierSchemeType));
    for (DatasetFieldType t : authorChildTypes) {
        t.setParentDatasetFieldType(authorType);
    }
    authorType.setChildDatasetFieldTypes(authorChildTypes);
    DatasetFieldType datasetContactType = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", FieldType.TEXT, true));
    Set<DatasetFieldType> datasetContactTypes = new HashSet<>();
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType(DatasetFieldConstant.datasetContactEmail, FieldType.EMAIL, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", FieldType.TEXT, false)));
    datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", FieldType.TEXT, false)));
    for (DatasetFieldType t : datasetContactTypes) {
        t.setParentDatasetFieldType(datasetContactType);
    }
    datasetContactType.setChildDatasetFieldTypes(datasetContactTypes);
    DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true));
    DatasetFieldType descriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("description", DatasetFieldType.FieldType.TEXTBOX, false));
    DatasetFieldType subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", DatasetFieldType.FieldType.TEXT, true));
    subjectType.setAllowControlledVocabulary(true);
    subjectType.setControlledVocabularyValues(Arrays.asList(new ControlledVocabularyValue(1L, "mgmt", subjectType), new ControlledVocabularyValue(2L, "law", subjectType), new ControlledVocabularyValue(3L, "cs", subjectType)));
    DatasetFieldType pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", DatasetFieldType.FieldType.TEXT, false));
    pubIdType.setAllowControlledVocabulary(true);
    pubIdType.setControlledVocabularyValues(Arrays.asList(new ControlledVocabularyValue(1L, "ark", pubIdType), new ControlledVocabularyValue(2L, "doi", pubIdType), new ControlledVocabularyValue(3L, "url", pubIdType)));
    // Compound field with two child types (lat/lon).
    DatasetFieldType compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", DatasetFieldType.FieldType.TEXT, true));
    Set<DatasetFieldType> childTypes = new HashSet<>();
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", DatasetFieldType.FieldType.TEXT, false)));
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", DatasetFieldType.FieldType.TEXT, false)));
    for (DatasetFieldType t : childTypes) {
        t.setParentDatasetFieldType(compoundSingleType);
    }
    compoundSingleType.setChildDatasetFieldTypes(childTypes);
// settingsSvc = new JsonParserTest.MockSettingsSvc();
// jsonPrinter = new JsonPrinter(settingsSvc);
}
Also used : DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) HashSet(java.util.HashSet) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Before(org.junit.Before)

Example 5 with ControlledVocabularyValue

use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.

From the class JsonParserTest, the method setUp.

@Before
public void setUp() {
    // Populate the mock dataset-field-type service with the field types the
    // JsonParser tests parse into, then build the parser under test.
    datasetFieldTypeSvc = new MockDatasetFieldSvc();
    keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", FieldType.TEXT, true));
    descriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("description", FieldType.TEXTBOX, false));
    subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", FieldType.TEXT, true));
    subjectType.setAllowControlledVocabulary(true);
    // Long literals use uppercase L (lowercase l reads as the digit 1).
    subjectType.setControlledVocabularyValues(Arrays.asList(new ControlledVocabularyValue(1L, "mgmt", subjectType), new ControlledVocabularyValue(2L, "law", subjectType), new ControlledVocabularyValue(3L, "cs", subjectType)));
    pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", FieldType.TEXT, false));
    pubIdType.setAllowControlledVocabulary(true);
    pubIdType.setControlledVocabularyValues(Arrays.asList(new ControlledVocabularyValue(1L, "ark", pubIdType), new ControlledVocabularyValue(2L, "doi", pubIdType), new ControlledVocabularyValue(3L, "url", pubIdType)));
    // Compound field with two child types (lat/lon).
    compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", FieldType.TEXT, true));
    Set<DatasetFieldType> childTypes = new HashSet<>();
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", FieldType.TEXT, false)));
    childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", FieldType.TEXT, false)));
    for (DatasetFieldType t : childTypes) {
        t.setParentDatasetFieldType(compoundSingleType);
    }
    compoundSingleType.setChildDatasetFieldTypes(childTypes);
    settingsSvc = new MockSettingsSvc();
    sut = new JsonParser(datasetFieldTypeSvc, null, settingsSvc);
}
Also used : DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) HashSet(java.util.HashSet) Before(org.junit.Before)

Aggregations

ControlledVocabularyValue (edu.harvard.iq.dataverse.ControlledVocabularyValue)16 DatasetFieldType (edu.harvard.iq.dataverse.DatasetFieldType)12 DatasetField (edu.harvard.iq.dataverse.DatasetField)7 DatasetFieldCompoundValue (edu.harvard.iq.dataverse.DatasetFieldCompoundValue)5 DatasetFieldValue (edu.harvard.iq.dataverse.DatasetFieldValue)4 ArrayList (java.util.ArrayList)4 HashSet (java.util.HashSet)4 Before (org.junit.Before)4 LinkedList (java.util.LinkedList)3 Dataverse (edu.harvard.iq.dataverse.Dataverse)2 IOException (java.io.IOException)2 List (java.util.List)2 GET (javax.ws.rs.GET)2 Path (javax.ws.rs.Path)2 SolrServerException (org.apache.solr.client.solrj.SolrServerException)2 SolrInputDocument (org.apache.solr.common.SolrInputDocument)2 ControlledVocabAlternate (edu.harvard.iq.dataverse.ControlledVocabAlternate)1 DataFile (edu.harvard.iq.dataverse.DataFile)1 DataFileTag (edu.harvard.iq.dataverse.DataFileTag)1 Dataset (edu.harvard.iq.dataverse.Dataset)1