Search in sources :

Example 31 with DatasetFieldType

Use of edu.harvard.iq.dataverse.DatasetFieldType in project dataverse by IQSS.

From the class IndexServiceBean, the method getTopicClassificationTermOrTermAndVocabulary:

/**
 * If the "Topic Classification" has a "Vocabulary", return both the "Term"
 * and the "Vocabulary" with the latter in parentheses. For example, the
 * Murray Research Archive uses "1 (Generations)" and "yes (Follow-up
 * permitted)".
 */
private String getTopicClassificationTermOrTermAndVocabulary(DatasetField topicClassDatasetField) {
    String finalValue = null;
    String topicClassVocab = null;
    String topicClassValue = null;
    for (DatasetField sibling : topicClassDatasetField.getParentDatasetFieldCompoundValue().getChildDatasetFields()) {
        DatasetFieldType datasetFieldType = sibling.getDatasetFieldType();
        String name = datasetFieldType.getName();
        if (name.equals(DatasetFieldConstant.topicClassVocab)) {
            topicClassVocab = sibling.getDisplayValue();
        } else if (name.equals(DatasetFieldConstant.topicClassValue)) {
            topicClassValue = sibling.getDisplayValue();
        }
        if (topicClassValue != null) {
            if (topicClassVocab != null) {
                finalValue = topicClassValue + " (" + topicClassVocab + ")";
            } else {
                finalValue = topicClassValue;
            }
        }
    }
    return finalValue;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType)
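
The formatting rule itself is simple enough to isolate. Below is a minimal, self-contained sketch of the same term-plus-vocabulary rule, assuming nothing from Dataverse; the helper name formatTopicClassification is hypothetical, introduced only for illustration.

public class TopicClassificationFormatSketch {

    // Hypothetical helper mirroring the rule above: if a vocabulary is
    // present, append it to the term in parentheses; otherwise return the
    // term alone. Returns null when no term was found.
    static String formatTopicClassification(String term, String vocabulary) {
        if (term == null) {
            return null;
        }
        return (vocabulary != null) ? term + " (" + vocabulary + ")" : term;
    }

    public static void main(String[] args) {
        System.out.println(formatTopicClassification("1", "Generations")); // 1 (Generations)
        System.out.println(formatTopicClassification("yes", null));       // yes
    }
}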

Example 32 with DatasetFieldType

Use of edu.harvard.iq.dataverse.DatasetFieldType in project dataverse by IQSS.

From the class ForeignMetadataImportServiceBean, the method processXMLElement:

private void processXMLElement(XMLStreamReader xmlr, String currentPath, String openingTag, ForeignMetadataFormatMapping foreignFormatMapping, DatasetVersion datasetVersion) throws XMLStreamException {
    logger.fine("entering processXMLElement; (" + currentPath + ")");
    for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) {
        if (event == XMLStreamConstants.START_ELEMENT) {
            String currentElement = xmlr.getLocalName();
            ForeignMetadataFieldMapping mappingDefined = datasetfieldService.findFieldMapping(foreignFormatMapping.getName(), currentPath + currentElement);
            if (mappingDefined != null) {
                DatasetFieldCompoundValue cachedCompoundValue = null;
                for (ForeignMetadataFieldMapping childMapping : mappingDefined.getChildFieldMappings()) {
                    if (childMapping.isAttribute()) {
                        String attributeName = childMapping.getForeignFieldXPath();
                        String attributeValue = xmlr.getAttributeValue(null, attributeName);
                        if (attributeValue != null) {
                            String mappedFieldName = childMapping.getDatasetfieldName();
                            logger.fine("looking up dataset field " + mappedFieldName);
                            DatasetFieldType mappedFieldType = datasetfieldService.findByNameOpt(mappedFieldName);
                            if (mappedFieldType != null) {
                                try {
                                    cachedCompoundValue = createDatasetFieldValue(mappedFieldType, cachedCompoundValue, attributeValue, datasetVersion);
                                } catch (Exception ex) {
                                    logger.warning("Caught unknown exception when processing attribute " + currentPath + currentElement + "{" + attributeName + "} (skipping);");
                                }
                            } else {
                                throw new EJBException("Bad foreign metadata field mapping: no such DatasetField " + mappedFieldName + "!");
                            }
                        }
                    }
                }
                // Process the payload of this XML element:
                String dataverseFieldName = mappingDefined.getDatasetfieldName();
                if (dataverseFieldName != null && !dataverseFieldName.equals("")) {
                    DatasetFieldType dataverseFieldType = datasetfieldService.findByNameOpt(dataverseFieldName);
                    if (dataverseFieldType != null) {
                        String elementTextPayload = parseText(xmlr);
                        createDatasetFieldValue(dataverseFieldType, cachedCompoundValue, elementTextPayload, datasetVersion);
                    } else {
                        throw new EJBException("Bad foreign metadata field mapping: no such DatasetField " + dataverseFieldName + "!");
                    }
                }
            } else {
                // recursively process the XML stream further down:
                processXMLElement(xmlr, currentPath + currentElement + ":", currentElement, foreignFormatMapping, datasetVersion);
            }
        } else if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals(openingTag))
                return;
        }
    }
}
Also used : EJBException(javax.ejb.EJBException) DatasetFieldCompoundValue(edu.harvard.iq.dataverse.DatasetFieldCompoundValue) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) NoResultException(javax.persistence.NoResultException) XMLStreamException(javax.xml.stream.XMLStreamException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) ForeignMetadataFieldMapping(edu.harvard.iq.dataverse.ForeignMetadataFieldMapping)
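
The recursive descent over the StAX stream is the part worth seeing in isolation: each START_ELEMENT either matches a mapping or triggers a recursive call with the element name appended to the path, and each END_ELEMENT matching the opening tag unwinds one level. Below is a minimal sketch of that traversal pattern, with the mapping lookup left out; the XML payload and the printouts are illustrative only.

import java.io.StringReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

public class RecursiveStaxSketch {

    static void processElement(XMLStreamReader xmlr, String currentPath, String openingTag)
            throws XMLStreamException {
        for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) {
            if (event == XMLStreamConstants.START_ELEMENT) {
                String name = xmlr.getLocalName();
                System.out.println("visiting " + currentPath + name);
                // no mapping lookup here; just recurse, as the service does
                // when findFieldMapping() returns null:
                processElement(xmlr, currentPath + name + ":", name);
            } else if (event == XMLStreamConstants.END_ELEMENT
                    && xmlr.getLocalName().equals(openingTag)) {
                return; // closed the element we were called for
            }
        }
    }

    public static void main(String[] args) throws XMLStreamException {
        String xml = "<codeBook><stdyDscr><citation><titlStmt><titl>T</titl></titlStmt></citation></stdyDscr></codeBook>";
        XMLStreamReader xmlr = XMLInputFactory.newInstance().createXMLStreamReader(new StringReader(xml));
        xmlr.nextTag(); // position on the root element
        processElement(xmlr, ":", xmlr.getLocalName());
    }
}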

Example 33 with DatasetFieldType

Use of edu.harvard.iq.dataverse.DatasetFieldType in project dataverse by IQSS.

From the class ForeignMetadataImportServiceBean, the method createDatasetFieldValue:

private DatasetFieldCompoundValue createDatasetFieldValue(DatasetFieldType dsft, DatasetFieldCompoundValue savedCompoundValue, String elementText, DatasetVersion datasetVersion) {
    if (dsft.isPrimitive()) {
        if (!dsft.isHasParent()) {
            // simple primitive:
            DatasetField dsf = null;
            for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
                if (existingDsf.getDatasetFieldType().equals(dsft)) {
                    dsf = existingDsf;
                }
            }
            // if it doesn't exist, create a new one:
            if (dsf == null) {
                dsf = new DatasetField();
                dsf.setDatasetFieldType(dsft);
                datasetVersion.getDatasetFields().add(dsf);
                dsf.setDatasetVersion(datasetVersion);
            }
            String dsfName = dsft.getName();
            if (!dsft.isControlledVocabulary()) {
                logger.fine("Creating a new value for field " + dsfName + ": " + elementText);
                DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
                newDsfv.setValue(elementText);
                dsf.getDatasetFieldValues().add(newDsfv);
            } else {
                // A controlled vocabulary entry:
                // first, let's see if it's a legit controlled vocabulary entry:
                /* not supported yet; though I expect the commented-out code
                   below to work:
                    ControlledVocabularyValue legitControlledVocabularyValue = null;
                    Collection<ControlledVocabularyValue> definedVocabularyValues = dsft.getControlledVocabularyValues();
                    if (definedVocabularyValues != null) {
                        for (ControlledVocabularyValue definedVocabValue : definedVocabularyValues) {
                            if (elementText.equals(definedVocabValue.getStrValue())) {
                                logger.fine("Yes, " + elementText + " is a valid controlled vocabulary value for the field " + dsfName);
                                legitControlledVocabularyValue = definedVocabValue;
                                break;
                            }
                        }
                    }
                    if (legitControlledVocabularyValue != null) {
                        logger.fine("Adding controlled vocabulary value " + elementText + " to field " + dsfName);
                        dsf.getControlledVocabularyValues().add(legitControlledVocabularyValue);
                    }
                */
            }
            // No compound values had to be created; returning null:
            return null;
        } else {
            // a primitive that is part of a compound value:
            // first, let's create the field and the value, for the
            // primitive node itself:
            DatasetField childField = new DatasetField();
            childField.setDatasetFieldType(dsft);
            DatasetFieldValue childValue = new DatasetFieldValue(childField);
            childValue.setValue(elementText);
            childField.getDatasetFieldValues().add(childValue);
            // see if a compound value of the right type has already been
            // created and passed to us:
            DatasetFieldCompoundValue parentCompoundValue = null;
            DatasetFieldType parentFieldType = dsft.getParentDatasetFieldType();
            if (parentFieldType == null) {
                logger.severe("Child field type with no parent field type defined!");
                // could just skip this field and try to continue - ?
                return null;
            }
            if (savedCompoundValue != null) {
                if (parentFieldType.equals(savedCompoundValue.getParentDatasetField().getDatasetFieldType())) {
                    parentCompoundValue = savedCompoundValue;
                }
            }
            if (parentCompoundValue == null) {
                // and to do that, we need to find or create the "parent"
                // dataset field for this compound value:
                // (I put quotes around "parent", because I really feel it
                // is a misnomer, and that the relationship between the compound value
                // and the corresponding dataset field should be called
                // "CompoundDatasetField", not "ParentDatasetField") (discuss?)
                DatasetField parentField = null;
                for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
                    if (existingDsf.getDatasetFieldType().equals(parentFieldType)) {
                        parentField = existingDsf;
                    }
                }
                // if it doesn't exist, create a new one:
                if (parentField == null) {
                    parentField = new DatasetField();
                    parentField.setDatasetFieldType(parentFieldType);
                    datasetVersion.getDatasetFields().add(parentField);
                    parentField.setDatasetVersion(datasetVersion);
                }
                // and then create new compound value:
                parentCompoundValue = new DatasetFieldCompoundValue();
                parentCompoundValue.setParentDatasetField(parentField);
                parentField.getDatasetFieldCompoundValues().add(parentCompoundValue);
            }
            childField.setParentDatasetFieldCompoundValue(parentCompoundValue);
            parentCompoundValue.getChildDatasetFields().add(childField);
            return parentCompoundValue;
        }
    }
    return null;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) DatasetFieldCompoundValue(edu.harvard.iq.dataverse.DatasetFieldCompoundValue) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType)
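
Both branches lean on the same find-or-create idiom: scan the version's existing fields for one of the right type, and only create and attach a new instance when none is found. Below is a generic sketch of that idiom, under the assumption that this shape is all the method relies on; the helper findOrCreate is hypothetical and not part of Dataverse.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;
import java.util.function.Supplier;

public class FindOrCreateSketch {

    // Hypothetical generic helper: return the first match, or else create a
    // new instance, attach it to the collection, and return it.
    static <T> T findOrCreate(List<T> existing, Predicate<T> matches, Supplier<T> create) {
        for (T candidate : existing) {
            if (matches.test(candidate)) {
                return candidate; // reuse the instance already attached
            }
        }
        T created = create.get();
        existing.add(created); // attach the new instance, as the service does
        return created;
    }

    public static void main(String[] args) {
        List<String> fields = new ArrayList<>(Arrays.asList("title", "author"));
        String subject = findOrCreate(fields, "subject"::equals, () -> "subject");
        System.out.println(subject + " -> " + fields); // subject -> [title, author, subject]
    }
}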

Example 34 with DatasetFieldType

Use of edu.harvard.iq.dataverse.DatasetFieldType in project dataverse by IQSS.

From the class IndexServiceBean, the method addOrUpdateDataset:

private String addOrUpdateDataset(IndexableDataset indexableDataset) {
    IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
    Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
    logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
    Collection<SolrInputDocument> docs = new ArrayList<>();
    List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
    List<String> dataverseSegments = new ArrayList<>();
    try {
        dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
    } catch (Exception ex) {
        logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
    }
    List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
    // Add Paths for linking dataverses
    for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
        List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
        List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
        List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
        for (String dvPath : linkingDataversePaths) {
            dataversePaths.add(dvPath);
        }
    }
    SolrInputDocument solrInputDocument = new SolrInputDocument();
    String datasetSolrDocId = indexableDataset.getSolrDocId();
    solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
    solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
    String dataverseVersion = systemConfig.getVersion();
    solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
    solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
    solrInputDocument.addField(SearchFields.TYPE, "datasets");
    Date datasetSortByDate = new Date();
    Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
    if (majorVersionReleaseDate != null) {
        logger.fine("major release date found: " + majorVersionReleaseDate.toString());
        datasetSortByDate = majorVersionReleaseDate;
    } else {
        if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
        } else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
        }
        Date createDate = dataset.getCreateDate();
        if (createDate != null) {
            logger.fine("can't find major release date, using create date: " + createDate);
            datasetSortByDate = createDate;
        } else {
            String msg = "can't find major release date or create date, using \"now\"";
            logger.info(msg);
            datasetSortByDate = new Date();
        }
    }
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
    if (state.equals(IndexableDataset.DatasetState.PUBLISHED)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
    // solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate());
    } else if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
    }
    addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
    if (dataset.isHarvested()) {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
    } else {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
    }
    DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
    String parentDatasetTitle = "TBD";
    if (datasetVersion != null) {
        solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
        solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
        solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
        if (datasetVersion.isInReview()) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
        }
        for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
            DatasetFieldType dsfType = dsf.getDatasetFieldType();
            String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
            String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
            if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
                logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
                // if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.INTEGER)) {
                if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
                // no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759
                } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
                    String dateAsString = dsf.getValues().get(0);
                    logger.fine("date as string: " + dateAsString);
                    if (dateAsString != null && !dateAsString.isEmpty()) {
                        SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
                        try {
                            /**
                             * @todo when bean validation is working we
                             * won't have to convert strings into dates
                             */
                            logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
                            Date dateAsDate = inputDateyyyy.parse(dateAsString);
                            SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy");
                            String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate);
                            logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
                            // solrInputDocument.addField(solrFieldSearchable, Integer.parseInt(datasetFieldFlaggedAsDate));
                            solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
                            if (dsfType.getSolrField().isFacetable()) {
                                // solrInputDocument.addField(solrFieldFacetable, Integer.parseInt(datasetFieldFlaggedAsDate));
                                solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
                            }
                        } catch (Exception ex) {
                            logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
                        }
                    }
                } else {
                    if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
                        /**
                         * @todo think about how to tie the fact that this
                         * needs to be multivalued (_ss) because a
                         * multivalued facet (authorAffiliation_ss) is being
                         * collapsed into here at index time. The business
                         * logic to determine if a data-driven metadata
                         * field should be indexed into Solr as a single or
                         * multiple value lives in the getSolrField() method
                         * of DatasetField.java
                         */
                        solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
                    } else if (dsf.getDatasetFieldType().getName().equals("title")) {
                        // datasets have titles, not names, but we index the title under name as well so datasets can be sorted by name alongside dataverses and files
                        List<String> possibleTitles = dsf.getValues();
                        String firstTitle = possibleTitles.get(0);
                        if (firstTitle != null) {
                            parentDatasetTitle = firstTitle;
                        }
                        solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
                    }
                    if (dsfType.isControlledVocabulary()) {
                        for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
                            if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
                                continue;
                            }
                            solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
                            if (dsfType.getSolrField().isFacetable()) {
                                solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
                            }
                        }
                    } else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
                        // strip HTML
                        List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
                        solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
                        if (dsfType.getSolrField().isFacetable()) {
                            solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
                        }
                    } else {
                        // do not strip HTML
                        solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
                        if (dsfType.getSolrField().isFacetable()) {
                            if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
                                String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
                                if (topicClassificationTerm != null) {
                                    logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
                                    solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
                                }
                            } else {
                                solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
                            }
                        }
                    }
                }
            }
        }
    }
    solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
    // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName());
    solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
    solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
    if (state.equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
        String deaccessionNote = datasetVersion.getVersionNote();
        if (deaccessionNote != null) {
            solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
        }
    }
    docs.add(solrInputDocument);
    List<String> filesIndexed = new ArrayList<>();
    if (datasetVersion != null) {
        List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
        boolean checkForDuplicateMetadata = false;
        if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
            checkForDuplicateMetadata = true;
            logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
        }
        for (FileMetadata fileMetadata : fileMetadatas) {
            boolean indexThisMetadata = true;
            if (checkForDuplicateMetadata) {
                logger.fine("Checking if this file metadata is a duplicate.");
                for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
                    if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
                        if (fileMetadata.contentEquals(releasedFileMetadata)) {
                            indexThisMetadata = false;
                            logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
                        } else {
                            logger.fine("This file metadata has changed since the released version; we want to index it!");
                        }
                        break;
                    }
                }
            }
            if (indexThisMetadata) {
                SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
                Long fileEntityId = fileMetadata.getDataFile().getId();
                datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
                datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
                datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
                String filenameCompleteFinal = "";
                if (fileMetadata != null) {
                    String filenameComplete = fileMetadata.getLabel();
                    if (filenameComplete != null) {
                        String filenameWithoutExtension = "";
                        // String extension = "";
                        int i = filenameComplete.lastIndexOf('.');
                        if (i > 0) {
                            // extension = filenameComplete.substring(i + 1);
                            try {
                                filenameWithoutExtension = filenameComplete.substring(0, i);
                                datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
                                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
                            } catch (IndexOutOfBoundsException ex) {
                                filenameWithoutExtension = "";
                            }
                        } else {
                            logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
                            filenameWithoutExtension = filenameComplete;
                        }
                        filenameCompleteFinal = filenameComplete;
                    }
                    for (String tag : fileMetadata.getCategoriesByName()) {
                        datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
                        datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
                /**
                 * for rules on sorting files see
                 * https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
                 * via https://redmine.hmdc.harvard.edu/issues/3701
                 */
                Date fileSortByDate = new Date();
                DataFile datafile = fileMetadata.getDataFile();
                if (datafile != null) {
                    boolean fileHasBeenReleased = datafile.isReleased();
                    if (fileHasBeenReleased) {
                        logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp filePublicationTimestamp = datafile.getPublicationDate();
                        if (filePublicationTimestamp != null) {
                            fileSortByDate = filePublicationTimestamp;
                        } else {
                            String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
                            logger.info(msg);
                        }
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    } else {
                        logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp fileCreateTimestamp = datafile.getCreateDate();
                        if (fileCreateTimestamp != null) {
                            fileSortByDate = fileCreateTimestamp;
                        } else {
                            String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
                            logger.info(msg);
                        }
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    }
                    if (datafile.isHarvested()) {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
                    } else {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
                    }
                }
                if (fileSortByDate == null) {
                    if (datasetSortByDate != null) {
                        logger.info("fileSortByDate was null, assigning datasetSortByDate");
                        fileSortByDate = datasetSortByDate;
                    } else {
                        logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'");
                        fileSortByDate = new Date();
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
                if (majorVersionReleaseDate == null && datafile != null && !datafile.isHarvested()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
                }
                if (datasetVersion.isInReview()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
                }
                String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
                if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.PUBLISHED)) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
                    // datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString);
                    addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
                } else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
                    fileSolrDocId = solrDocIdentifierFile + fileEntityId + indexableDataset.getDatasetState().getSuffix();
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
                }
                datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
                datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
                // For the file type facets, we have a property file that maps mime types
                // to facet-friendly names; "application/fits" should become "FITS", etc.:
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
                if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
                    /**
                     * @todo Someday we should probably deprecate this
                     * FILE_MD5 in favor of a combination of
                     * FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
                     */
                    datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
                }
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
                datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
                datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
                // datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName());
                // datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle());
                datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
                datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
                // variable names and labels:
                if (fileMetadata.getDataFile().isTabularData()) {
                    List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
                    for (DataVariable var : variables) {
                        if (var.getName() != null && !var.getName().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
                        }
                        if (var.getLabel() != null && !var.getLabel().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
                        }
                    }
                    // (not to be confused with the file categories, indexed above!)
                    for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
                        String tagLabel = tag.getTypeLabel();
                        datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
                    }
                }
                if (indexableDataset.isFilesShouldBeIndexed()) {
                    filesIndexed.add(fileSolrDocId);
                    docs.add(datafileSolrInputDocument);
                }
            }
        }
    }
    try {
        solrServer.add(docs);
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    try {
        solrServer.commit();
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    Long dsId = dataset.getId();
    // Dataset updatedDataset = (Dataset) dvObjectService.updateContentIndexTime(dataset);
    // updatedDataset = null;
    // instead of making a call to dvObjectService, let's try and
    // modify the index time stamp using the local EntityManager:
    DvObject dvObjectToModify = em.find(DvObject.class, dsId);
    dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
    dvObjectToModify = em.merge(dvObjectToModify);
    dvObjectToModify = null;
    // return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
    return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DvObject(edu.harvard.iq.dataverse.DvObject) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) Timestamp(java.sql.Timestamp) DataFile(edu.harvard.iq.dataverse.DataFile) SolrInputDocument(org.apache.solr.common.SolrInputDocument) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Dataset(edu.harvard.iq.dataverse.Dataset) IOException(java.io.IOException) Dataverse(edu.harvard.iq.dataverse.Dataverse) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) EJBException(javax.ejb.EJBException) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DataFileTag(edu.harvard.iq.dataverse.DataFileTag)
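
The method ends with the standard SolrJ batch pattern: accumulate SolrInputDocuments in a collection, add them in one call, then commit. Below is a minimal sketch of those calls, assuming a SolrJ 6+ client; the core URL and field names are placeholders, not Dataverse's actual schema.

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrInputDocument;

public class SolrIndexSketch {

    public static void main(String[] args) {
        // placeholder core URL; adjust to the running Solr instance
        try (SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
            Collection<SolrInputDocument> docs = new ArrayList<>();

            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("id", "dataset_42");                 // placeholder field names
            doc.addField("entityId", 42L);
            doc.addField("dvObjectType", "datasets");
            doc.addField("releaseOrCreateDate", new Date());
            docs.add(doc);

            solr.add(docs);  // batch add, mirrors solrServer.add(docs)
            solr.commit();   // make the batch visible to searches
        } catch (SolrServerException | IOException ex) {
            // the service returns ex.toString() instead of rethrowing
            System.err.println("indexing failed: " + ex);
        }
    }
}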

Example 35 with DatasetFieldType

Use of edu.harvard.iq.dataverse.DatasetFieldType in project dataverse by IQSS.

From the class BriefJsonPrinterTest, the method testJson_DatasetVersion:

/**
 * Test of json method, of class BriefJsonPrinter.
 */
@Test
public void testJson_DatasetVersion() {
    Dataset ds = MocksFactory.makeDataset();
    DatasetVersion dsv = MocksFactory.makeDatasetVersion(ds.getCategories());
    dsv.setId(1L);
    dsv.setVersion(2L);
    dsv.setVersionState(DatasetVersion.VersionState.DEACCESSIONED);
    DatasetField titleFld = new DatasetField();
    titleFld.setDatasetFieldType(new DatasetFieldType(DatasetFieldConstant.title, DatasetFieldType.FieldType.TEXT, false));
    titleFld.setSingleValue("Dataset Title");
    dsv.setDatasetFields(Collections.singletonList(titleFld));
    BriefJsonPrinter sut = new BriefJsonPrinter();
    JsonObject res = sut.json(dsv).build();
    assertEquals("Dataset Title", res.getString("title"));
    assertEquals(DatasetVersion.VersionState.DEACCESSIONED.name(), res.getString("versionState"));
    assertEquals(1, res.getInt("id"));
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) Dataset(edu.harvard.iq.dataverse.Dataset) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) JsonObject(javax.json.JsonObject) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) Test(org.junit.Test)

Aggregations

DatasetFieldType (edu.harvard.iq.dataverse.DatasetFieldType): 41
DatasetField (edu.harvard.iq.dataverse.DatasetField): 16
ControlledVocabularyValue (edu.harvard.iq.dataverse.ControlledVocabularyValue): 12
DatasetFieldCompoundValue (edu.harvard.iq.dataverse.DatasetFieldCompoundValue): 9
ArrayList (java.util.ArrayList): 9
JsonObject (javax.json.JsonObject): 7
Dataverse (edu.harvard.iq.dataverse.Dataverse): 6
LinkedList (java.util.LinkedList): 6
Test (org.junit.Test): 6
MetadataBlock (edu.harvard.iq.dataverse.MetadataBlock): 5
EJBException (javax.ejb.EJBException): 5
Path (javax.ws.rs.Path): 5
DatasetFieldValue (edu.harvard.iq.dataverse.DatasetFieldValue): 4
IOException (java.io.IOException): 4
HashSet (java.util.HashSet): 4
NoResultException (javax.persistence.NoResultException): 4
GET (javax.ws.rs.GET): 4
Before (org.junit.Before): 4
DataverseFieldTypeInputLevel (edu.harvard.iq.dataverse.DataverseFieldTypeInputLevel): 3
AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser): 3