Example 66 with Dataverse

Use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.

The class BatchImport, method postImport.

/**
 * Import a new Dataset with DDI xml data posted in the request
 *
 * @param body the DDI XML to import
 * @param parentIdtf the dataverse to import into (id or alias)
 * @param apiKey the user's API key
 * @return import status (including id of the dataset created)
 */
@POST
@Path("import")
public Response postImport(String body, @QueryParam("dv") String parentIdtf, @QueryParam("key") String apiKey) {
    DataverseRequest dataverseRequest;
    try {
        dataverseRequest = createDataverseRequest(findAuthenticatedUserOrDie());
    } catch (WrappedResponse wr) {
        return wr.getResponse();
    }
    if (parentIdtf == null) {
        parentIdtf = "root";
    }
    Dataverse owner = findDataverse(parentIdtf);
    if (owner == null) {
        return error(Response.Status.NOT_FOUND, "Can't find dataverse with identifier='" + parentIdtf + "'");
    }
    try {
        // Cleanup log isn't needed for ImportType == NEW. We don't do any data cleanup in this mode.
        PrintWriter cleanupLog = null;
        // Since this is a single input from a POST, there is no file that we are reading from.
        String filename = null;
        JsonObjectBuilder status = importService.doImport(dataverseRequest, owner, body, filename, ImportType.NEW, cleanupLog);
        return this.ok(status);
    } catch (ImportException | IOException e) {
        return this.error(Response.Status.BAD_REQUEST, e.getMessage());
    }
}
Also used : DataverseRequest(edu.harvard.iq.dataverse.engine.command.DataverseRequest) ImportException(edu.harvard.iq.dataverse.api.imports.ImportException) IOException(java.io.IOException) JsonObjectBuilder(javax.json.JsonObjectBuilder) Dataverse(edu.harvard.iq.dataverse.Dataverse) PrintWriter(java.io.PrintWriter) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST)
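
For illustration, a client might post DDI XML to this endpoint as in the following sketch; the host, the /api/batch/import path, and the API key value are assumptions, not taken from the example above:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Path;

public class DdiImportClient {
    public static void main(String[] args) throws Exception {
        // The DDI XML document becomes the raw POST body.
        String ddiXml = Files.readString(Path.of("study.xml"));
        // "dv" is the target dataverse (id or alias); "key" is the user's API key.
        URI uri = URI.create("http://localhost:8080/api/batch/import?dv=root&key=xxxx-xxxx-xxxx");
        HttpRequest request = HttpRequest.newBuilder(uri)
                .header("Content-Type", "application/xml")
                .POST(HttpRequest.BodyPublishers.ofString(ddiXml))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // On success the body carries the import status, including the new dataset id.
        System.out.println(response.statusCode() + ": " + response.body());
    }
}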

Example 67 with Dataverse

Use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.

The class BatchServiceBean, method handleDirectory.

public JsonArrayBuilder handleDirectory(DataverseRequest dataverseRequest, File dir, ImportUtil.ImportType importType, PrintWriter validationLog, PrintWriter cleanupLog, Boolean createDV) throws ImportException {
    JsonArrayBuilder status = Json.createArrayBuilder();
    Dataverse owner = dataverseService.findByAlias(dir.getName());
    if (owner == null) {
        if (createDV) {
            System.out.println("creating new dataverse: " + dir.getName());
            owner = importService.createDataverse(dir.getName(), dataverseRequest);
        } else {
            throw new ImportException("Can't find dataverse with identifier='" + dir.getName() + "'");
        }
    }
    File[] files = dir.listFiles();
    if (files == null) {
        // listFiles() returns null when dir is not a directory or an I/O error occurs
        throw new ImportException("'" + dir.getAbsolutePath() + "' is not a readable directory");
    }
    for (File file : files) {
        if (!file.isHidden()) {
            try {
                JsonObjectBuilder fileStatus = importService.handleFile(dataverseRequest, owner, file, importType, validationLog, cleanupLog);
                status.add(fileStatus);
            } catch (ImportException | IOException e) {
                status.add(Json.createObjectBuilder().add("importStatus", "Exception importing " + file.getName() + ", message = " + e.getMessage()));
            }
        }
    }
    return status;
}
Also used : ImportException(edu.harvard.iq.dataverse.api.imports.ImportException) JsonArrayBuilder(javax.json.JsonArrayBuilder) IOException(java.io.IOException) JsonObjectBuilder(javax.json.JsonObjectBuilder) Dataverse(edu.harvard.iq.dataverse.Dataverse) File(java.io.File)
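
For illustration, a caller inside the same bean might walk a batch root where each subdirectory is named after a dataverse alias. This is a sketch only; the method name handleImportRoot, the createDV=true flag, and the directory layout are assumptions, not project API:

public JsonArrayBuilder handleImportRoot(DataverseRequest dataverseRequest, File importRoot, PrintWriter validationLog, PrintWriter cleanupLog) throws ImportException {
    JsonArrayBuilder allStatuses = Json.createArrayBuilder();
    File[] dataverseDirs = importRoot.listFiles(File::isDirectory);
    if (dataverseDirs == null) {
        // listFiles() returns null when importRoot is not a readable directory
        throw new ImportException("'" + importRoot.getAbsolutePath() + "' is not a readable directory");
    }
    for (File dataverseDir : dataverseDirs) {
        // Each subdirectory is assumed to hold the files for one dataverse;
        // createDV=true creates the dataverse when no alias matches the name.
        allStatuses.add(handleDirectory(dataverseRequest, dataverseDir, ImportUtil.ImportType.NEW, validationLog, cleanupLog, true));
    }
    return allStatuses;
}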

Example 68 with Dataverse

Use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.

The class BuiltinUsers, method internalSave.

private Response internalSave(BuiltinUser user, String password, String key) {
    String expectedKey = settingsSvc.get(API_KEY_IN_SETTINGS);
    if (expectedKey == null) {
        return error(Status.SERVICE_UNAVAILABLE, "Dataverse config issue: No API key defined for built-in user management");
    }
    if (!expectedKey.equals(key)) {
        return badApiKey(key);
    }
    ActionLogRecord alr = new ActionLogRecord(ActionLogRecord.ActionType.BuiltinUser, "create");
    try {
        if (password != null) {
            user.updateEncryptedPassword(PasswordEncryption.get().encrypt(password), PasswordEncryption.getLatestVersionNumber());
        }
        // Make sure the identifier is unique
        if ((builtinUserSvc.findByUserName(user.getUserName()) != null) || (authSvc.identifierExists(user.getUserName()))) {
            return error(Status.BAD_REQUEST, "username '" + user.getUserName() + "' already exists");
        }
        user = builtinUserSvc.save(user);
        AuthenticatedUser au = authSvc.createAuthenticatedUser(new UserRecordIdentifier(BuiltinAuthenticationProvider.PROVIDER_ID, user.getUserName()), user.getUserName(), user.getDisplayInfo(), false);
        /**
         * @todo Move this to
         * AuthenticationServiceBean.createAuthenticatedUser
         */
        boolean rootDataversePresent = false;
        try {
            Dataverse rootDataverse = dataverseSvc.findRootDataverse();
            if (rootDataverse != null) {
                rootDataversePresent = true;
            }
        } catch (Exception e) {
            logger.info("The root dataverse is not present. Don't send a notification to dataverseAdmin.");
        }
        if (rootDataversePresent) {
            userNotificationSvc.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.CREATEACC, null);
        }
        ApiToken token = new ApiToken();
        token.setTokenString(java.util.UUID.randomUUID().toString());
        token.setAuthenticatedUser(au);
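        // The token is stamped with a create time of "now" and an expiry one year out.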
        Calendar c = Calendar.getInstance();
        token.setCreateTime(new Timestamp(c.getTimeInMillis()));
        c.roll(Calendar.YEAR, 1);
        token.setExpireTime(new Timestamp(c.getTimeInMillis()));
        authSvc.save(token);
        JsonObjectBuilder resp = Json.createObjectBuilder();
        resp.add("user", json(user));
        resp.add("authenticatedUser", json(au));
        resp.add("apiToken", token.getTokenString());
        alr.setInfo("builtinUser:" + user.getUserName() + " authenticatedUser:" + au.getIdentifier());
        return ok(resp);
    } catch (EJBException ejbx) {
        alr.setActionResult(ActionLogRecord.Result.InternalError);
        alr.setInfo(alr.getInfo() + "// " + ejbx.getMessage());
        if (ejbx.getCausedByException() instanceof IllegalArgumentException) {
            return error(Status.BAD_REQUEST, "Bad request: can't save user. " + ejbx.getCausedByException().getMessage());
        } else {
            logger.log(Level.WARNING, "Error saving user: ", ejbx);
            return error(Status.INTERNAL_SERVER_ERROR, "Can't save user: " + ejbx.getMessage());
        }
    } catch (Exception e) {
        logger.log(Level.WARNING, "Error saving user", e);
        alr.setActionResult(ActionLogRecord.Result.InternalError);
        alr.setInfo(alr.getInfo() + "// " + e.getMessage());
        return error(Status.INTERNAL_SERVER_ERROR, "Can't save user: " + e.getMessage());
    } finally {
        actionLogSvc.log(alr);
    }
}
Also used : UserRecordIdentifier(edu.harvard.iq.dataverse.authorization.UserRecordIdentifier) Calendar(java.util.Calendar) AuthenticatedUser(edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) Dataverse(edu.harvard.iq.dataverse.Dataverse) Timestamp(java.sql.Timestamp) EJBException(javax.ejb.EJBException) Date(java.util.Date) ActionLogRecord(edu.harvard.iq.dataverse.actionlogging.ActionLogRecord) ApiToken(edu.harvard.iq.dataverse.authorization.users.ApiToken) JsonObjectBuilder(javax.json.JsonObjectBuilder)
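
The token-expiry arithmetic above can be written more directly with java.time; a minimal standalone equivalent (the class name is illustrative):

import java.sql.Timestamp;
import java.time.LocalDateTime;

public class TokenTimes {
    public static void main(String[] args) {
        // Same effect as the Calendar-based code: create time "now", expiry one year later.
        LocalDateTime now = LocalDateTime.now();
        Timestamp createTime = Timestamp.valueOf(now);
        Timestamp expireTime = Timestamp.valueOf(now.plusYears(1));
        System.out.println(createTime + " -> " + expireTime);
    }
}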

Example 69 with Dataverse

Use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.

The class IndexServiceBean, method addOrUpdateDataset.

private String addOrUpdateDataset(IndexableDataset indexableDataset) {
    IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
    Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
    logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
    Collection<SolrInputDocument> docs = new ArrayList<>();
    List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
    List<String> dataverseSegments = new ArrayList<>();
    try {
        dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
    } catch (Exception ex) {
        logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
    }
    List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
    // Add Paths for linking dataverses
    for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
        List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
        List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
        List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
        dataversePaths.addAll(linkingDataversePaths);
    }
    SolrInputDocument solrInputDocument = new SolrInputDocument();
    String datasetSolrDocId = indexableDataset.getSolrDocId();
    solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
    solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
    String dataverseVersion = systemConfig.getVersion();
    solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
    solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
    solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
    solrInputDocument.addField(SearchFields.TYPE, "datasets");
    Date datasetSortByDate = new Date();
    Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
    if (majorVersionReleaseDate != null) {
        logger.fine("major release date found: " + majorVersionReleaseDate);
        datasetSortByDate = majorVersionReleaseDate;
    } else {
        if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
        } else if (state.equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
        }
        Date createDate = dataset.getCreateDate();
        if (createDate != null) {
            logger.fine("can't find major release date, using create date: " + createDate);
            datasetSortByDate = createDate;
        } else {
            String msg = "can't find major release date or create date, using \"now\"";
            logger.info(msg);
            datasetSortByDate = new Date();
        }
    }
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
    solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
    if (state.equals(IndexableDataset.DatasetState.PUBLISHED)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
    // solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate());
    } else if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
        solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
    }
    addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
    if (dataset.isHarvested()) {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
    } else {
        solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
        // rootDataverseName);
        solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
    }
    DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
    String parentDatasetTitle = "TBD";
    if (datasetVersion != null) {
        solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
        solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
        solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
        if (datasetVersion.isInReview()) {
            solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
        }
        for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
            DatasetFieldType dsfType = dsf.getDatasetFieldType();
            String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
            String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
            if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
                logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
                // if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.INTEGER)) {
                if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
                // no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759
                } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
                    String dateAsString = dsf.getValues().get(0);
                    logger.fine("date as string: " + dateAsString);
                    if (dateAsString != null && !dateAsString.isEmpty()) {
                        SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
                        try {
                            /**
                             * @todo when bean validation is working we
                             * won't have to convert strings into dates
                             */
                            logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
                            Date dateAsDate = inputDateyyyy.parse(dateAsString);
                            SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy");
                            String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate);
                            logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
                            // solrInputDocument.addField(solrFieldSearchable, Integer.parseInt(datasetFieldFlaggedAsDate));
                            solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
                            if (dsfType.getSolrField().isFacetable()) {
                                // solrInputDocument.addField(solrFieldFacetable, Integer.parseInt(datasetFieldFlaggedAsDate));
                                solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
                            }
                        } catch (Exception ex) {
                            logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
                        }
                    }
                } else {
                    if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
                        /**
                         * @todo think about how to tie the fact that this
                         * needs to be multivalued (_ss) because a
                         * multivalued facet (authorAffilition_ss) is being
                         * collapsed into here at index time. The business
                         * logic to determine if a data-driven metadata
                         * field should be indexed into Solr as a single or
                         * multiple value lives in the getSolrField() method
                         * of DatasetField.java
                         */
                        solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
                    } else if (dsf.getDatasetFieldType().getName().equals("title")) {
                        // datasets have titles, not names, but we index the title under name as well so datasets can be sorted by name alongside dataverses and files
                        List<String> possibleTitles = dsf.getValues();
                        String firstTitle = possibleTitles.get(0);
                        if (firstTitle != null) {
                            parentDatasetTitle = firstTitle;
                        }
                        solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
                    }
                    if (dsfType.isControlledVocabulary()) {
                        for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
                            if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
                                continue;
                            }
                            solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
                            if (dsfType.getSolrField().isFacetable()) {
                                solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
                            }
                        }
                    } else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
                        // strip HTML
                        List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
                        solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
                        if (dsfType.getSolrField().isFacetable()) {
                            solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
                        }
                    } else {
                        // do not strip HTML
                        solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
                        if (dsfType.getSolrField().isFacetable()) {
                            if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
                                String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
                                if (topicClassificationTerm != null) {
                                    logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
                                    solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
                                }
                            } else {
                                solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
                            }
                        }
                    }
                }
            }
        }
    }
    solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
    // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName());
    solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
    solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
    if (state.equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
        String deaccessionNote = datasetVersion.getVersionNote();
        if (deaccessionNote != null) {
            solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
        }
    }
    docs.add(solrInputDocument);
    List<String> filesIndexed = new ArrayList<>();
    if (datasetVersion != null) {
        List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
        boolean checkForDuplicateMetadata = false;
        if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
            checkForDuplicateMetadata = true;
            logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
        }
        for (FileMetadata fileMetadata : fileMetadatas) {
            boolean indexThisMetadata = true;
            if (checkForDuplicateMetadata) {
                logger.fine("Checking if this file metadata is a duplicate.");
                for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
                    if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
                        if (fileMetadata.contentEquals(releasedFileMetadata)) {
                            indexThisMetadata = false;
                            logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
                        } else {
                            logger.fine("This file metadata has changed since the released version; we want to index it!");
                        }
                        break;
                    }
                }
            }
            if (indexThisMetadata) {
                SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
                Long fileEntityId = fileMetadata.getDataFile().getId();
                datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
                datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
                datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
                datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
                String filenameCompleteFinal = "";
                if (fileMetadata != null) {
                    String filenameComplete = fileMetadata.getLabel();
                    if (filenameComplete != null) {
                        String filenameWithoutExtension = "";
                        // String extension = "";
                        int i = filenameComplete.lastIndexOf('.');
                        if (i > 0) {
                            // extension = filenameComplete.substring(i + 1);
                            try {
                                filenameWithoutExtension = filenameComplete.substring(0, i);
                                datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
                                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
                            } catch (IndexOutOfBoundsException ex) {
                                filenameWithoutExtension = "";
                            }
                        } else {
                            logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
                            filenameWithoutExtension = filenameComplete;
                        }
                        filenameCompleteFinal = filenameComplete;
                    }
                    for (String tag : fileMetadata.getCategoriesByName()) {
                        datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
                        datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
                datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
                /**
                 * for rules on sorting files see
                 * https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
                 * via https://redmine.hmdc.harvard.edu/issues/3701
                 */
                Date fileSortByDate = new Date();
                DataFile datafile = fileMetadata.getDataFile();
                if (datafile != null) {
                    boolean fileHasBeenReleased = datafile.isReleased();
                    if (fileHasBeenReleased) {
                        logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp filePublicationTimestamp = datafile.getPublicationDate();
                        if (filePublicationTimestamp != null) {
                            fileSortByDate = filePublicationTimestamp;
                        } else {
                            String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
                            logger.info(msg);
                        }
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    } else {
                        logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
                        Timestamp fileCreateTimestamp = datafile.getCreateDate();
                        if (fileCreateTimestamp != null) {
                            fileSortByDate = fileCreateTimestamp;
                        } else {
                            String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
                            logger.info(msg);
                        }
                        datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
                    }
                    if (datafile.isHarvested()) {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
                    } else {
                        datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
                        datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
                    }
                }
                if (fileSortByDate == null) {
                    if (datasetSortByDate != null) {
                        logger.info("fileSortByDate was null, assigning datasetSortByDate");
                        fileSortByDate = datasetSortByDate;
                    } else {
                        logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'");
                        fileSortByDate = new Date();
                    }
                }
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
                datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
                if (majorVersionReleaseDate == null && !datafile.isHarvested()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
                }
                if (datasetVersion.isInReview()) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
                }
                String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
                if (state.equals(IndexableDataset.DatasetState.PUBLISHED)) {
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
                    // datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString);
                    addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
                } else if (state.equals(IndexableDataset.DatasetState.WORKING_COPY)) {
                    fileSolrDocId = solrDocIdentifierFile + fileEntityId + state.getSuffix();
                    datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
                }
                datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
                datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
                // For the file type facets, we have a property file that maps mime types
                // to facet-friendly names; "application/fits" should become "FITS", etc.:
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
                datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
                if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
                    /**
                     * @todo Someday we should probably deprecate this
                     * FILE_MD5 in favor of a combination of
                     * FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
                     */
                    datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
                }
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
                datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
                datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
                datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
                datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
                // datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName());
                // datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle());
                datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
                datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
                datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
                // names and labels:
                if (fileMetadata.getDataFile().isTabularData()) {
                    List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
                    for (DataVariable var : variables) {
                        if (var.getName() != null && !var.getName().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
                        }
                        if (var.getLabel() != null && !var.getLabel().equals("")) {
                            datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
                        }
                    }
                    // (not to be confused with the file categories, indexed above!)
                    for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
                        String tagLabel = tag.getTypeLabel();
                        datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
                    }
                }
                if (indexableDataset.isFilesShouldBeIndexed()) {
                    filesIndexed.add(fileSolrDocId);
                    docs.add(datafileSolrInputDocument);
                }
            }
        }
    }
    try {
        solrServer.add(docs);
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    try {
        solrServer.commit();
    } catch (SolrServerException | IOException ex) {
        return ex.toString();
    }
    Long dsId = dataset.getId();
    // /Dataset updatedDataset = (Dataset)dvObjectService.updateContentIndexTime(dataset);
    // /updatedDataset = null;
    // instead of making a call to dvObjectService, let's try and
    // modify the index time stamp using the local EntityManager:
    DvObject dvObjectToModify = em.find(DvObject.class, dsId);
    dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
    dvObjectToModify = em.merge(dvObjectToModify);
    dvObjectToModify = null;
    // return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
    return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DvObject(edu.harvard.iq.dataverse.DvObject) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) Timestamp(java.sql.Timestamp) DataFile(edu.harvard.iq.dataverse.DataFile) SolrInputDocument(org.apache.solr.common.SolrInputDocument) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue) Dataset(edu.harvard.iq.dataverse.Dataset) IOException(java.io.IOException) Dataverse(edu.harvard.iq.dataverse.Dataverse) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) EJBException(javax.ejb.EJBException) Date(java.util.Date) SimpleDateFormat(java.text.SimpleDateFormat) DataFileTag(edu.harvard.iq.dataverse.DataFileTag)
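
The sort-date logic above falls back in a fixed order: major version release date, then create date, then "now". A minimal standalone sketch of that chain (the helper name is hypothetical):

import java.util.Date;

public class SortDateFallback {
    // Hypothetical helper mirroring the fallback chain used for RELEASE_OR_CREATE_DATE.
    static Date datasetSortByDate(Date majorVersionReleaseDate, Date createDate) {
        if (majorVersionReleaseDate != null) {
            return majorVersionReleaseDate; // published at least once: sort by release date
        }
        if (createDate != null) {
            return createDate; // unpublished: sort by creation date
        }
        return new Date(); // nothing recorded: fall back to "now"
    }

    public static void main(String[] args) {
        System.out.println(datasetSortByDate(null, null)); // prints the current time
    }
}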

Example 70 with Dataverse

Use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.

The class SearchFilesServiceBean, method getFileView.

public FileView getFileView(DatasetVersion datasetVersion, User user, String userSuppliedQuery) {
    Dataverse dataverse = null;
    List<String> filterQueries = new ArrayList<>();
    filterQueries.add(SearchFields.TYPE + ":" + SearchConstants.FILES);
    filterQueries.add(SearchFields.PARENT_ID + ":" + datasetVersion.getDataset().getId());
    /**
     * @todo In order to support searching for files based on dataset
     * version for https://github.com/IQSS/dataverse/issues/2455 we're going
     * to need to make the dataset version id searchable, perhaps as part of
     * https://github.com/IQSS/dataverse/issues/2038
     */
    // filterQueries.add(SearchFields.DATASET_VERSION_ID + ":" + datasetVersion.getId());
    String finalQuery = SearchUtil.determineFinalQuery(userSuppliedQuery);
    SortBy sortBy = getSortBy(finalQuery);
    String sortField = sortBy.getField();
    String sortOrder = sortBy.getOrder();
    int paginationStart = 0;
    boolean onlyDataRelatedToMe = false;
    int numResultsPerPage = 25;
    SolrQueryResponse solrQueryResponse = null;
    try {
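        // Recover the underlying servlet request from the JSF context; it is needed to build the DataverseRequest below.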
        HttpServletRequest httpServletRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest();
        solrQueryResponse = searchService.search(new DataverseRequest(user, httpServletRequest), dataverse, finalQuery, filterQueries, sortField, sortOrder, paginationStart, onlyDataRelatedToMe, numResultsPerPage);
    } catch (SearchException ex) {
        logger.info(SearchException.class + " searching for files: " + ex);
        return null;
    } catch (Exception ex) {
        logger.info(Exception.class + " searching for files: " + ex);
        return null;
    }
    return new FileView(solrQueryResponse.getSolrSearchResults(), solrQueryResponse.getFacetCategoryList(), solrQueryResponse.getFilterQueriesActual(), solrQueryResponse.getSolrQuery().getQuery());
}
Also used : ArrayList(java.util.ArrayList) Dataverse(edu.harvard.iq.dataverse.Dataverse) HttpServletRequest(javax.servlet.http.HttpServletRequest) DataverseRequest(edu.harvard.iq.dataverse.engine.command.DataverseRequest)
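
Because getFileView returns null when the Solr search throws, callers need a null check. A hypothetical caller sketch (the injected field name, logger, and sample query are assumptions):

@EJB
SearchFilesServiceBean searchFilesService;

public void loadFiles(DatasetVersion datasetVersion, User user) {
    FileView fileView = searchFilesService.getFileView(datasetVersion, user, "title:climate");
    if (fileView == null) {
        // null signals that the underlying search failed; see the catch blocks above
        logger.warning("file search failed for dataset version " + datasetVersion.getId());
        return;
    }
    // hand fileView to the UI layer for rendering
}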

Aggregations

Dataverse (edu.harvard.iq.dataverse.Dataverse): 94
Dataset (edu.harvard.iq.dataverse.Dataset): 34
AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser): 31
Test (org.junit.Test): 27
DataverseRequest (edu.harvard.iq.dataverse.engine.command.DataverseRequest): 22
DataFile (edu.harvard.iq.dataverse.DataFile): 18
IOException (java.io.IOException): 18
Path (javax.ws.rs.Path): 16
JsonObject (javax.json.JsonObject): 15
CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException): 11
ArrayList (java.util.ArrayList): 11
EJBException (javax.ejb.EJBException): 11
JsonObjectBuilder (javax.json.JsonObjectBuilder): 11
InputStream (java.io.InputStream): 10
Date (java.util.Date): 10
JsonArrayBuilder (javax.json.JsonArrayBuilder): 10
POST (javax.ws.rs.POST): 10
DataverseRole (edu.harvard.iq.dataverse.authorization.DataverseRole): 9
User (edu.harvard.iq.dataverse.authorization.users.User): 9
SwordError (org.swordapp.server.SwordError): 9