use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.
the class BatchImport method postImport.
/**
 * Imports one new Dataset from the DDI XML supplied as the request body.
 *
 * @param body the DDI XML describing the dataset
 * @param parentIdtf id or alias of the dataverse to import into; "root" is used when absent
 * @param apiKey the caller's API key (bound from the "key" query parameter; not read
 *        directly in this method)
 * @return the import status on success; the wrapped response if the user cannot be
 *         authenticated; NOT_FOUND if the dataverse cannot be resolved; BAD_REQUEST with the
 *         exception message if the import fails
 */
@POST
@Path("import")
public Response postImport(String body, @QueryParam("dv") String parentIdtf, @QueryParam("key") String apiKey) {
    final DataverseRequest request;
    try {
        request = createDataverseRequest(findAuthenticatedUserOrDie());
    } catch (WrappedResponse authFailure) {
        return authFailure.getResponse();
    }

    // Fall back to the root dataverse when the caller did not name a target.
    final String targetIdentifier = (parentIdtf != null) ? parentIdtf : "root";
    final Dataverse target = findDataverse(targetIdentifier);
    if (target == null) {
        return error(Response.Status.NOT_FOUND, "Can't find dataverse with identifier='" + targetIdentifier + "'");
    }

    // ImportType.NEW performs no data cleanup, so there is no cleanup log to write to.
    final PrintWriter noCleanupLog = null;
    // The XML arrives in the POST body; there is no backing file to name.
    final String noSourceFile = null;
    try {
        JsonObjectBuilder importStatus = importService.doImport(request, target, body, noSourceFile, ImportType.NEW, noCleanupLog);
        return this.ok(importStatus);
    } catch (ImportException | IOException importFailure) {
        return this.error(Response.Status.BAD_REQUEST, importFailure.getMessage());
    }
}
use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.
the class BatchServiceBean method handleDirectory.
/**
 * Imports every non-hidden file found directly inside {@code dir} into the dataverse whose
 * alias equals the directory's name.
 *
 * <p>A failure while importing one file does not abort the run: the exception message is
 * recorded as that file's entry in the returned array and processing continues.
 *
 * @param dataverseRequest the request on whose behalf the import runs
 * @param dir directory whose name is the dataverse alias and whose files are imported
 * @param importType import mode passed through to the per-file import
 * @param validationLog log passed through to the per-file import
 * @param cleanupLog log passed through to the per-file import
 * @param createDV when {@code true}, a missing dataverse is created; {@code null} is treated
 *        as {@code false}
 * @return one status entry per processed file
 * @throws ImportException if the dataverse does not exist and {@code createDV} is not
 *         {@code true}, or if the contents of {@code dir} cannot be listed
 */
public JsonArrayBuilder handleDirectory(DataverseRequest dataverseRequest, File dir, ImportUtil.ImportType importType, PrintWriter validationLog, PrintWriter cleanupLog, Boolean createDV) throws ImportException {
    JsonArrayBuilder status = Json.createArrayBuilder();
    String alias = dir.getName();
    Dataverse owner = dataverseService.findByAlias(alias);
    if (owner == null) {
        // Boolean.TRUE.equals avoids a NullPointerException from unboxing a null createDV.
        if (Boolean.TRUE.equals(createDV)) {
            System.out.println("creating new dataverse: " + alias);
            owner = importService.createDataverse(alias, dataverseRequest);
        } else {
            throw new ImportException("Can't find dataverse with identifier='" + alias + "'");
        }
    }

    // File.listFiles() returns null (not an empty array) when the path is not a directory or
    // an I/O error occurs; report that explicitly instead of failing with a NullPointerException.
    File[] files = dir.listFiles();
    if (files == null) {
        throw new ImportException("Can't list files in directory '" + dir.getAbsolutePath() + "'");
    }

    for (File file : files) {
        if (file.isHidden()) {
            continue;
        }
        try {
            JsonObjectBuilder fileStatus = importService.handleFile(dataverseRequest, owner, file, importType, validationLog, cleanupLog);
            status.add(fileStatus);
        } catch (ImportException | IOException e) {
            status.add(Json.createObjectBuilder().add("importStatus", "Exception importing " + file.getName() + ", message = " + e.getMessage()));
        }
    }
    return status;
}
use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.
the class BuiltinUsers method internalSave.
/**
 * Persists a new builtin user together with its authenticated-user record and a fresh API
 * token that expires one year after creation.
 *
 * <p>The call is rejected unless {@code key} equals the value stored under
 * {@code API_KEY_IN_SETTINGS}. Every attempt that passes the key check is written to the
 * action log in the {@code finally} block, whatever the outcome.
 *
 * @param user the builtin user to create
 * @param password clear-text password to encrypt and store on the user; skipped when
 *        {@code null}
 * @param key the key supplied by the caller for builtin-user management
 * @return OK with the user, the authenticated user and the API token; SERVICE_UNAVAILABLE
 *         when no key is configured; the {@code badApiKey} response on a key mismatch;
 *         BAD_REQUEST for a duplicate user name or an {@code IllegalArgumentException} raised
 *         through the EJB layer; INTERNAL_SERVER_ERROR for any other failure
 */
private Response internalSave(BuiltinUser user, String password, String key) {
    String expectedKey = settingsSvc.get(API_KEY_IN_SETTINGS);
    if (expectedKey == null) {
        return error(Status.SERVICE_UNAVAILABLE, "Dataverse config issue: No API key defined for built in user management");
    }
    if (!expectedKey.equals(key)) {
        return badApiKey(key);
    }
    ActionLogRecord alr = new ActionLogRecord(ActionLogRecord.ActionType.BuiltinUser, "create");
    try {
        if (password != null) {
            user.updateEncryptedPassword(PasswordEncryption.get().encrypt(password), PasswordEncryption.getLatestVersionNumber());
        }
        // Make sure the identifier is unique
        if ((builtinUserSvc.findByUserName(user.getUserName()) != null) || (authSvc.identifierExists(user.getUserName()))) {
            return error(Status.BAD_REQUEST, "username '" + user.getUserName() + "' already exists");
        }
        user = builtinUserSvc.save(user);
        AuthenticatedUser au = authSvc.createAuthenticatedUser(new UserRecordIdentifier(BuiltinAuthenticationProvider.PROVIDER_ID, user.getUserName()), user.getUserName(), user.getDisplayInfo(), false);
        /**
         * @todo Move this to
         * AuthenticationServiceBean.createAuthenticatedUser
         */
        // Only notify when a root dataverse exists; a lookup failure is treated as "absent".
        boolean rootDataversePresent = false;
        try {
            Dataverse rootDataverse = dataverseSvc.findRootDataverse();
            if (rootDataverse != null) {
                rootDataversePresent = true;
            }
        } catch (Exception e) {
            logger.info("The root dataverse is not present. Don't send a notification to dataverseAdmin.");
        }
        if (rootDataversePresent) {
            userNotificationSvc.sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.CREATEACC, null);
        }
        ApiToken token = new ApiToken();
        token.setTokenString(java.util.UUID.randomUUID().toString());
        token.setAuthenticatedUser(au);
        Calendar c = Calendar.getInstance();
        token.setCreateTime(new Timestamp(c.getTimeInMillis()));
        // add() (rather than roll()) is the calendar-arithmetic operation: it moves the date
        // forward one year and pins the day of month when the target month is shorter.
        c.add(Calendar.YEAR, 1);
        token.setExpireTime(new Timestamp(c.getTimeInMillis()));
        authSvc.save(token);
        JsonObjectBuilder resp = Json.createObjectBuilder();
        resp.add("user", json(user));
        resp.add("authenticatedUser", json(au));
        resp.add("apiToken", token.getTokenString());
        alr.setInfo("builtinUser:" + user.getUserName() + " authenticatedUser:" + au.getIdentifier());
        return ok(resp);
    } catch (EJBException ejbx) {
        alr.setActionResult(ActionLogRecord.Result.InternalError);
        // The info is normally still unset when a failure occurs; avoid logging a literal "null".
        alr.setInfo(java.util.Objects.toString(alr.getInfo(), "") + "// " + ejbx.getMessage());
        if (ejbx.getCausedByException() instanceof IllegalArgumentException) {
            return error(Status.BAD_REQUEST, "Bad request: can't save user. " + ejbx.getCausedByException().getMessage());
        } else {
            logger.log(Level.WARNING, "Error saving user: ", ejbx);
            return error(Status.INTERNAL_SERVER_ERROR, "Can't save user: " + ejbx.getMessage());
        }
    } catch (Exception e) {
        logger.log(Level.WARNING, "Error saving user", e);
        alr.setActionResult(ActionLogRecord.Result.InternalError);
        alr.setInfo(java.util.Objects.toString(alr.getInfo(), "") + "// " + e.getMessage());
        return error(Status.INTERNAL_SERVER_ERROR, "Can't save user: " + e.getMessage());
    } finally {
        actionLogSvc.log(alr);
    }
}
use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.
the class IndexServiceBean method addOrUpdateDataset.
/**
* Builds the Solr documents for one dataset version and for its file metadatas, sends them
* to Solr, commits, and then stamps the dataset's index time through the entity manager.
*
* The work happens in this order: (1) collect the dataverse paths for the SUBTREE field
* (owner plus linking dataverses), (2) fill the dataset document, (3) fill one document per
* file metadata, (4) add + commit to Solr, (5) update the index time.
*
* @param indexableDataset supplies the dataset version, the dataset state, the Solr document
* id and whether file documents should be sent
* @return a summary string naming the dataset id, its Solr document id and the file document
* ids that were indexed; or the toString() of the exception if the Solr add or commit fails
*/
private String addOrUpdateDataset(IndexableDataset indexableDataset) {
IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
Collection<SolrInputDocument> docs = new ArrayList<>();
// --- (1) Dataverse paths for the SUBTREE field ---
List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
List<String> dataverseSegments = new ArrayList<>();
try {
dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
} catch (Exception ex) {
// Best effort: on failure the segments stay empty and indexing continues.
logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
}
List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
// Add Paths for linking dataverses
// NOTE(review): unlike the owner lookup above, findPathSegments is not wrapped in a try/catch here.
for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}
}
// --- (2) Dataset document: identity fields ---
SolrInputDocument solrInputDocument = new SolrInputDocument();
String datasetSolrDocId = indexableDataset.getSolrDocId();
solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
String dataverseVersion = systemConfig.getVersion();
solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
solrInputDocument.addField(SearchFields.TYPE, "datasets");
// Sort date: most recent major version release date, else the create date, else "now".
Date datasetSortByDate = new Date();
Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
if (majorVersionReleaseDate != null) {
if (true) {
String msg = "major release date found: " + majorVersionReleaseDate.toString();
logger.fine(msg);
}
datasetSortByDate = majorVersionReleaseDate;
} else {
// No major release yet: a publication status is added here based on the dataset state.
if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
} else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
}
Date createDate = dataset.getCreateDate();
if (createDate != null) {
if (true) {
String msg = "can't find major release date, using create date: " + createDate;
logger.fine(msg);
}
datasetSortByDate = createDate;
} else {
String msg = "can't find major release date or create date, using \"now\"";
logger.info(msg);
datasetSortByDate = new Date();
}
}
solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
// PUBLICATION_STATUS can receive a further value here, in addition to any added above.
// PUBLISHED / WORKING_COPY are reached through an instance expression; presumably they are constants of DatasetState.
if (state.equals(indexableDataset.getDatasetState().PUBLISHED)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
// solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate());
} else if (state.equals(indexableDataset.getDatasetState().WORKING_COPY)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
}
addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
// Metadata source: the HARVESTED constant for harvested datasets, otherwise the root dataverse's name.
if (dataset.isHarvested()) {
solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
} else {
solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
// rootDataverseName);
solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
}
// --- (2, continued) Dataset document: version-level fields and metadata fields ---
DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
// Placeholder parent title for file documents; replaced by the first "title" value if one is found below.
String parentDatasetTitle = "TBD";
if (datasetVersion != null) {
solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
if (datasetVersion.isInReview()) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
DatasetFieldType dsfType = dsf.getDatasetFieldType();
String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
// Only fields with a non-null first value and a searchable Solr field name are indexed.
if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
// if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.INTEGER)) {
if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
// no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759
} else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
// DATE fields: only the first value is used, and it is reduced to a four-digit year string.
String dateAsString = dsf.getValues().get(0);
logger.fine("date as string: " + dateAsString);
if (dateAsString != null && !dateAsString.isEmpty()) {
SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
try {
/**
* @todo when bean validation is working we
* won't have to convert strings into dates
*/
logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
Date dateAsDate = inputDateyyyy.parse(dateAsString);
SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy");
String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate);
logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
// solrInputDocument.addField(solrFieldSearchable, Integer.parseInt(datasetFieldFlaggedAsDate));
solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
if (dsfType.getSolrField().isFacetable()) {
// solrInputDocument.addField(solrFieldFacetable, Integer.parseInt(datasetFieldFlaggedAsDate));
solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
}
} catch (Exception ex) {
// An unparseable date is logged and the field is left out of the document.
logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
}
}
} else {
// All other Solr types: two special-cased field names first, then the generic indexing below.
if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
/**
* @todo think about how to tie the fact that this
* needs to be multivalued (_ss) because a
* multivalued facet (authorAffilition_ss) is being
* collapsed into here at index time. The business
* logic to determine if a data-driven metadata
* field should be indexed into Solr as a single or
* multiple value lives in the getSolrField() method
* of DatasetField.java
*/
solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
} else if (dsf.getDatasetFieldType().getName().equals("title")) {
// datasets have titles not names but index title under name as well so we can sort datasets by name along dataverses and files
List<String> possibleTitles = dsf.getValues();
String firstTitle = possibleTitles.get(0);
if (firstTitle != null) {
parentDatasetTitle = firstTitle;
}
solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
}
if (dsfType.isControlledVocabulary()) {
// Controlled vocabulary: each value is indexed individually, skipping the N/A marker.
for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
continue;
}
solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
}
}
} else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
// strip HTML
List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
}
} else {
// do not strip HTML
solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
if (dsfType.getSolrField().isFacetable()) {
// The topic classification value gets a specially computed facet term; a null term adds no facet value.
if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
if (topicClassificationTerm != null) {
logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
}
} else {
solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
}
}
}
}
}
}
}
solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName());
solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
// NOTE(review): datasetVersion is dereferenced here without the null check used elsewhere in this method.
if (state.equals(indexableDataset.getDatasetState().DEACCESSIONED)) {
String deaccessionNote = datasetVersion.getVersionNote();
if (deaccessionNote != null) {
solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
}
}
docs.add(solrInputDocument);
// --- (3) File documents: one per file metadata of this version ---
List<String> filesIndexed = new ArrayList<>();
if (datasetVersion != null) {
List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
// For a draft of an already released dataset, file metadatas whose content equals the released one are skipped.
boolean checkForDuplicateMetadata = false;
if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
checkForDuplicateMetadata = true;
logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
}
for (FileMetadata fileMetadata : fileMetadatas) {
boolean indexThisMetadata = true;
if (checkForDuplicateMetadata) {
logger.fine("Checking if this file metadata is a duplicate.");
// Find the released metadata for the same data file; the search stops at the first match.
for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
if (fileMetadata.contentEquals(releasedFileMetadata)) {
indexThisMetadata = false;
logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
} else {
logger.fine("This file metadata has changed since the released version; we want to index it!");
}
break;
}
}
}
if (indexThisMetadata) {
SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
// NOTE(review): fileMetadata and its data file are dereferenced here, before the null checks further down.
Long fileEntityId = fileMetadata.getDataFile().getId();
datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
// File name handling: the label is indexed in full and, when it contains a '.' past index 0, also without its extension.
String filenameCompleteFinal = "";
if (fileMetadata != null) {
String filenameComplete = fileMetadata.getLabel();
if (filenameComplete != null) {
String filenameWithoutExtension = "";
// String extension = "";
int i = filenameComplete.lastIndexOf('.');
if (i > 0) {
// extension = filenameComplete.substring(i + 1);
try {
filenameWithoutExtension = filenameComplete.substring(0, i);
datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
} catch (IndexOutOfBoundsException ex) {
filenameWithoutExtension = "";
}
} else {
logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
filenameWithoutExtension = filenameComplete;
}
filenameCompleteFinal = filenameComplete;
}
for (String tag : fileMetadata.getCategoriesByName()) {
datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
}
}
datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
// FILE_NAME may already hold the extension-less name added above; the complete name is added as a further value.
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
/**
* for rules on sorting files see
* https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
* via https://redmine.hmdc.harvard.edu/issues/3701
*/
// File sort date: publication date for released files, create date otherwise; stays at "now" if that date is null.
Date fileSortByDate = new Date();
DataFile datafile = fileMetadata.getDataFile();
if (datafile != null) {
boolean fileHasBeenReleased = datafile.isReleased();
if (fileHasBeenReleased) {
logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
Timestamp filePublicationTimestamp = datafile.getPublicationDate();
if (filePublicationTimestamp != null) {
fileSortByDate = filePublicationTimestamp;
} else {
String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
logger.info(msg);
}
// Released files take the restricted flag from the data file ...
datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
} else {
logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
Timestamp fileCreateTimestamp = datafile.getCreateDate();
if (fileCreateTimestamp != null) {
fileSortByDate = fileCreateTimestamp;
} else {
String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
logger.info(msg);
}
// ... while unreleased files take it from the file metadata.
datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
}
if (datafile.isHarvested()) {
datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
} else {
datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
}
}
// NOTE(review): fileSortByDate starts as new Date() and is only ever replaced by non-null values, so this branch looks unreachable.
if (fileSortByDate == null) {
if (datasetSortByDate != null) {
logger.info("fileSortByDate was null, assigning datasetSortByDate");
fileSortByDate = datasetSortByDate;
} else {
logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'");
fileSortByDate = new Date();
}
}
datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
// NOTE(review): datafile is used here outside the "datafile != null" guard above.
if (majorVersionReleaseDate == null && !datafile.isHarvested()) {
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
}
if (datasetVersion.isInReview()) {
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
// Solr document id of the file: prefix + entity id, plus the state's suffix for a working copy.
String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().PUBLISHED)) {
fileSolrDocId = solrDocIdentifierFile + fileEntityId;
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
// datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString);
addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
} else if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().WORKING_COPY)) {
fileSolrDocId = solrDocIdentifierFile + fileEntityId + indexableDataset.getDatasetState().getSuffix();
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
}
datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
// File type, size and checksum fields.
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
// For the file type facets, we have a property file that maps mime types
// to facet-friendly names; "application/fits" should become "FITS", etc.:
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
/**
* @todo Someday we should probably deprecate this
* FILE_MD5 in favor of a combination of
* FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
*/
datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
}
// NOTE(review): getChecksumType() is dereferenced without a null check; confirm it can never be null here.
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
// The file document shares the dataset's SUBTREE paths.
datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName());
// datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle());
datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
// names and labels:
if (fileMetadata.getDataFile().isTabularData()) {
List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
for (DataVariable var : variables) {
if (var.getName() != null && !var.getName().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
}
if (var.getLabel() != null && !var.getLabel().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
}
}
// (not to be confused with the file categories, indexed above!)
for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
String tagLabel = tag.getTypeLabel();
datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
}
}
// The file document is built regardless, but only queued for Solr when files should be indexed.
if (indexableDataset.isFilesShouldBeIndexed()) {
filesIndexed.add(fileSolrDocId);
docs.add(datafileSolrInputDocument);
}
}
}
}
// --- (4) Send to Solr; a failure in add or commit is reported through the return value, not thrown ---
try {
solrServer.add(docs);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
try {
solrServer.commit();
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
// --- (5) Record the index time on the dataset's DvObject ---
Long dsId = dataset.getId();
// /Dataset updatedDataset = (Dataset)dvObjectService.updateContentIndexTime(dataset);
// /updatedDataset = null;
// instead of making a call to dvObjectService, let's try and
// modify the index time stamp using the local EntityManager:
// NOTE(review): the result of em.find is used without a null check.
DvObject dvObjectToModify = em.find(DvObject.class, dsId);
dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
dvObjectToModify = em.merge(dvObjectToModify);
dvObjectToModify = null;
// return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
use of edu.harvard.iq.dataverse.Dataverse in project dataverse by IQSS.
the class SearchFilesServiceBean method getFileView.
/**
 * Searches Solr for the files of the dataset that owns {@code datasetVersion} and wraps the
 * outcome in a {@link FileView}.
 *
 * <p>The search is restricted to documents of type "files" whose parent id is the dataset's
 * id, starts at the first result and asks for 25 results per page.
 *
 * @param datasetVersion the version whose dataset's files are searched
 * @param user the user on whose behalf the search request is built
 * @param userSuppliedQuery the raw query text; normalised by {@code SearchUtil.determineFinalQuery}
 * @return the view built from the search response, or {@code null} if the search throws
 */
public FileView getFileView(DatasetVersion datasetVersion, User user, String userSuppliedQuery) {
    // No dataverse scoping is applied; kept as a typed null for the search call.
    final Dataverse noDataverse = null;

    List<String> filters = new ArrayList<>();
    filters.add(SearchFields.TYPE + ":" + SearchConstants.FILES);
    filters.add(SearchFields.PARENT_ID + ":" + datasetVersion.getDataset().getId());
    /**
     * @todo In order to support searching for files based on dataset
     * version for https://github.com/IQSS/dataverse/issues/2455 we're going
     * to need to make the dataset version id searchable, perhaps as part of
     * https://github.com/IQSS/dataverse/issues/2038
     */
    // filterQueries.add(SearchFields.DATASET_VERSION_ID + ":" + datasetVersion.getId());

    String query = SearchUtil.determineFinalQuery(userSuppliedQuery);
    SortBy sorting = getSortBy(query);
    String sortField = sorting.getField();
    String sortOrder = sorting.getOrder();

    final int firstResult = 0;
    final boolean restrictToMyData = false;
    final int pageSize = 25;

    SolrQueryResponse response = null;
    try {
        HttpServletRequest servletRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest();
        DataverseRequest searchRequest = new DataverseRequest(user, servletRequest);
        response = searchService.search(searchRequest, noDataverse, query, filters, sortField, sortOrder, firstResult, restrictToMyData, pageSize);
    } catch (SearchException searchFailure) {
        logger.info(SearchException.class + " searching for files: " + searchFailure);
        return null;
    } catch (Exception otherFailure) {
        logger.info(Exception.class + " searching for files: " + otherFailure);
        return null;
    }

    return new FileView(
            response.getSolrSearchResults(),
            response.getFacetCategoryList(),
            response.getFilterQueriesActual(),
            response.getSolrQuery().getQuery());
}
Aggregations