use of edu.harvard.iq.dataverse.DatasetVersion in project dataverse by IQSS.
the class IndexServiceBean method addOrUpdateDataset.
private String addOrUpdateDataset(IndexableDataset indexableDataset) {
IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
Collection<SolrInputDocument> docs = new ArrayList<>();
List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
List<String> dataverseSegments = new ArrayList<>();
try {
dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
} catch (Exception ex) {
logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
}
List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
// Add Paths for linking dataverses
for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}
}
SolrInputDocument solrInputDocument = new SolrInputDocument();
String datasetSolrDocId = indexableDataset.getSolrDocId();
solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
String dataverseVersion = systemConfig.getVersion();
solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
solrInputDocument.addField(SearchFields.TYPE, "datasets");
Date datasetSortByDate = new Date();
Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
if (majorVersionReleaseDate != null) {
if (true) {
String msg = "major release date found: " + majorVersionReleaseDate.toString();
logger.fine(msg);
}
datasetSortByDate = majorVersionReleaseDate;
} else {
if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
} else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
}
Date createDate = dataset.getCreateDate();
if (createDate != null) {
if (true) {
String msg = "can't find major release date, using create date: " + createDate;
logger.fine(msg);
}
datasetSortByDate = createDate;
} else {
String msg = "can't find major release date or create date, using \"now\"";
logger.info(msg);
datasetSortByDate = new Date();
}
}
solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
if (state.equals(indexableDataset.getDatasetState().PUBLISHED)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
// solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate());
} else if (state.equals(indexableDataset.getDatasetState().WORKING_COPY)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
}
addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
if (dataset.isHarvested()) {
solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
} else {
solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
// rootDataverseName);
solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
}
DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
String parentDatasetTitle = "TBD";
if (datasetVersion != null) {
solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
if (datasetVersion.isInReview()) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
DatasetFieldType dsfType = dsf.getDatasetFieldType();
String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
// if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.INTEGER)) {
if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
// no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759
} else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
String dateAsString = dsf.getValues().get(0);
logger.fine("date as string: " + dateAsString);
if (dateAsString != null && !dateAsString.isEmpty()) {
SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
try {
/**
* @todo when bean validation is working we
* won't have to convert strings into dates
*/
logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
Date dateAsDate = inputDateyyyy.parse(dateAsString);
SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy");
String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate);
logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
// solrInputDocument.addField(solrFieldSearchable, Integer.parseInt(datasetFieldFlaggedAsDate));
solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
if (dsfType.getSolrField().isFacetable()) {
// solrInputDocument.addField(solrFieldFacetable, Integer.parseInt(datasetFieldFlaggedAsDate));
solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
}
} catch (Exception ex) {
logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
}
}
} else {
if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
/**
* @todo think about how to tie the fact that this
* needs to be multivalued (_ss) because a
* multivalued facet (authorAffilition_ss) is being
* collapsed into here at index time. The business
* logic to determine if a data-driven metadata
* field should be indexed into Solr as a single or
* multiple value lives in the getSolrField() method
* of DatasetField.java
*/
solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
} else if (dsf.getDatasetFieldType().getName().equals("title")) {
// datasets have titles not names but index title under name as well so we can sort datasets by name along dataverses and files
List<String> possibleTitles = dsf.getValues();
String firstTitle = possibleTitles.get(0);
if (firstTitle != null) {
parentDatasetTitle = firstTitle;
}
solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
}
if (dsfType.isControlledVocabulary()) {
for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
continue;
}
solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
}
}
} else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
// strip HTML
List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
}
} else {
// do not strip HTML
solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
if (dsfType.getSolrField().isFacetable()) {
if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
if (topicClassificationTerm != null) {
logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
}
} else {
solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
}
}
}
}
}
}
}
solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName());
solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
if (state.equals(indexableDataset.getDatasetState().DEACCESSIONED)) {
String deaccessionNote = datasetVersion.getVersionNote();
if (deaccessionNote != null) {
solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
}
}
docs.add(solrInputDocument);
List<String> filesIndexed = new ArrayList<>();
if (datasetVersion != null) {
List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
boolean checkForDuplicateMetadata = false;
if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
checkForDuplicateMetadata = true;
logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
}
for (FileMetadata fileMetadata : fileMetadatas) {
boolean indexThisMetadata = true;
if (checkForDuplicateMetadata) {
logger.fine("Checking if this file metadata is a duplicate.");
for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
if (fileMetadata.contentEquals(releasedFileMetadata)) {
indexThisMetadata = false;
logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
} else {
logger.fine("This file metadata has changed since the released version; we want to index it!");
}
break;
}
}
}
if (indexThisMetadata) {
SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
Long fileEntityId = fileMetadata.getDataFile().getId();
datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
String filenameCompleteFinal = "";
if (fileMetadata != null) {
String filenameComplete = fileMetadata.getLabel();
if (filenameComplete != null) {
String filenameWithoutExtension = "";
// String extension = "";
int i = filenameComplete.lastIndexOf('.');
if (i > 0) {
// extension = filenameComplete.substring(i + 1);
try {
filenameWithoutExtension = filenameComplete.substring(0, i);
datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
} catch (IndexOutOfBoundsException ex) {
filenameWithoutExtension = "";
}
} else {
logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
filenameWithoutExtension = filenameComplete;
}
filenameCompleteFinal = filenameComplete;
}
for (String tag : fileMetadata.getCategoriesByName()) {
datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
}
}
datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
/**
* for rules on sorting files see
* https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
* via https://redmine.hmdc.harvard.edu/issues/3701
*/
Date fileSortByDate = new Date();
DataFile datafile = fileMetadata.getDataFile();
if (datafile != null) {
boolean fileHasBeenReleased = datafile.isReleased();
if (fileHasBeenReleased) {
logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
Timestamp filePublicationTimestamp = datafile.getPublicationDate();
if (filePublicationTimestamp != null) {
fileSortByDate = filePublicationTimestamp;
} else {
String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
logger.info(msg);
}
datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
} else {
logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
Timestamp fileCreateTimestamp = datafile.getCreateDate();
if (fileCreateTimestamp != null) {
fileSortByDate = fileCreateTimestamp;
} else {
String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
logger.info(msg);
}
datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
}
if (datafile.isHarvested()) {
datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
} else {
datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
}
}
if (fileSortByDate == null) {
if (datasetSortByDate != null) {
logger.info("fileSortByDate was null, assigning datasetSortByDate");
fileSortByDate = datasetSortByDate;
} else {
logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'");
fileSortByDate = new Date();
}
}
datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
if (majorVersionReleaseDate == null && !datafile.isHarvested()) {
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
}
if (datasetVersion.isInReview()) {
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().PUBLISHED)) {
fileSolrDocId = solrDocIdentifierFile + fileEntityId;
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
// datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString);
addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
} else if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().WORKING_COPY)) {
fileSolrDocId = solrDocIdentifierFile + fileEntityId + indexableDataset.getDatasetState().getSuffix();
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
}
datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
// For the file type facets, we have a property file that maps mime types
// to facet-friendly names; "application/fits" should become "FITS", etc.:
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
/**
* @todo Someday we should probably deprecate this
* FILE_MD5 in favor of a combination of
* FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
*/
datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
}
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName());
// datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle());
datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
// names and labels:
if (fileMetadata.getDataFile().isTabularData()) {
List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
for (DataVariable var : variables) {
if (var.getName() != null && !var.getName().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
}
if (var.getLabel() != null && !var.getLabel().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
}
}
// (not to be confused with the file categories, indexed above!)
for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
String tagLabel = tag.getTypeLabel();
datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
}
}
if (indexableDataset.isFilesShouldBeIndexed()) {
filesIndexed.add(fileSolrDocId);
docs.add(datafileSolrInputDocument);
}
}
}
}
try {
solrServer.add(docs);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
try {
solrServer.commit();
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
Long dsId = dataset.getId();
// /Dataset updatedDataset = (Dataset)dvObjectService.updateContentIndexTime(dataset);
// /updatedDataset = null;
// instead of making a call to dvObjectService, let's try and
// modify the index time stamp using the local EntityManager:
DvObject dvObjectToModify = em.find(DvObject.class, dsId);
dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
dvObjectToModify = em.merge(dvObjectToModify);
dvObjectToModify = null;
// return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
use of edu.harvard.iq.dataverse.DatasetVersion in project dataverse by IQSS.
the class SearchPermissionsServiceBean method getDesiredCards.
public Map<DatasetVersion.VersionState, Boolean> getDesiredCards(Dataset dataset) {
Map<DatasetVersion.VersionState, Boolean> desiredCards = new LinkedHashMap<>();
DatasetVersion latestVersion = dataset.getLatestVersion();
DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState();
DatasetVersion releasedVersion = dataset.getReleasedVersion();
boolean atLeastOnePublishedVersion = false;
if (releasedVersion != null) {
atLeastOnePublishedVersion = true;
} else {
atLeastOnePublishedVersion = false;
}
if (atLeastOnePublishedVersion == false) {
if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) {
desiredCards.put(DatasetVersion.VersionState.DRAFT, true);
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
} else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) {
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true);
desiredCards.put(DatasetVersion.VersionState.RELEASED, false);
desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
} else {
String msg = "No-op. Unexpected condition reached: There is no published version and the latest published version is neither " + DatasetVersion.VersionState.DRAFT + " nor " + DatasetVersion.VersionState.DEACCESSIONED + ". Its state is " + latestVersionState + ".";
logger.info(msg);
}
} else if (atLeastOnePublishedVersion == true) {
if (latestVersionState.equals(DatasetVersion.VersionState.RELEASED) || latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) {
desiredCards.put(DatasetVersion.VersionState.RELEASED, true);
desiredCards.put(DatasetVersion.VersionState.DRAFT, false);
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
} else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) {
desiredCards.put(DatasetVersion.VersionState.DRAFT, true);
desiredCards.put(DatasetVersion.VersionState.RELEASED, true);
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false);
} else {
String msg = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft";
logger.info(msg);
}
} else {
String msg = "No-op. Unexpected condition reached: Has a version been published or not?";
logger.info(msg);
}
return desiredCards;
}
use of edu.harvard.iq.dataverse.DatasetVersion in project dataverse by IQSS.
the class FileRecordProcessor method processItem.
// TODO: This method may be meaningles when used in the context of a "package file"
// batch import. See if it can be modified/improved? -- L.A. 4.6.1
@Override
public Object processItem(Object object) throws Exception {
DatasetVersion version = dataset.getLatestVersion();
String path = object.toString();
String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
// skip if it already exists
DataFile datafile = dataFileServiceBean.findByStorageIdandDatasetVersion(relativePath, version);
if (datafile == null) {
return new File(path);
} else {
Logger.getLogger("job-" + jobContext.getInstanceId()).log(Level.INFO, "Skipping " + relativePath + ", DataFile already exists.");
return null;
}
}
use of edu.harvard.iq.dataverse.DatasetVersion in project dataverse by IQSS.
the class FileRecordWriter method createDataFile.
/**
* Create a DatasetFile and corresponding FileMetadata for a file on the filesystem and add it to the
* latest dataset version (if the user has AddDataset permissions for the dataset).
* @param file file to create dataFile from
* @return datafile
*/
private DataFile createDataFile(File file) {
DatasetVersion version = dataset.getLatestVersion();
String path = file.getAbsolutePath();
String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
// we don't determine mime type
DataFile datafile = new DataFile("application/octet-stream");
datafile.setStorageIdentifier(relativePath);
datafile.setFilesize(file.length());
datafile.setModificationTime(new Timestamp(new Date().getTime()));
datafile.setCreateDate(new Timestamp(new Date().getTime()));
datafile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
datafile.setOwner(dataset);
datafile.setIngestDone();
// check system property first, otherwise use the batch job property
String jobChecksumType;
if (System.getProperty("checksumType") != null) {
jobChecksumType = System.getProperty("checksumType");
} else {
jobChecksumType = checksumType;
}
// initial default
datafile.setChecksumType(DataFile.ChecksumType.SHA1);
for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
if (jobChecksumType.equalsIgnoreCase(type.name())) {
datafile.setChecksumType(type);
break;
}
}
// lookup the checksum value in the job's manifest hashmap
if (jobContext.getTransientUserData() != null) {
String checksumVal = ((Map<String, String>) jobContext.getTransientUserData()).get(relativePath);
if (checksumVal != null) {
datafile.setChecksumValue(checksumVal);
// remove the key, so we can check for unused checksums when the job is complete
((Map<String, String>) jobContext.getTransientUserData()).remove(relativePath);
} else {
datafile.setChecksumValue("Unknown");
getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
}
} else {
getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
jobContext.setExitStatus("FAILED");
return null;
}
// set metadata and add to latest version
FileMetadata fmd = new FileMetadata();
fmd.setLabel(file.getName());
// set the subdirectory if there is one
if (relativePath.contains(File.separator)) {
fmd.setDirectoryLabel(relativePath.replace(File.separator + file.getName(), ""));
}
fmd.setDataFile(datafile);
datafile.getFileMetadatas().add(fmd);
if (version.getFileMetadatas() == null)
version.setFileMetadatas(new ArrayList<>());
version.getFileMetadatas().add(fmd);
fmd.setDatasetVersion(version);
datafile = dataFileServiceBean.save(datafile);
return datafile;
}
use of edu.harvard.iq.dataverse.DatasetVersion in project dataverse by IQSS.
the class FileRecordWriter method updateDatasetVersion.
// utils
/**
* Update the dataset version using the command engine so permissions and constraints are enforced.
* Log errors to both the glassfish log and in the job context, as the exit status "failed".
*
* @param version dataset version
*/
private void updateDatasetVersion(DatasetVersion version) {
// update version using the command engine to enforce user permissions and constraints
if (dataset.getVersions().size() == 1 && version.getVersionState() == DatasetVersion.VersionState.DRAFT) {
try {
Command<DatasetVersion> cmd;
cmd = new UpdateDatasetVersionCommand(new DataverseRequest(user, (HttpServletRequest) null), version);
commandEngine.submit(cmd);
} catch (CommandException ex) {
String commandError = "CommandException updating DatasetVersion from batch job: " + ex.getMessage();
getJobLogger().log(Level.SEVERE, commandError);
jobContext.setExitStatus("FAILED");
}
} else {
String constraintError = "ConstraintException updating DatasetVersion form batch job: dataset must be a " + "single version in draft mode.";
getJobLogger().log(Level.SEVERE, constraintError);
jobContext.setExitStatus("FAILED");
}
}
Aggregations