use of edu.harvard.iq.dataverse.DataFileTag in project dataverse by IQSS.
the class JsonPrinterTest method testGetFileCategories.
/**
 * Serializes a FileMetadata carrying one file category ("Data") and one
 * tabular tag ("Survey") and checks the values that end up in the JSON.
 */
@Test
public void testGetFileCategories() {
    // Data file carrying a single tabular tag.
    DataFileTag surveyTag = new DataFileTag();
    surveyTag.setTypeByLabel("Survey");
    List<DataFileTag> tabularTags = new ArrayList<>();
    tabularTags.add(surveyTag);
    DataFile file = new DataFile();
    file.setTags(tabularTags);

    // A single file category.
    DataFileCategory category = new DataFileCategory();
    category.setName("Data");
    List<DataFileCategory> categories = new ArrayList<>();
    categories.add(category);

    // Wire everything into the file metadata under test.
    FileMetadata metadata = new FileMetadata();
    metadata.setDatasetVersion(new DatasetVersion());
    metadata.setDataFile(file);
    metadata.setCategories(categories);

    JsonObjectBuilder builder = JsonPrinter.json(metadata);
    assertNotNull(builder);
    JsonObject jsonObject = builder.build();
    System.out.println("json: " + jsonObject);

    // Top-level file metadata fields.
    assertEquals("", jsonObject.getString("description"));
    assertEquals("", jsonObject.getString("label"));
    assertEquals("Data", jsonObject.getJsonArray("categories").getString(0));

    // Nested "dataFile" object.
    JsonObject dataFileJson = jsonObject.getJsonObject("dataFile");
    assertEquals("", dataFileJson.getString("filename"));
    assertEquals(-1, dataFileJson.getInt("filesize"));
    assertEquals("UNKNOWN", dataFileJson.getString("originalFormatLabel"));
    assertEquals(-1, dataFileJson.getInt("rootDataFileId"));
    assertEquals("Survey", dataFileJson.getJsonArray("tabularTags").getString(0));
}
use of edu.harvard.iq.dataverse.DataFileTag in project dataverse by IQSS.
the class OptionalFileParams method addFileDataTagsToFile.
/**
 * Adds the data file tags held by these parameters to the given DataFile,
 * skipping labels the file already carries and labels that
 * DataFileTag.isDataFileTag() rejects.
 *
 * NOTE: DataFile tags can only be added to tabular files
 *
 * - e.g. The file must already be ingested.
 *
 * Because of this, these tags cannot be used when "Adding" a file via
 * the API--e.g. b/c the file will not yet be ingested
 *
 * @param df the file to tag; must not be null
 * @throws NullPointerException if df is null
 * @throws DataFileTagException if hasFileDataTags() is true but df is not tabular
 */
private void addFileDataTagsToFile(DataFile df) throws DataFileTagException {
    if (df == null) {
        throw new NullPointerException("The DataFile (df) cannot be null!");
    }
    // Nothing to do when hasFileDataTags() is false.
    if (!hasFileDataTags()) {
        return;
    }
    if (!df.isTabularData()) {
        String errMsg = ResourceBundle.getBundle("Bundle").getString("file.metadata.datafiletag.not_tabular");
        throw new DataFileTagException(errMsg);
    }
    // --------------------------------------------------
    // Collect the labels already on the file. An explicit ArrayList is used
    // because the list is appended to below.
    // --------------------------------------------------
    List<String> knownLabels = new ArrayList<>();
    List<DataFileTag> existingDataFileTags = df.getTags();
    if (existingDataFileTags != null) {
        for (DataFileTag existingTag : existingDataFileTags) {
            knownLabels.add(existingTag.getTypeLabel());
        }
    }
    // --------------------------------------------------
    // Iterate through and add any new labels
    // --------------------------------------------------
    for (String tagLabel : this.getDataFileTags()) {
        if (knownLabels.contains(tagLabel) || !DataFileTag.isDataFileTag(tagLabel)) {
            continue;
        }
        DataFileTag newTagObj = new DataFileTag();
        newTagObj.setDataFile(df);
        newTagObj.setTypeByLabel(tagLabel);
        df.addTag(newTagObj);
        // Remember the label so a repeated entry in the requested tags
        // is not added to the file a second time.
        knownLabels.add(tagLabel);
    }
}
use of edu.harvard.iq.dataverse.DataFileTag in project dataverse by IQSS.
the class IndexServiceBean method addOrUpdateDataset.
/**
 * Builds Solr documents for one dataset version and its files, sends them to
 * Solr, commits, and then records the index time on the dataset.
 *
 * One document is created for the dataset itself (ids, sort date, publication
 * status, dataset metadata fields, dataverse subtree paths). A file document
 * is built for each FileMetadata of the version, but it is only added to the
 * batch when indexableDataset.isFilesShouldBeIndexed() is true. When the
 * version is a draft of a dataset that already has a released version, file
 * metadatas whose content equals the released one are skipped.
 *
 * @param indexableDataset wrapper supplying the dataset version, its state and
 *        the Solr document id of the dataset
 * @return a summary string naming the dataset id, its Solr document id and the
 *         file document ids indexed; or the toString() of the
 *         SolrServerException/IOException if the add or the commit fails
 */
private String addOrUpdateDataset(IndexableDataset indexableDataset) {
IndexableDataset.DatasetState state = indexableDataset.getDatasetState();
Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
Collection<SolrInputDocument> docs = new ArrayList<>();
List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
List<String> dataverseSegments = new ArrayList<>();
// Best effort: if the path segments cannot be determined the failure is only
// logged and indexing continues with an empty segment list.
try {
dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
} catch (Exception ex) {
logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
}
List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
// Add Paths for linking dataverses
for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}
}
// ---- Dataset-level Solr document ----
SolrInputDocument solrInputDocument = new SolrInputDocument();
String datasetSolrDocId = indexableDataset.getSolrDocId();
solrInputDocument.addField(SearchFields.ID, datasetSolrDocId);
solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId());
String dataverseVersion = systemConfig.getVersion();
solrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId());
solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId());
solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
solrInputDocument.addField(SearchFields.TYPE, "datasets");
// Sort date preference: most recent major version release date, then the
// dataset create date, then "now".
Date datasetSortByDate = new Date();
Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate();
if (majorVersionReleaseDate != null) {
if (true) {
String msg = "major release date found: " + majorVersionReleaseDate.toString();
logger.fine(msg);
}
datasetSortByDate = majorVersionReleaseDate;
} else {
// No major release date: flag the dataset as unpublished or deaccessioned
// depending on its state.
if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
} else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.DEACCESSIONED)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING);
}
Date createDate = dataset.getCreateDate();
if (createDate != null) {
if (true) {
String msg = "can't find major release date, using create date: " + createDate;
logger.fine(msg);
}
datasetSortByDate = createDate;
} else {
String msg = "can't find major release date or create date, using \"now\"";
logger.info(msg);
datasetSortByDate = new Date();
}
}
solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate);
solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate));
// PUBLICATION_STATUS may receive more than one value: this is added on top
// of any status added in the branch above.
if (state.equals(indexableDataset.getDatasetState().PUBLISHED)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
// solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate());
} else if (state.equals(indexableDataset.getDatasetState().WORKING_COPY)) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
}
addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset);
if (dataset.isHarvested()) {
solrInputDocument.addField(SearchFields.IS_HARVESTED, true);
solrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
} else {
solrInputDocument.addField(SearchFields.IS_HARVESTED, false);
// rootDataverseName);
solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
}
DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
// Placeholder title; replaced by the first value of the "title" field if one is found below.
String parentDatasetTitle = "TBD";
if (datasetVersion != null) {
solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
if (datasetVersion.isInReview()) {
solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
// Index each dataset field into its searchable (and, if applicable, facetable) Solr field.
for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) {
DatasetFieldType dsfType = dsf.getDatasetFieldType();
String solrFieldSearchable = dsfType.getSolrField().getNameSearchable();
String solrFieldFacetable = dsfType.getSolrField().getNameFacetable();
if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) {
logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable);
// if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.INTEGER)) {
if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) {
// no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759
} else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) {
// Date fields: only the first value is used, and it is reduced to a four-digit year string.
String dateAsString = dsf.getValues().get(0);
logger.fine("date as string: " + dateAsString);
if (dateAsString != null && !dateAsString.isEmpty()) {
SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH);
try {
/**
* @todo when bean validation is working we
* won't have to convert strings into dates
*/
logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId());
Date dateAsDate = inputDateyyyy.parse(dateAsString);
SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy");
String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate);
logger.fine("YYYY only: " + datasetFieldFlaggedAsDate);
// solrInputDocument.addField(solrFieldSearchable, Integer.parseInt(datasetFieldFlaggedAsDate));
solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate);
if (dsfType.getSolrField().isFacetable()) {
// solrInputDocument.addField(solrFieldFacetable, Integer.parseInt(datasetFieldFlaggedAsDate));
solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate);
}
} catch (Exception ex) {
// An unparseable date is logged and the field is simply not indexed.
logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")");
}
}
} else {
if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) {
/**
* @todo think about how to tie the fact that this
* needs to be multivalued (_ss) because a
* multivalued facet (authorAffilition_ss) is being
* collapsed into here at index time. The business
* logic to determine if a data-driven metadata
* field should be indexed into Solr as a single or
* multiple value lives in the getSolrField() method
* of DatasetField.java
*/
solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues());
} else if (dsf.getDatasetFieldType().getName().equals("title")) {
// datasets have titles not names but index title under name as well so we can sort datasets by name along dataverses and files
List<String> possibleTitles = dsf.getValues();
String firstTitle = possibleTitles.get(0);
if (firstTitle != null) {
parentDatasetTitle = firstTitle;
}
solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues());
}
if (dsfType.isControlledVocabulary()) {
for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) {
if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) {
continue;
}
solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue());
if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue());
}
}
} else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) {
// strip HTML
List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues());
solrInputDocument.addField(solrFieldSearchable, htmlFreeText);
if (dsfType.getSolrField().isFacetable()) {
solrInputDocument.addField(solrFieldFacetable, htmlFreeText);
}
} else {
// do not strip HTML
solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues());
if (dsfType.getSolrField().isFacetable()) {
if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassValue)) {
String topicClassificationTerm = getTopicClassificationTermOrTermAndVocabulary(dsf);
if (topicClassificationTerm != null) {
logger.fine(solrFieldFacetable + " gets " + topicClassificationTerm);
solrInputDocument.addField(solrFieldFacetable, topicClassificationTerm);
}
} else {
solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues());
}
}
}
}
}
}
}
solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName());
solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId());
solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName());
if (state.equals(indexableDataset.getDatasetState().DEACCESSIONED)) {
// NOTE(review): datasetVersion is null-checked in other places but not here;
// getDatasetVersion() was already dereferenced at the top of the method,
// so the null checks look redundant - confirm.
String deaccessionNote = datasetVersion.getVersionNote();
if (deaccessionNote != null) {
solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote);
}
}
docs.add(solrInputDocument);
// ---- File-level Solr documents ----
List<String> filesIndexed = new ArrayList<>();
if (datasetVersion != null) {
List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
boolean checkForDuplicateMetadata = false;
if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) {
checkForDuplicateMetadata = true;
logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
}
for (FileMetadata fileMetadata : fileMetadatas) {
boolean indexThisMetadata = true;
if (checkForDuplicateMetadata) {
logger.fine("Checking if this file metadata is a duplicate.");
// Find the released file metadata for the same data file; only the first match is compared.
for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
if (fileMetadata.contentEquals(releasedFileMetadata)) {
indexThisMetadata = false;
logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
} else {
logger.fine("This file metadata has changed since the released version; we want to index it!");
}
break;
}
}
}
if (indexThisMetadata) {
SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
Long fileEntityId = fileMetadata.getDataFile().getId();
datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion);
datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId);
datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL());
datafileSolrInputDocument.addField(SearchFields.TYPE, "files");
String filenameCompleteFinal = "";
// NOTE(review): fileMetadata was already dereferenced above, so this null check cannot fail here.
if (fileMetadata != null) {
String filenameComplete = fileMetadata.getLabel();
if (filenameComplete != null) {
String filenameWithoutExtension = "";
// String extension = "";
int i = filenameComplete.lastIndexOf('.');
// i > 0 means there is a dot that is not the first character.
if (i > 0) {
// extension = filenameComplete.substring(i + 1);
try {
filenameWithoutExtension = filenameComplete.substring(0, i);
datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension);
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension);
} catch (IndexOutOfBoundsException ex) {
filenameWithoutExtension = "";
}
} else {
logger.fine("problem with filename '" + filenameComplete + "': no extension? empty string as filename?");
filenameWithoutExtension = filenameComplete;
}
filenameCompleteFinal = filenameComplete;
}
// File categories are indexed as file tags (distinct from the tabular data tags further down).
for (String tag : fileMetadata.getCategoriesByName()) {
datafileSolrInputDocument.addField(SearchFields.FILE_TAG, tag);
datafileSolrInputDocument.addField(SearchFields.FILE_TAG_SEARCHABLE, tag);
}
}
datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal);
// FILE_NAME receives the complete name here in addition to the extension-less name added above.
datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal);
datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
/**
* for rules on sorting files see
* https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing
* via https://redmine.hmdc.harvard.edu/issues/3701
*/
Date fileSortByDate = new Date();
DataFile datafile = fileMetadata.getDataFile();
if (datafile != null) {
boolean fileHasBeenReleased = datafile.isReleased();
if (fileHasBeenReleased) {
// Released file: sort by publication date; access comes from the data file itself.
logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
Timestamp filePublicationTimestamp = datafile.getPublicationDate();
if (filePublicationTimestamp != null) {
fileSortByDate = filePublicationTimestamp;
} else {
String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
logger.info(msg);
}
datafileSolrInputDocument.addField(SearchFields.ACCESS, datafile.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
} else {
// Unreleased file: sort by create date; access comes from the file metadata.
logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")");
Timestamp fileCreateTimestamp = datafile.getCreateDate();
if (fileCreateTimestamp != null) {
fileSortByDate = fileCreateTimestamp;
} else {
String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")";
logger.info(msg);
}
datafileSolrInputDocument.addField(SearchFields.ACCESS, fileMetadata.isRestricted() ? SearchConstants.RESTRICTED : SearchConstants.PUBLIC);
}
if (datafile.isHarvested()) {
datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, true);
datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, HARVESTED);
} else {
datafileSolrInputDocument.addField(SearchFields.IS_HARVESTED, false);
datafileSolrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName());
}
}
// NOTE(review): fileSortByDate starts as new Date() and is only ever reassigned
// to non-null values above, so this fallback looks unreachable.
if (fileSortByDate == null) {
if (datasetSortByDate != null) {
logger.info("fileSortByDate was null, assigning datasetSortByDate");
fileSortByDate = datasetSortByDate;
} else {
logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'");
fileSortByDate = new Date();
}
}
datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate);
datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate));
// NOTE(review): datafile is dereferenced here outside the null check above.
if (majorVersionReleaseDate == null && !datafile.isHarvested()) {
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING);
}
if (datasetVersion.isInReview()) {
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
}
// File Solr doc id: prefix + file id, with the state's suffix appended for working copies.
String fileSolrDocId = solrDocIdentifierFile + fileEntityId;
if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().PUBLISHED)) {
fileSolrDocId = solrDocIdentifierFile + fileEntityId;
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING);
// datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString);
addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset);
} else if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().WORKING_COPY)) {
fileSolrDocId = solrDocIdentifierFile + fileEntityId + indexableDataset.getDatasetState().getSuffix();
datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING);
}
datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId);
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType());
datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType());
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType());
// For the file type facets, we have a property file that maps mime types
// to facet-friendly names; "application/fits" should become "FITS", etc.:
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile()));
datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize());
if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) {
/**
* @todo Someday we should probably deprecate this
* FILE_MD5 in favor of a combination of
* FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE.
*/
datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue());
}
// NOTE(review): throws NullPointerException if getChecksumType() returns null - confirm it is always set.
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString());
datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue());
datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription());
datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf());
datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName());
// datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle());
datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId());
datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId());
datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation());
datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle);
// names and labels:
if (fileMetadata.getDataFile().isTabularData()) {
List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables();
for (DataVariable var : variables) {
if (var.getName() != null && !var.getName().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
}
if (var.getLabel() != null && !var.getLabel().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
}
}
// (not to be confused with the file categories, indexed above!)
for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
String tagLabel = tag.getTypeLabel();
datafileSolrInputDocument.addField(SearchFields.TABDATA_TAG, tagLabel);
}
}
// The file document is always built, but only queued for Solr when file indexing is requested.
if (indexableDataset.isFilesShouldBeIndexed()) {
filesIndexed.add(fileSolrDocId);
docs.add(datafileSolrInputDocument);
}
}
}
}
// Send the batch and commit; on failure the exception text is returned instead of being thrown.
try {
solrServer.add(docs);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
try {
solrServer.commit();
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
Long dsId = dataset.getId();
// /Dataset updatedDataset = (Dataset)dvObjectService.updateContentIndexTime(dataset);
// /updatedDataset = null;
// instead of making a call to dvObjectService, let's try and
// modify the index time stamp using the local EntityManager:
DvObject dvObjectToModify = em.find(DvObject.class, dsId);
dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime()));
dvObjectToModify = em.merge(dvObjectToModify);
dvObjectToModify = null;
// return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}
use of edu.harvard.iq.dataverse.DataFileTag in project dataverse by IQSS.
the class JsonPrinter method getTabularFileTags.
/**
 * Collects the type labels of a data file's tags into a JSON array builder.
 *
 * @param df the data file whose tags are read; may be null
 * @return a builder holding every non-null tag label in list order, or
 *         null when df is null or its tag list is null or empty
 */
private static JsonArrayBuilder getTabularFileTags(DataFile df) {
    List<DataFileTag> fileTags = (df == null) ? null : df.getTags();
    if (fileTags == null || fileTags.isEmpty()) {
        return null;
    }
    JsonArrayBuilder labelArray = Json.createArrayBuilder();
    fileTags.stream()
            .map(DataFileTag::getTypeLabel)
            .filter(typeLabel -> typeLabel != null)
            .forEach(typeLabel -> labelArray.add(typeLabel));
    return labelArray;
}
Aggregations