use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
the class JsonParserTest method assertFieldsEqual.
/**
 * Structurally compares two dataset fields.
 * <p>
 * Two fields are considered equal when they are the same reference (including both
 * {@code null}), or when their field types are equal and their values match pairwise,
 * in order:
 * <ul>
 *   <li>primitive types: the string values of the {@code DatasetFieldValue}s;</li>
 *   <li>controlled vocabulary types: the ids of the {@code ControlledVocabularyValue}s;</li>
 *   <li>compound types: the child fields of each compound value, compared recursively.</li>
 * </ul>
 * NOTE(review): the primitive check runs before the controlled-vocabulary check, so a type
 * that reports both is only compared through its primitive values - confirm this is intended.
 *
 * @param ex  the expected field, may be {@code null}
 * @param act the actual field, may be {@code null}
 * @return {@code true} if both fields match as described above, {@code false} otherwise
 * @throws IllegalArgumentException if the expected field's type is neither primitive,
 *         controlled vocabulary, nor compound
 */
public boolean assertFieldsEqual(DatasetField ex, DatasetField act) {
    if (ex == act) {
        return true;
    }
    if ((ex == null) ^ (act == null)) {
        return false;
    }

    // type
    if (!ex.getDatasetFieldType().equals(act.getDatasetFieldType())) {
        return false;
    }

    if (ex.getDatasetFieldType().isPrimitive()) {
        List<DatasetFieldValue> exVals = ex.getDatasetFieldValues();
        List<DatasetFieldValue> actVals = act.getDatasetFieldValues();
        if (exVals.size() != actVals.size()) {
            return false;
        }
        Iterator<DatasetFieldValue> exItr = exVals.iterator();
        for (DatasetFieldValue actVal : actVals) {
            DatasetFieldValue exVal = exItr.next();
            if (!exVal.getValue().equals(actVal.getValue())) {
                return false;
            }
        }
        return true;
    } else if (ex.getDatasetFieldType().isControlledVocabulary()) {
        List<ControlledVocabularyValue> exVals = ex.getControlledVocabularyValues();
        List<ControlledVocabularyValue> actVals = act.getControlledVocabularyValues();
        if (exVals.size() != actVals.size()) {
            return false;
        }
        Iterator<ControlledVocabularyValue> exItr = exVals.iterator();
        for (ControlledVocabularyValue actVal : actVals) {
            ControlledVocabularyValue exVal = exItr.next();
            if (!exVal.getId().equals(actVal.getId())) {
                return false;
            }
        }
        return true;
    } else if (ex.getDatasetFieldType().isCompound()) {
        List<DatasetFieldCompoundValue> exVals = ex.getDatasetFieldCompoundValues();
        List<DatasetFieldCompoundValue> actVals = act.getDatasetFieldCompoundValues();
        if (exVals.size() != actVals.size()) {
            return false;
        }
        Iterator<DatasetFieldCompoundValue> exItr = exVals.iterator();
        for (DatasetFieldCompoundValue actVal : actVals) {
            DatasetFieldCompoundValue exVal = exItr.next();
            Iterator<DatasetField> exChildItr = exVal.getChildDatasetFields().iterator();
            Iterator<DatasetField> actChildItr = actVal.getChildDatasetFields().iterator();
            while (exChildItr.hasNext()) {
                // Fewer actual children than expected: not equal (and next() would throw).
                if (!actChildItr.hasNext()) {
                    return false;
                }
                // The recursive result must be propagated; otherwise any child mismatch
                // would be silently reported as equality.
                if (!assertFieldsEqual(exChildItr.next(), actChildItr.next())) {
                    return false;
                }
            }
            // More actual children than expected: not equal.
            if (actChildItr.hasNext()) {
                return false;
            }
        }
        return true;
    }

    throw new IllegalArgumentException("Unknown dataset field type '" + ex.getDatasetFieldType() + "'");
}
use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
the class JsonParser method parseControlledVocabularyValue.
/**
 * Resolves the controlled vocabulary value(s) named in a JSON field object.
 * <p>
 * When the JSON property {@code "multiple"} is {@code true}, {@code "value"} is read as an
 * array of strings and each entry is looked up; duplicates are dropped while the order of
 * first occurrence is kept. Otherwise {@code "value"} is read as a single string (defaulting
 * to the empty string when absent) and a one-element list is returned.
 *
 * @param cvvType the dataset field type whose vocabulary is searched
 * @param json    the JSON object carrying the {@code "multiple"} and {@code "value"} properties
 * @return the resolved vocabulary values; a singleton list in the non-multiple case
 * @throws JsonParseException declared by the signature; in this method a
 *         {@code ControlledVocabularyException} is thrown when a string value cannot be
 *         found for {@code cvvType}
 */
public List<ControlledVocabularyValue> parseControlledVocabularyValue(DatasetFieldType cvvType, JsonObject json) throws JsonParseException {
    if (json.getBoolean("multiple")) {
        List<ControlledVocabularyValue> vals = new LinkedList<>();
        for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) {
            String strValue = strVal.getString();
            // The 'lenient' flag is forwarded to the lookup service as-is.
            ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient);
            if (cvv == null) {
                throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue);
            }
            // Only add value to the list if it is not a duplicate
            if (!vals.contains(cvv)) {
                vals.add(cvv);
            }
        }
        return vals;
    } else {
        String strValue = json.getString("value", "");
        ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient);
        if (cvv == null) {
            throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue);
        }
        return Collections.singletonList(cvv);
    }
}
use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
the class JsonParser method convertKeywordsToSubjects.
/**
 * Special processing of keywords and subjects. All keywords and subjects will be input
 * from foreign formats (DDI, dcterms, etc) as keywords.
 * As part of the parsing, we will move keywords that match subject controlled vocabulary values
 * into the subjects datasetField.
 * <p>
 * Only the first field whose type is named {@code "keyword"} is examined. If at least one of
 * its keyword values matches a subject vocabulary value, the keyword field is reduced to the
 * non-matching compound values and a new subject field holding the distinct matches (with
 * consecutive display orders starting at 0) is appended to {@code fields}. If nothing
 * matches, {@code fields} is left untouched.
 *
 * @param fields - the parsed datasetFields; modified in place
 */
public void convertKeywordsToSubjects(List<DatasetField> fields) {
    DatasetField keywordField = null;
    for (DatasetField field : fields) {
        if (field.getDatasetFieldType().getName().equals("keyword")) {
            keywordField = field;
            break;
        }
    }
    if (keywordField == null) {
        // nothing to do.
        return;
    }

    DatasetFieldType type = datasetFieldSvc.findByNameOpt(DatasetFieldConstant.subject);
    // new list to hold subjects that we find
    List<ControlledVocabularyValue> subjects = new ArrayList<>();
    // Make new list to hold the non-subject keywords
    List<DatasetFieldCompoundValue> filteredValues = new ArrayList<>();
    for (DatasetFieldCompoundValue compoundVal : keywordField.getDatasetFieldCompoundValues()) {
        // Loop through the child fields to find the "keywordValue" field
        for (DatasetField childField : compoundVal.getChildDatasetFields()) {
            if (childField.getDatasetFieldType().getName().equals(DatasetFieldConstant.keywordValue)) {
                // check if this value is a subject
                ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(type, childField.getValue(), lenient);
                if (cvv == null) {
                    // the keyword was not found in the subject list, so retain it in filtered list
                    filteredValues.add(compoundVal);
                } else if (!subjects.contains(cvv)) {
                    // save the value for our subject field (once per distinct subject)
                    subjects.add(cvv);
                }
            }
        }
    }

    // if we have found any subjects in the keyword list, then update the keyword and subject fields appropriately.
    if (!subjects.isEmpty()) {
        keywordField.setDatasetFieldCompoundValues(filteredValues);
        DatasetField subjectField = new DatasetField();
        subjectField.setDatasetFieldType(type);
        // The counter must live outside the loop; declaring it inside reset it on every
        // iteration, so every subject ended up with display order 0.
        int order = 0;
        for (ControlledVocabularyValue val : subjects) {
            val.setDisplayOrder(order);
            val.setDatasetFieldType(type);
            order++;
        }
        subjectField.setControlledVocabularyValues(subjects);
        fields.add(subjectField);
    }
}
use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
the class IngestServiceBean method processDatasetMetadata.
/**
 * Merges metadata extracted from an ingested file into the dataset fields of the version
 * being edited.
 * <p>
 * For every metadata block of the owning dataverse whose name matches the block name
 * reported by the ingest result, the version's dataset fields are re-initialised via
 * {@code initDatasetFields()} and then, per field type of that block:
 * <ul>
 *   <li>top-level primitive fields: the three {@code resolution.*} fields are stored as a
 *       single aggregated "min - max" value (merged with any value already stored); other
 *       fields get each extracted value added unless an equal value is already present;
 *       controlled vocabulary fields only accept values defined in the type's vocabulary;</li>
 *   <li>compound fields: one new compound value is built from the first extracted value of
 *       each non-controlled-vocabulary primitive child, and attached unless an existing
 *       compound value already matches on all of those children.</li>
 * </ul>
 *
 * @param fileMetadataIngest the extracted metadata (block name plus field-name to values map)
 * @param editVersion        the dataset version whose fields are updated in place
 * @throws IOException declared by the signature; nothing in this method throws it directly
 */
private void processDatasetMetadata(FileMetadataIngest fileMetadataIngest, DatasetVersion editVersion) throws IOException {
    for (MetadataBlock mdb : editVersion.getDataset().getOwner().getMetadataBlocks()) {
        if (mdb.getName().equals(fileMetadataIngest.getMetadataBlockName())) {
            logger.fine("Ingest Service: dataset version has " + mdb.getName() + " metadata block enabled.");
            editVersion.setDatasetFields(editVersion.initDatasetFields());
            Map<String, Set<String>> fileMetadataMap = fileMetadataIngest.getMetadataMap();
            for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) {
                if (dsft.isPrimitive()) {
                    if (!dsft.isHasParent()) {
                        String dsfName = dsft.getName();
                        // See if the plugin has found anything for this field:
                        if (fileMetadataMap.get(dsfName) != null && !fileMetadataMap.get(dsfName).isEmpty()) {
                            logger.fine("Ingest Service: found extracted metadata for field " + dsfName);
                            // go through the existing fields:
                            for (DatasetField dsf : editVersion.getFlatDatasetFields()) {
                                if (dsf.getDatasetFieldType().equals(dsft)) {
                                    // yep, this is our field!
                                    // let's go through the values that the ingest
                                    // plugin found in the file for this field:
                                    Set<String> mValues = fileMetadataMap.get(dsfName);
                                    // The resolution fields are special-cased by name here.
                                    if (dsfName.equals("resolution.Temporal") || dsfName.equals("resolution.Spatial") || dsfName.equals("resolution.Spectral")) {
                                        // For these values, we aggregate the minimum-maximum
                                        // pair, for the entire set.
                                        // So first, we need to go through the values found by
                                        // the plugin and select the min. and max. values of
                                        // these:
                                        // (note that we are assuming that they all must
                                        // validate as doubles!)
                                        Double minValue = null;
                                        Double maxValue = null;
                                        for (String fValue : mValues) {
                                            try {
                                                double thisValue = Double.parseDouble(fValue);
                                                if (minValue == null || Double.compare(thisValue, minValue) < 0) {
                                                    minValue = thisValue;
                                                }
                                                if (maxValue == null || Double.compare(thisValue, maxValue) > 0) {
                                                    maxValue = thisValue;
                                                }
                                            } catch (NumberFormatException ignored) {
                                                // Deliberately skipped: an extracted value that is not
                                                // a number simply does not take part in the min/max.
                                            }
                                        }
                                        // logger.fine("Min value: "+minValue+", Max value: "+maxValue);
                                        if (minValue != null && maxValue != null) {
                                            Double storedMinValue = null;
                                            Double storedMaxValue = null;
                                            String storedValue = "";
                                            // Guard against an empty value list before looking at index 0;
                                            // get(0) on an empty list throws IndexOutOfBoundsException.
                                            if (dsf.getDatasetFieldValues() != null
                                                    && !dsf.getDatasetFieldValues().isEmpty()
                                                    && dsf.getDatasetFieldValues().get(0) != null) {
                                                storedValue = dsf.getDatasetFieldValues().get(0).getValue();
                                                if (storedValue != null && !storedValue.equals("")) {
                                                    try {
                                                        if (storedValue.indexOf(" - ") > -1) {
                                                            storedMinValue = Double.parseDouble(storedValue.substring(0, storedValue.indexOf(" - ")));
                                                            storedMaxValue = Double.parseDouble(storedValue.substring(storedValue.indexOf(" - ") + 3));
                                                        } else {
                                                            storedMinValue = Double.parseDouble(storedValue);
                                                            storedMaxValue = storedMinValue;
                                                        }
                                                        if (storedMinValue != null && storedMinValue.compareTo(minValue) < 0) {
                                                            minValue = storedMinValue;
                                                        }
                                                        if (storedMaxValue != null && storedMaxValue.compareTo(maxValue) > 0) {
                                                            maxValue = storedMaxValue;
                                                        }
                                                    } catch (NumberFormatException ignored) {
                                                        // Deliberately skipped: an unparseable stored value is
                                                        // not merged and gets overwritten by the new aggregate below.
                                                    }
                                                } else {
                                                    storedValue = "";
                                                }
                                            }
                                            // logger.fine("Stored min value: "+storedMinValue+", Stored max value: "+storedMaxValue);
                                            String newAggregateValue = "";
                                            if (minValue.equals(maxValue)) {
                                                newAggregateValue = minValue.toString();
                                            } else {
                                                newAggregateValue = minValue.toString() + " - " + maxValue.toString();
                                            }
                                            // finally, compare it to the value we have now:
                                            if (!storedValue.equals(newAggregateValue)) {
                                                if (dsf.getDatasetFieldValues() == null) {
                                                    dsf.setDatasetFieldValues(new ArrayList<DatasetFieldValue>());
                                                }
                                                // Make sure slot 0 holds a value object before writing to it.
                                                if (dsf.getDatasetFieldValues().isEmpty()) {
                                                    dsf.getDatasetFieldValues().add(new DatasetFieldValue(dsf));
                                                } else if (dsf.getDatasetFieldValues().get(0) == null) {
                                                    dsf.getDatasetFieldValues().set(0, new DatasetFieldValue(dsf));
                                                }
                                                dsf.getDatasetFieldValues().get(0).setValue(newAggregateValue);
                                            }
                                        }
                                    } else {
                                        for (String fValue : mValues) {
                                            if (!dsft.isControlledVocabulary()) {
                                                Iterator<DatasetFieldValue> dsfvIt = dsf.getDatasetFieldValues().iterator();
                                                boolean valueExists = false;
                                                while (dsfvIt.hasNext()) {
                                                    DatasetFieldValue dsfv = dsfvIt.next();
                                                    if (fValue.equals(dsfv.getValue())) {
                                                        logger.fine("Value " + fValue + " already exists for field " + dsfName);
                                                        valueExists = true;
                                                        break;
                                                    }
                                                }
                                                if (!valueExists) {
                                                    logger.fine("Creating a new value for field " + dsfName + ": " + fValue);
                                                    DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
                                                    newDsfv.setValue(fValue);
                                                    dsf.getDatasetFieldValues().add(newDsfv);
                                                }
                                            } else {
                                                // A controlled vocabulary entry:
                                                // first, let's see if it's a legit control vocab. entry:
                                                ControlledVocabularyValue legitControlledVocabularyValue = null;
                                                Collection<ControlledVocabularyValue> definedVocabularyValues = dsft.getControlledVocabularyValues();
                                                if (definedVocabularyValues != null) {
                                                    for (ControlledVocabularyValue definedVocabValue : definedVocabularyValues) {
                                                        if (fValue.equals(definedVocabValue.getStrValue())) {
                                                            logger.fine("Yes, " + fValue + " is a valid controlled vocabulary value for the field " + dsfName);
                                                            legitControlledVocabularyValue = definedVocabValue;
                                                            break;
                                                        }
                                                    }
                                                }
                                                if (legitControlledVocabularyValue != null) {
                                                    // Only need to add the value if it is new,
                                                    // i.e. if it does not exist yet:
                                                    boolean valueExists = false;
                                                    List<ControlledVocabularyValue> existingControlledVocabValues = dsf.getControlledVocabularyValues();
                                                    if (existingControlledVocabValues != null) {
                                                        Iterator<ControlledVocabularyValue> cvvIt = existingControlledVocabValues.iterator();
                                                        while (cvvIt.hasNext()) {
                                                            ControlledVocabularyValue cvv = cvvIt.next();
                                                            if (fValue.equals(cvv.getStrValue())) {
                                                                // or should I use if (legitControlledVocabularyValue.equals(cvv)) ?
                                                                logger.fine("Controlled vocab. value " + fValue + " already exists for field " + dsfName);
                                                                valueExists = true;
                                                                break;
                                                            }
                                                        }
                                                    } else {
                                                        // The list was treated as nullable above, so make sure
                                                        // there is one to add to instead of dereferencing null.
                                                        dsf.setControlledVocabularyValues(new ArrayList<ControlledVocabularyValue>());
                                                    }
                                                    if (!valueExists) {
                                                        logger.fine("Adding controlled vocabulary value " + fValue + " to field " + dsfName);
                                                        dsf.getControlledVocabularyValues().add(legitControlledVocabularyValue);
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                } else {
                    // A compound field:
                    // See if the plugin has found anything for the fields that
                    // make up this compound field; if we find at least one
                    // of the child values in the map of extracted values, we'll
                    // create a new compound field value and its child
                    //
                    DatasetFieldCompoundValue compoundDsfv = new DatasetFieldCompoundValue();
                    int nonEmptyFields = 0;
                    for (DatasetFieldType cdsft : dsft.getChildDatasetFieldTypes()) {
                        String dsfName = cdsft.getName();
                        if (fileMetadataMap.get(dsfName) != null && !fileMetadataMap.get(dsfName).isEmpty()) {
                            logger.fine("Ingest Service: found extracted metadata for field " + dsfName + ", part of the compound field " + dsft.getName());
                            if (cdsft.isPrimitive()) {
                                // Only primitive, non-controlled-vocabulary children are handled.
                                if (!cdsft.isControlledVocabulary()) {
                                    // TODO: can we have controlled vocabulary
                                    // sub-fields inside compound fields?
                                    DatasetField childDsf = new DatasetField();
                                    childDsf.setDatasetFieldType(cdsft);
                                    DatasetFieldValue newDsfv = new DatasetFieldValue(childDsf);
                                    // Only the first extracted value (in set iteration order) is used.
                                    newDsfv.setValue(fileMetadataMap.get(dsfName).iterator().next());
                                    childDsf.getDatasetFieldValues().add(newDsfv);
                                    childDsf.setParentDatasetFieldCompoundValue(compoundDsfv);
                                    compoundDsfv.getChildDatasetFields().add(childDsf);
                                    nonEmptyFields++;
                                }
                            }
                        }
                    }
                    if (nonEmptyFields > 0) {
                        // Find the existing dataset field of this compound type to attach the new value to:
                        for (DatasetField dsf : editVersion.getFlatDatasetFields()) {
                            if (dsf.getDatasetFieldType().equals(dsft)) {
                                // Now let's check that the dataset version doesn't already have
                                // this compound value - we are only interested in aggregating
                                // unique values. Note that we need to compare compound values
                                // as sets! -- i.e. all the sub fields in 2 compound fields
                                // must match in order for these 2 compounds to be recognized
                                // as "the same":
                                boolean alreadyExists = false;
                                for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) {
                                    int matches = 0;
                                    for (DatasetField cdsf : dsfcv.getChildDatasetFields()) {
                                        String cdsfName = cdsf.getDatasetFieldType().getName();
                                        // A child without any stored value cannot match anything.
                                        List<DatasetFieldValue> cdsfValues = cdsf.getDatasetFieldValues();
                                        String cdsfValue = null;
                                        if (cdsfValues != null && !cdsfValues.isEmpty() && cdsfValues.get(0) != null) {
                                            cdsfValue = cdsfValues.get(0).getValue();
                                        }
                                        if (cdsfValue != null && !cdsfValue.equals("")) {
                                            // The plugin may not have extracted anything for this child;
                                            // treat that as "no match" rather than dereferencing null.
                                            Set<String> extractedValues = fileMetadataMap.get(cdsfName);
                                            if (extractedValues != null && !extractedValues.isEmpty()) {
                                                String extractedValue = extractedValues.iterator().next();
                                                logger.fine("values: existing: " + cdsfValue + ", extracted: " + extractedValue);
                                                if (cdsfValue.equals(extractedValue)) {
                                                    matches++;
                                                }
                                            }
                                        }
                                    }
                                    if (matches == nonEmptyFields) {
                                        alreadyExists = true;
                                        break;
                                    }
                                }
                                if (!alreadyExists) {
                                    // save this compound value, by attaching it to the
                                    // version for proper cascading:
                                    compoundDsfv.setParentDatasetField(dsf);
                                    dsf.getDatasetFieldCompoundValues().add(compoundDsfv);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
use of edu.harvard.iq.dataverse.ControlledVocabularyValue in project dataverse by IQSS.
the class SwordServiceBean method addDatasetSubjectIfMissing.
/**
 * Ensures the given dataset version carries a subject field.
 * <p>
 * The version's dataset fields are scanned for one whose type name equals the name of the
 * subject field type. If one is found the version is left as it is; otherwise a new subject
 * field holding the "N/A" controlled vocabulary value is appended to the version's fields.
 *
 * @param datasetVersion the version to inspect and, if needed, mutate
 */
public void addDatasetSubjectIfMissing(DatasetVersion datasetVersion) {
    DatasetFieldType subjectType = datasetFieldService.findByNameOpt(DatasetFieldConstant.subject);

    for (DatasetField existingField : datasetVersion.getDatasetFields()) {
        String displayValue = existingField.getDisplayValue();
        String existingTypeName = existingField.getDatasetFieldType().getName();
        logger.fine("datasetField: " + displayValue + " ... " + existingTypeName);

        boolean isSubject = existingField.getDatasetFieldType().getName().equals(subjectType.getName());
        if (isSubject) {
            // Dataset already has a subject: nothing to add.
            logger.fine("subject field exists already");
            logger.fine("returning early because subject exists already");
            return;
        }
    }

    // No subject field was found above, so create one.
    DatasetField naSubjectField = DatasetField.createNewEmptyDatasetField(subjectType, datasetVersion);
    /**
     * @todo Once dataverse has subject
     * (https://github.com/IQSS/dataverse/issues/769), we should get subject
     * from there; for now, we'll use the global NA value. However, there is
     * currently oddness in that if you go to edit the title of a dataset
     * via the GUI you can not save the dataset without selecting a Subject:
     * https://github.com/IQSS/dataverse/issues/1296#issuecomment-70146314
     */
    ControlledVocabularyValue notApplicable = datasetFieldService.findNAControlledVocabularyValue();
    naSubjectField.setSingleControlledVocabularyValue(notApplicable);
    datasetVersion.getDatasetFields().add(naSubjectField);
}
Aggregations