Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class ImportServiceBean, method doImport:
public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse owner, String xmlToParse, String fileName, ImportType importType, PrintWriter cleanupLog) throws ImportException, IOException {
String status = "";
Long createdId = null;
DatasetDTO dsDTO = null;
try {
dsDTO = importDDIService.doImport(importType, xmlToParse);
} catch (XMLStreamException e) {
throw new ImportException("XMLStreamException" + e);
}
// convert DTO to Json,
Gson gson = new GsonBuilder().setPrettyPrinting().create();
String json = gson.toJson(dsDTO);
JsonReader jsonReader = Json.createReader(new StringReader(json));
JsonObject obj = jsonReader.readObject();
// and call parse Json to read it into a dataset
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
parser.setLenient(!importType.equals(ImportType.NEW));
Dataset ds = parser.parseDataset(obj);
// If a new import supplies a global id whose protocol is not the one configured for this installation, it will be rejected.
if (importType.equals(ImportType.NEW)) {
if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
}
}
ds.setOwner(owner);
ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
// Check data against required constraints
List<ConstraintViolation<DatasetField>> violations = ds.getVersions().get(0).validateRequired();
if (!violations.isEmpty()) {
if (importType.equals(ImportType.MIGRATION) || importType.equals(ImportType.HARVEST)) {
// For migration and harvest, add NA for missing required values
for (ConstraintViolation<DatasetField> v : violations) {
DatasetField f = v.getRootBean();
f.setSingleValue(DatasetField.NA_VALUE);
}
} else {
// when importing a new dataset, the import will fail
// if required values are missing.
String errMsg = "Error importing data:";
for (ConstraintViolation<DatasetField> v : violations) {
errMsg += " " + v.getMessage();
}
throw new ImportException(errMsg);
}
}
// Check data against validation constraints
// If we are migrating and "scrub migration data" is true, we attempt to fix invalid data;
// if the fix fails, stop processing this file by throwing an exception.
Set<ConstraintViolation> invalidViolations = ds.getVersions().get(0).validate();
ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
Validator validator = factory.getValidator();
if (!invalidViolations.isEmpty()) {
for (ConstraintViolation<DatasetFieldValue> v : invalidViolations) {
DatasetFieldValue f = v.getRootBean();
boolean fixed = false;
boolean converted = false;
if ((importType.equals(ImportType.MIGRATION) || importType.equals(ImportType.HARVEST)) && settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
fixed = processMigrationValidationError(f, cleanupLog, fileName);
converted = true;
if (fixed) {
Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
if (!scrubbedViolations.isEmpty()) {
fixed = false;
}
}
}
if (!fixed) {
if (importType.equals(ImportType.HARVEST)) {
String msg = "Data modified - File: " + fileName + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; " + "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
} else {
String msg = " Validation error for ";
if (converted) {
msg += "converted ";
}
msg += "value: " + f.getValue() + ", " + f.getValidationMessage();
throw new ImportException(msg);
}
}
}
}
Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
if (existingDs != null) {
if (importType.equals(ImportType.HARVEST)) {
// We will replace the current version with the imported version.
if (existingDs.getVersions().size() != 1) {
throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
}
engineSvc.submit(new DestroyDatasetCommand(existingDs, dataverseRequest));
Dataset managedDs = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, importType));
status = " updated dataset, id=" + managedDs.getId() + ".";
} else {
// check that the version number isn't already in the dataset
for (DatasetVersion dsv : existingDs.getVersions()) {
if (dsv.getVersionNumber().equals(ds.getLatestVersion().getVersionNumber())) {
throw new ImportException("VersionNumber " + ds.getLatestVersion().getVersionNumber() + " already exists in dataset " + existingDs.getGlobalId());
}
}
DatasetVersion dsv = engineSvc.submit(new CreateDatasetVersionCommand(dataverseRequest, existingDs, ds.getVersions().get(0)));
status = " created datasetVersion, for dataset " + dsv.getDataset().getGlobalId();
createdId = dsv.getId();
}
} else {
Dataset managedDs = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, importType));
status = " created dataset, id=" + managedDs.getId() + ".";
createdId = managedDs.getId();
}
} catch (JsonParseException ex) {
logger.log(Level.INFO, "Error parsing datasetVersion: {0}", ex.getMessage());
throw new ImportException("Error parsing datasetVersion: " + ex.getMessage(), ex);
} catch (CommandException ex) {
logger.log(Level.INFO, "Error excuting Create dataset command: {0}", ex.getMessage());
throw new ImportException("Error excuting dataverse command: " + ex.getMessage(), ex);
}
return Json.createObjectBuilder().add("message", status);
}
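For orientation, here is a minimal sketch of how a caller might invoke doImport and read the status back out of the returned JsonObjectBuilder. Only the doImport signature and the "message" key come from the method above; the injected importService field, the surrounding dataverseRequest/owner objects, and the cleanup-log file name are assumptions made for illustration.

import java.io.IOException;
import java.io.PrintWriter;
import javax.json.JsonObjectBuilder;

// Hypothetical caller; importService, dataverseRequest and owner are assumed to be
// available (e.g. injected) in the surrounding harvesting or batch-import code.
public String importOneRecord(String xmlToParse, String fileName) throws ImportException, IOException {
    try (PrintWriter cleanupLog = new PrintWriter("harvest-cleanup.log")) {
        JsonObjectBuilder result = importService.doImport(
                dataverseRequest, owner, xmlToParse, fileName, ImportType.HARVEST, cleanupLog);
        // doImport reports its outcome as a human-readable status under the "message" key
        return result.build().getString("message");
    }
}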
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class JsonParserTest, method latLonField:
DatasetField latLonField(String latLon, String value) {
DatasetField retVal = new DatasetField();
retVal.setDatasetFieldType(datasetFieldTypeSvc.findByName(latLon));
retVal.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(retVal, value)));
return retVal;
}
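A hedged usage example: in the test class, a helper like this is handy for assembling the child fields of a compound geographic field before comparing a printed-and-reparsed copy. The field names and coordinate values below are illustrative assumptions and depend on what the mocked datasetFieldTypeSvc defines.

// Illustrative only; assumes the mock field-type service knows these field names.
DatasetField west = latLonField("westLongitude", "34");
DatasetField east = latLonField("eastLongitude", "35");
DatasetField north = latLonField("northLatitude", "33");
DatasetField south = latLonField("southLatitude", "32");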
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class JsonParserTest, method assertFieldsEqual:
public boolean assertFieldsEqual(DatasetField ex, DatasetField act) {
if (ex == act)
return true;
if ((ex == null) ^ (act == null))
return false;
// type
if (!ex.getDatasetFieldType().equals(act.getDatasetFieldType()))
return false;
if (ex.getDatasetFieldType().isPrimitive()) {
List<DatasetFieldValue> exVals = ex.getDatasetFieldValues();
List<DatasetFieldValue> actVals = act.getDatasetFieldValues();
if (exVals.size() != actVals.size())
return false;
Iterator<DatasetFieldValue> exItr = exVals.iterator();
for (DatasetFieldValue actVal : actVals) {
DatasetFieldValue exVal = exItr.next();
if (!exVal.getValue().equals(actVal.getValue())) {
return false;
}
}
return true;
} else if (ex.getDatasetFieldType().isControlledVocabulary()) {
List<ControlledVocabularyValue> exVals = ex.getControlledVocabularyValues();
List<ControlledVocabularyValue> actVals = act.getControlledVocabularyValues();
if (exVals.size() != actVals.size())
return false;
Iterator<ControlledVocabularyValue> exItr = exVals.iterator();
for (ControlledVocabularyValue actVal : actVals) {
ControlledVocabularyValue exVal = exItr.next();
if (!exVal.getId().equals(actVal.getId())) {
return false;
}
}
return true;
} else if (ex.getDatasetFieldType().isCompound()) {
List<DatasetFieldCompoundValue> exVals = ex.getDatasetFieldCompoundValues();
List<DatasetFieldCompoundValue> actVals = act.getDatasetFieldCompoundValues();
if (exVals.size() != actVals.size())
return false;
Iterator<DatasetFieldCompoundValue> exItr = exVals.iterator();
for (DatasetFieldCompoundValue actVal : actVals) {
DatasetFieldCompoundValue exVal = exItr.next();
Iterator<DatasetField> exChildItr = exVal.getChildDatasetFields().iterator();
Iterator<DatasetField> actChildItr = actVal.getChildDatasetFields().iterator();
while (exChildItr.hasNext()) {
// propagate failures from the recursive comparison of child fields
if (!assertFieldsEqual(exChildItr.next(), actChildItr.next())) {
return false;
}
}
}
return true;
}
throw new IllegalArgumentException("Unknown dataset field type '" + ex.getDatasetFieldType() + "'");
}
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class JsonParserTest, method testPrimitiveNoRepeatesFieldRoundTrip:
@Test
public void testPrimitiveNoRepeatesFieldRoundTrip() throws JsonParseException {
DatasetField expected = new DatasetField();
expected.setDatasetFieldType(datasetFieldTypeSvc.findByName("description"));
expected.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(expected, "This is a description value")));
JsonObject json = JsonPrinter.json(expected);
DatasetField actual = sut.parseField(json);
assertTrue(assertFieldsEqual(actual, expected));
}
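A hypothetical sibling test, sketching the same round trip for a repeatable primitive field. The field name "alternativeDescription" is an assumption (the mocked datasetFieldTypeSvc would need to define a multi-valued primitive under that name, and java.util.Arrays would need to be imported); JsonPrinter.json, sut.parseField, and assertFieldsEqual are taken from the code above.

@Test
public void testPrimitiveRepeatedFieldRoundTrip() throws JsonParseException {
    DatasetField expected = new DatasetField();
    // "alternativeDescription" is a hypothetical repeatable primitive field type
    expected.setDatasetFieldType(datasetFieldTypeSvc.findByName("alternativeDescription"));
    expected.setDatasetFieldValues(Arrays.asList(
            new DatasetFieldValue(expected, "first value"),
            new DatasetFieldValue(expected, "second value")));
    JsonObject json = JsonPrinter.json(expected);
    DatasetField actual = sut.parseField(json);
    assertTrue(assertFieldsEqual(actual, expected));
}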
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class IngestServiceBean, method processDatasetMetadata:
private void processDatasetMetadata(FileMetadataIngest fileMetadataIngest, DatasetVersion editVersion) throws IOException {
for (MetadataBlock mdb : editVersion.getDataset().getOwner().getMetadataBlocks()) {
if (mdb.getName().equals(fileMetadataIngest.getMetadataBlockName())) {
logger.fine("Ingest Service: dataset version has " + mdb.getName() + " metadata block enabled.");
editVersion.setDatasetFields(editVersion.initDatasetFields());
Map<String, Set<String>> fileMetadataMap = fileMetadataIngest.getMetadataMap();
for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) {
if (dsft.isPrimitive()) {
if (!dsft.isHasParent()) {
String dsfName = dsft.getName();
// See if the plugin has found anything for this field:
if (fileMetadataMap.get(dsfName) != null && !fileMetadataMap.get(dsfName).isEmpty()) {
logger.fine("Ingest Service: found extracted metadata for field " + dsfName);
// go through the existing fields:
for (DatasetField dsf : editVersion.getFlatDatasetFields()) {
if (dsf.getDatasetFieldType().equals(dsft)) {
// yep, this is our field!
// let's go through the values that the ingest
// plugin found in the file for this field:
Set<String> mValues = fileMetadataMap.get(dsfName);
// Special aggregation rules apply to the resolution.* fields below; these are currently hard-coded rather than programmatically defined. -- L.A. 4.0
if (dsfName.equals("resolution.Temporal") || dsfName.equals("resolution.Spatial") || dsfName.equals("resolution.Spectral")) {
// For these values, we aggregate the minimum-maximum
// pair, for the entire set.
// So first, we need to go through the values found by
// the plugin and select the min. and max. values of
// these:
// (note that we are assuming that they all must
// validate as doubles!)
Double minValue = null;
Double maxValue = null;
for (String fValue : mValues) {
try {
double thisValue = Double.parseDouble(fValue);
if (minValue == null || Double.compare(thisValue, minValue) < 0) {
minValue = thisValue;
}
if (maxValue == null || Double.compare(thisValue, maxValue) > 0) {
maxValue = thisValue;
}
} catch (NumberFormatException e) {
// skip extracted values that don't parse as doubles
}
}
// logger.fine("Min value: "+minValue+", Max value: "+maxValue);
if (minValue != null && maxValue != null) {
Double storedMinValue = null;
Double storedMaxValue = null;
String storedValue = "";
if (dsf.getDatasetFieldValues() != null && dsf.getDatasetFieldValues().get(0) != null) {
storedValue = dsf.getDatasetFieldValues().get(0).getValue();
if (storedValue != null && !storedValue.equals("")) {
try {
if (storedValue.indexOf(" - ") > -1) {
storedMinValue = Double.parseDouble(storedValue.substring(0, storedValue.indexOf(" - ")));
storedMaxValue = Double.parseDouble(storedValue.substring(storedValue.indexOf(" - ") + 3));
} else {
storedMinValue = Double.parseDouble(storedValue);
storedMaxValue = storedMinValue;
}
if (storedMinValue != null && storedMinValue.compareTo(minValue) < 0) {
minValue = storedMinValue;
}
if (storedMaxValue != null && storedMaxValue.compareTo(maxValue) > 0) {
maxValue = storedMaxValue;
}
} catch (NumberFormatException e) {
// the stored value isn't numeric; keep the range computed from the extracted values
}
} else {
storedValue = "";
}
}
// logger.fine("Stored min value: "+storedMinValue+", Stored max value: "+storedMaxValue);
String newAggregateValue = "";
if (minValue.equals(maxValue)) {
newAggregateValue = minValue.toString();
} else {
newAggregateValue = minValue.toString() + " - " + maxValue.toString();
}
// finally, compare it to the value we have now:
if (!storedValue.equals(newAggregateValue)) {
if (dsf.getDatasetFieldValues() == null) {
dsf.setDatasetFieldValues(new ArrayList<DatasetFieldValue>());
}
if (dsf.getDatasetFieldValues().get(0) == null) {
DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
dsf.getDatasetFieldValues().add(newDsfv);
}
dsf.getDatasetFieldValues().get(0).setValue(newAggregateValue);
}
}
// Ouch.
} else {
for (String fValue : mValues) {
if (!dsft.isControlledVocabulary()) {
Iterator<DatasetFieldValue> dsfvIt = dsf.getDatasetFieldValues().iterator();
boolean valueExists = false;
while (dsfvIt.hasNext()) {
DatasetFieldValue dsfv = dsfvIt.next();
if (fValue.equals(dsfv.getValue())) {
logger.fine("Value " + fValue + " already exists for field " + dsfName);
valueExists = true;
break;
}
}
if (!valueExists) {
logger.fine("Creating a new value for field " + dsfName + ": " + fValue);
DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
newDsfv.setValue(fValue);
dsf.getDatasetFieldValues().add(newDsfv);
}
} else {
// A controlled vocabulary entry:
// first, let's see if it's a legit control vocab. entry:
ControlledVocabularyValue legitControlledVocabularyValue = null;
Collection<ControlledVocabularyValue> definedVocabularyValues = dsft.getControlledVocabularyValues();
if (definedVocabularyValues != null) {
for (ControlledVocabularyValue definedVocabValue : definedVocabularyValues) {
if (fValue.equals(definedVocabValue.getStrValue())) {
logger.fine("Yes, " + fValue + " is a valid controlled vocabulary value for the field " + dsfName);
legitControlledVocabularyValue = definedVocabValue;
break;
}
}
}
if (legitControlledVocabularyValue != null) {
// Only need to add the value if it is new,
// i.e. if it does not exist yet:
boolean valueExists = false;
List<ControlledVocabularyValue> existingControlledVocabValues = dsf.getControlledVocabularyValues();
if (existingControlledVocabValues != null) {
Iterator<ControlledVocabularyValue> cvvIt = existingControlledVocabValues.iterator();
while (cvvIt.hasNext()) {
ControlledVocabularyValue cvv = cvvIt.next();
if (fValue.equals(cvv.getStrValue())) {
// or should I use if (legitControlledVocabularyValue.equals(cvv)) ?
logger.fine("Controlled vocab. value " + fValue + " already exists for field " + dsfName);
valueExists = true;
break;
}
}
}
if (!valueExists) {
logger.fine("Adding controlled vocabulary value " + fValue + " to field " + dsfName);
dsf.getControlledVocabularyValues().add(legitControlledVocabularyValue);
}
}
}
}
}
}
}
}
}
} else {
// A compound field:
// See if the plugin has found anything for the fields that
// make up this compound field; if we find at least one
// of the child values in the map of extracted values, we'll
// create a new compound field value and its child
//
DatasetFieldCompoundValue compoundDsfv = new DatasetFieldCompoundValue();
int nonEmptyFields = 0;
for (DatasetFieldType cdsft : dsft.getChildDatasetFieldTypes()) {
String dsfName = cdsft.getName();
if (fileMetadataMap.get(dsfName) != null && !fileMetadataMap.get(dsfName).isEmpty()) {
logger.fine("Ingest Service: found extracted metadata for field " + dsfName + ", part of the compound field " + dsft.getName());
if (cdsft.isPrimitive()) {
// child fields of compound fields are currently always primitive, but maybe it'll change in the future.
if (!cdsft.isControlledVocabulary()) {
// TODO: can we have controlled vocabulary
// sub-fields inside compound fields?
DatasetField childDsf = new DatasetField();
childDsf.setDatasetFieldType(cdsft);
DatasetFieldValue newDsfv = new DatasetFieldValue(childDsf);
newDsfv.setValue((String) fileMetadataMap.get(dsfName).toArray()[0]);
childDsf.getDatasetFieldValues().add(newDsfv);
childDsf.setParentDatasetFieldCompoundValue(compoundDsfv);
compoundDsfv.getChildDatasetFields().add(childDsf);
nonEmptyFields++;
}
}
}
}
if (nonEmptyFields > 0) {
// go through the dataset version's fields to find the actual parent for this sub-field:
for (DatasetField dsf : editVersion.getFlatDatasetFields()) {
if (dsf.getDatasetFieldType().equals(dsft)) {
// Now let's check that the dataset version doesn't already have
// this compound value - we are only interested in aggregating
// unique values. Note that we need to compare compound values
// as sets! -- i.e. all the sub fields in 2 compound fields
// must match in order for these 2 compounds to be recognized
// as "the same":
boolean alreadyExists = false;
for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) {
int matches = 0;
for (DatasetField cdsf : dsfcv.getChildDatasetFields()) {
String cdsfName = cdsf.getDatasetFieldType().getName();
String cdsfValue = cdsf.getDatasetFieldValues().get(0).getValue();
if (cdsfValue != null && !cdsfValue.equals("")) {
String extractedValue = (String) fileMetadataMap.get(cdsfName).toArray()[0];
logger.fine("values: existing: " + cdsfValue + ", extracted: " + extractedValue);
if (cdsfValue.equals(extractedValue)) {
matches++;
}
}
}
if (matches == nonEmptyFields) {
alreadyExists = true;
break;
}
}
if (!alreadyExists) {
// save this compound value, by attaching it to the
// version for proper cascading:
compoundDsfv.setParentDatasetField(dsf);
dsf.getDatasetFieldCompoundValues().add(compoundDsfv);
}
}
}
}
}
}
}
}
}
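The trickiest part of processDatasetMetadata is the min-max aggregation applied to the resolution.* fields. Below is a small, self-contained sketch of just that rule, stripped of the entity plumbing; the class name, method name, and plain-string interface are assumptions made for illustration, not part of IngestServiceBean.

import java.util.Set;

// Hypothetical standalone helper mirroring the resolution.* aggregation above:
// parse each extracted value as a double, widen the range with any previously
// stored "min - max" (or single) value, and render the result the same way.
final class ResolutionRangeSketch {

    static String aggregateResolutionRange(Set<String> extractedValues, String storedValue) {
        Double min = null;
        Double max = null;
        for (String v : extractedValues) {
            try {
                double d = Double.parseDouble(v);
                if (min == null || d < min) {
                    min = d;
                }
                if (max == null || d > max) {
                    max = d;
                }
            } catch (NumberFormatException e) {
                // unparseable extracted values are skipped, as in the ingest code
            }
        }
        if (min == null) {
            // nothing numeric was extracted; keep whatever was stored before
            return storedValue;
        }
        if (storedValue != null && !storedValue.isEmpty()) {
            try {
                int sep = storedValue.indexOf(" - ");
                double storedMin = (sep > -1) ? Double.parseDouble(storedValue.substring(0, sep)) : Double.parseDouble(storedValue);
                double storedMax = (sep > -1) ? Double.parseDouble(storedValue.substring(sep + 3)) : storedMin;
                if (storedMin < min) {
                    min = storedMin;
                }
                if (storedMax > max) {
                    max = storedMax;
                }
            } catch (NumberFormatException e) {
                // malformed stored value; fall back to the extracted range
            }
        }
        return min.equals(max) ? min.toString() : min + " - " + max;
    }
}

Called as aggregateResolutionRange(Set.of("2.5", "0.8"), "1.0 - 2.0"), this sketch would return "0.8 - 2.5", widening the stored range with the newly extracted extremes.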