Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class ImportServiceBean, method doImportHarvestedDataset:
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException {
if (harvestingClient == null || harvestingClient.getDataverse() == null) {
throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient.");
}
Dataverse owner = harvestingClient.getDataverse();
Dataset importedDataset = null;
DatasetDTO dsDTO = null;
String json = null;
if ("ddi".equalsIgnoreCase(metadataFormat) || "oai_ddi".equals(metadataFormat) || metadataFormat.toLowerCase().matches("^oai_ddi.*")) {
try {
String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
// TODO:
// import type should be configurable - it should be possible to
// select whether you want to harvest with or without files,
// ImportType.HARVEST vs. ImportType.HARVEST_WITH_FILES
logger.fine("importing DDI " + metadataFile.getAbsolutePath());
dsDTO = importDDIService.doImport(ImportType.HARVEST_WITH_FILES, xmlToParse);
} catch (IOException | XMLStreamException | ImportException e) {
throw new ImportException("Failed to process DDI XML record: " + e.getClass() + " (" + e.getMessage() + ")");
}
} else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) {
logger.fine("importing DC " + metadataFile.getAbsolutePath());
try {
String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
dsDTO = importGenericService.processOAIDCxml(xmlToParse);
} catch (IOException | XMLStreamException e) {
throw new ImportException("Failed to process Dublin Core XML record: " + e.getClass() + " (" + e.getMessage() + ")");
}
} else if ("dataverse_json".equals(metadataFormat)) {
// This is Dataverse metadata already formatted in JSON.
// Simply read it into a string, and pass to the final import further down:
logger.fine("Attempting to import custom dataverse metadata from file " + metadataFile.getAbsolutePath());
json = new String(Files.readAllBytes(metadataFile.toPath()));
} else {
throw new ImportException("Unsupported import metadata format: " + metadataFormat);
}
if (json == null) {
if (dsDTO != null) {
// convert the DTO to JSON:
Gson gson = new GsonBuilder().setPrettyPrinting().create();
json = gson.toJson(dsDTO);
logger.fine("JSON produced for the metadata harvested: " + json);
} else {
throw new ImportException("Failed to transform XML metadata format " + metadataFormat + " into a DatasetDTO");
}
}
JsonReader jsonReader = Json.createReader(new StringReader(json));
JsonObject obj = jsonReader.readObject();
// and call the JSON parser to read it into a Dataset object
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
parser.setLenient(true);
Dataset ds = parser.parseDataset(obj);
// For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol
// we support, it should be rejected.
// (TODO: ! - add some way of keeping track of supported protocols!)
// if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
// throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
// }
ds.setOwner(owner);
ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
// Check data against required constraints
List<ConstraintViolation<DatasetField>> violations = ds.getVersions().get(0).validateRequired();
if (!violations.isEmpty()) {
// For migration and harvest, add NA for missing required values
for (ConstraintViolation<DatasetField> v : violations) {
DatasetField f = v.getRootBean();
f.setSingleValue(DatasetField.NA_VALUE);
}
}
// Check data against validation constraints
// If we are migrating and "scrub migration data" is true, we attempt to fix invalid data;
// if the fix fails, stop processing this file by throwing an exception.
Set<ConstraintViolation> invalidViolations = ds.getVersions().get(0).validate();
ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
Validator validator = factory.getValidator();
if (!invalidViolations.isEmpty()) {
for (ConstraintViolation<DatasetFieldValue> v : invalidViolations) {
DatasetFieldValue f = v.getRootBean();
boolean fixed = false;
boolean converted = false;
// TODO: Is this scrubbing something we want to continue doing?
if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName());
converted = true;
if (fixed) {
Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
if (!scrubbedViolations.isEmpty()) {
fixed = false;
}
}
}
if (!fixed) {
String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; " + "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
}
}
}
// A global identifier is required in order to import this dataset:
if (StringUtils.isEmpty(ds.getGlobalId())) {
throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global unique identifier that we could recognize, skipping.");
}
ds.setHarvestedFrom(harvestingClient);
ds.setHarvestIdentifier(harvestIdentifier);
Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
if (existingDs != null) {
// If this dataset already exists in another dataverse, we are just going to skip it!
if (existingDs.getOwner() != null && !owner.getId().equals(existingDs.getOwner().getId())) {
throw new ImportException("The dataset with the global id " + ds.getGlobalId() + " already exists, in the dataverse " + existingDs.getOwner().getAlias() + ", skipping.");
}
// If we already have a LOCAL dataset with this same global id (can happen!), we skip it also:
if (!existingDs.isHarvested()) {
throw new ImportException("A LOCAL dataset with the global id " + ds.getGlobalId() + " already exists in this dataverse; skipping.");
}
// We will replace the current version with the imported version.
if (existingDs.getVersions().size() != 1) {
throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
}
// Purge all the SOLR documents associated with this dataset from the
// index server:
indexService.deleteHarvestedDocuments(existingDs);
// Files from harvested datasets are removed directly via the entity manager, without running DeleteFileCommand on them:
for (DataFile harvestedFile : existingDs.getFiles()) {
DataFile merged = em.merge(harvestedFile);
em.remove(merged);
harvestedFile = null;
}
// TODO:
// Verify what happens with the indexed files in SOLR?
// are they going to be overwritten by the reindexing of the dataset?
existingDs.setFiles(null);
Dataset merged = em.merge(existingDs);
engineSvc.submit(new DestroyDatasetCommand(merged, dataverseRequest));
importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
} else {
importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
}
} catch (JsonParseException | ImportException | CommandException ex) {
logger.fine("Failed to import harvested dataset: " + ex.getClass() + ": " + ex.getMessage());
FileOutputStream savedJsonFileStream = new FileOutputStream(new File(metadataFile.getAbsolutePath() + ".json"));
byte[] jsonBytes = json.getBytes();
int i = 0;
while (i < jsonBytes.length) {
int chunkSize = i + 8192 <= jsonBytes.length ? 8192 : jsonBytes.length - i;
savedJsonFileStream.write(jsonBytes, i, chunkSize);
i += chunkSize;
savedJsonFileStream.flush();
}
savedJsonFileStream.close();
logger.info("JSON produced saved in " + metadataFile.getAbsolutePath() + ".json");
throw new ImportException("Failed to import harvested dataset: " + ex.getClass() + " (" + ex.getMessage() + ")", ex);
}
return importedDataset;
}
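As a side note, the catch block above persists the JSON that failed to import by looping over the byte array in 8 KB chunks and flushing on every iteration. A minimal sketch of an equivalent, simpler approach using java.nio; the helper class and method names here are hypothetical, not part of ImportServiceBean:

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

final class FailedImportJsonWriter {

    // Hypothetical helper: writes the JSON that failed to import next to the
    // original metadata file, letting Files.write handle chunking and closing.
    static Path saveFailedImportJson(File metadataFile, String json) throws IOException {
        Path savedJson = Paths.get(metadataFile.getAbsolutePath() + ".json");
        return Files.write(savedJson, json.getBytes(StandardCharsets.UTF_8));
    }
}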
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class JsonPrinterTest, method constructPrimitive:
DatasetField constructPrimitive(String datasetFieldTypeName, String value) {
DatasetField retVal = new DatasetField();
retVal.setDatasetFieldType(datasetFieldTypeSvc.findByName(datasetFieldTypeName));
retVal.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(retVal, value)));
return retVal;
}
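For context, a test in the same class might use this helper as follows. The field type name "title" is only illustrative and assumes the mocked datasetFieldTypeSvc knows such a primitive type; assertEquals comes from the test's JUnit imports:

// Hypothetical usage of the helper above.
DatasetField titleField = constructPrimitive("title", "A Study of Everything");
DatasetFieldValue onlyValue = titleField.getDatasetFieldValues().get(0);
assertEquals("A Study of Everything", onlyValue.getValue());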
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class ForeignMetadataImportServiceBean, method createDatasetFieldValue:
private DatasetFieldCompoundValue createDatasetFieldValue(DatasetFieldType dsft, DatasetFieldCompoundValue savedCompoundValue, String elementText, DatasetVersion datasetVersion) {
if (dsft.isPrimitive()) {
if (!dsft.isHasParent()) {
// simple primitive:
DatasetField dsf = null;
for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
if (existingDsf.getDatasetFieldType().equals(dsft)) {
dsf = existingDsf;
}
}
// if it doesn't exist, create a new one:
if (dsf == null) {
dsf = new DatasetField();
dsf.setDatasetFieldType(dsft);
datasetVersion.getDatasetFields().add(dsf);
dsf.setDatasetVersion(datasetVersion);
}
String dsfName = dsft.getName();
if (!dsft.isControlledVocabulary()) {
logger.fine("Creating a new value for field " + dsfName + ": " + elementText);
DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
newDsfv.setValue(elementText);
dsf.getDatasetFieldValues().add(newDsfv);
} else {
// A controlled vocabulary entry:
// first, let's see if it's a legit control vocab. entry:
/* not supported yet; though I expect the commented-out code
below to work;
ControlledVocabularyValue legitControlledVocabularyValue = null;
Collection<ControlledVocabularyValue> definedVocabularyValues = dsft.getControlledVocabularyValues();
if (definedVocabularyValues != null) {
for (ControlledVocabularyValue definedVocabValue : definedVocabularyValues) {
if (elementText.equals(definedVocabValue.getStrValue())) {
logger.fine("Yes, " + elementText + " is a valid controlled vocabulary value for the field " + dsfName);
legitControlledVocabularyValue = definedVocabValue;
break;
}
}
}
if (legitControlledVocabularyValue != null) {
logger.fine("Adding controlled vocabulary value " + elementText + " to field " + dsfName);
dsf.getControlledVocabularyValues().add(legitControlledVocabularyValue);
}
*/
}
// No compound values had to be created; returning null:
return null;
} else {
// a primitive that is part of a compound value:
// first, let's create the field and the value, for the
// primitive node itself:
DatasetField childField = new DatasetField();
childField.setDatasetFieldType(dsft);
DatasetFieldValue childValue = new DatasetFieldValue(childField);
childValue.setValue(elementText);
childField.getDatasetFieldValues().add(childValue);
// see if a compound value of the right type has already been
// created and passed to us:
DatasetFieldCompoundValue parentCompoundValue = null;
DatasetFieldType parentFieldType = dsft.getParentDatasetFieldType();
if (parentFieldType == null) {
logger.severe("Child field type with no parent field type defined!");
// could just skip this field and try to continue - ?
return null;
}
if (savedCompoundValue != null) {
if (parentFieldType.equals(savedCompoundValue.getParentDatasetField().getDatasetFieldType())) {
parentCompoundValue = savedCompoundValue;
}
}
if (parentCompoundValue == null) {
// no matching compound value exists yet, so we need to create one;
// and to do that, we need to find or create the "parent"
// dataset field for this compound value:
// (I put quotes around "parent", because I really feel it
// is a misnomer, and that the relationship between the compound value
// and the corresponding dataset field should be called
// "CompoundDatasetField", not "ParentDatasetField") (discuss?)
DatasetField parentField = null;
for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
if (existingDsf.getDatasetFieldType().equals(parentFieldType)) {
parentField = existingDsf;
}
}
// if it doesn't exist, create a new one:
if (parentField == null) {
parentField = new DatasetField();
parentField.setDatasetFieldType(parentFieldType);
datasetVersion.getDatasetFields().add(parentField);
parentField.setDatasetVersion(datasetVersion);
}
// and then create new compound value:
parentCompoundValue = new DatasetFieldCompoundValue();
parentCompoundValue.setParentDatasetField(parentField);
parentField.getDatasetFieldCompoundValues().add(parentCompoundValue);
}
childField.setParentDatasetFieldCompoundValue(parentCompoundValue);
parentCompoundValue.getChildDatasetFields().add(childField);
return parentCompoundValue;
}
}
return null;
}
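To make the compound branch above concrete, this is the object graph it wires together for a primitive child that belongs to a compound field. A sketch only: the field type variables (authorType, authorNameType) are assumed to have been loaded already, and the type names are illustrative.

// Parent field of a compound type, e.g. "author":
DatasetField parentField = new DatasetField();
parentField.setDatasetFieldType(authorType);

// The compound value that groups the child fields:
DatasetFieldCompoundValue compoundValue = new DatasetFieldCompoundValue();
compoundValue.setParentDatasetField(parentField);
parentField.getDatasetFieldCompoundValues().add(compoundValue);

// A primitive child field, e.g. "authorName", with its value:
DatasetField childField = new DatasetField();
childField.setDatasetFieldType(authorNameType);
DatasetFieldValue childValue = new DatasetFieldValue(childField);
childValue.setValue("Doe, Jane");
childField.getDatasetFieldValues().add(childValue);

// Wire the child into the compound value, mirroring the method above:
childField.setParentDatasetFieldCompoundValue(compoundValue);
compoundValue.getChildDatasetFields().add(childField);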
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class JsonParserTest, method testPrimitiveRepeatesFieldRoundTrip:
@Test
public void testPrimitiveRepeatesFieldRoundTrip() throws JsonParseException {
DatasetField expected = new DatasetField();
expected.setDatasetFieldType(datasetFieldTypeSvc.findByName("keyword"));
expected.setDatasetFieldValues(Arrays.asList(new DatasetFieldValue(expected, "kw1"), new DatasetFieldValue(expected, "kw2"), new DatasetFieldValue(expected, "kw3")));
JsonObject json = JsonPrinter.json(expected);
DatasetField actual = sut.parseField(json);
assertFieldsEqual(actual, expected);
}
Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS.
In class JsonParser, method parseField:
public DatasetField parseField(JsonObject json) throws JsonParseException {
if (json == null) {
return null;
}
DatasetField ret = new DatasetField();
DatasetFieldType type = datasetFieldSvc.findByNameOpt(json.getString("typeName", ""));
if (type == null) {
throw new JsonParseException("Can't find type '" + json.getString("typeName", "") + "'");
}
if (type.isAllowMultiples() != json.getBoolean("multiple")) {
throw new JsonParseException("incorrect multiple for field " + json.getString("typeName", ""));
}
if (type.isCompound() && !json.getString("typeClass").equals("compound")) {
throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be compound.");
}
if (!type.isControlledVocabulary() && type.isPrimitive() && !json.getString("typeClass").equals("primitive")) {
throw new JsonParseException("incorrect typeClass for field: " + json.getString("typeName", "") + ", should be primitive");
}
if (type.isControlledVocabulary() && !json.getString("typeClass").equals("controlledVocabulary")) {
throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be controlledVocabulary");
}
ret.setDatasetFieldType(type);
if (type.isCompound()) {
List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json);
for (DatasetFieldCompoundValue dsfcv : vals) {
dsfcv.setParentDatasetField(ret);
}
ret.setDatasetFieldCompoundValues(vals);
} else if (type.isControlledVocabulary()) {
List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json);
for (ControlledVocabularyValue cvv : vals) {
cvv.setDatasetFieldType(type);
}
ret.setControlledVocabularyValues(vals);
} else {
// primitive
List<DatasetFieldValue> values = parsePrimitiveValue(json);
for (DatasetFieldValue val : values) {
val.setDatasetField(ret);
}
ret.setDatasetFieldValues(values);
}
return ret;
}
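For reference, the JSON object that parseField expects for a primitive, repeatable field, matching the keyword round-trip test above, can be built like this. A sketch: the typeName, multiple and typeClass keys are the ones validated by parseField; the "value" key holding the individual strings reflects Dataverse's native JSON format; and the parser variable is assumed to be a JsonParser constructed with the usual services.

// Build the JSON for a repeatable primitive "keyword" field:
JsonObject keywordJson = Json.createObjectBuilder()
        .add("typeName", "keyword")
        .add("multiple", true)
        .add("typeClass", "primitive")
        .add("value", Json.createArrayBuilder().add("kw1").add("kw2").add("kw3"))
        .build();

// Parse it back into a DatasetField with three DatasetFieldValues
// (parseField throws JsonParseException on invalid input):
DatasetField keywordField = parser.parseField(keywordJson);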