Example 6 with DatasetFieldValue

Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS, from the class ImportServiceBean, method doImportHarvestedDataset:

@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException {
    if (harvestingClient == null || harvestingClient.getDataverse() == null) {
        throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient.");
    }
    Dataverse owner = harvestingClient.getDataverse();
    Dataset importedDataset = null;
    DatasetDTO dsDTO = null;
    String json = null;
    if ("ddi".equalsIgnoreCase(metadataFormat) || "oai_ddi".equals(metadataFormat) || metadataFormat.toLowerCase().matches("^oai_ddi.*")) {
        try {
            String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
            // TODO:
            // import type should be configurable - it should be possible to
            // select whether you want to harvest with or without files,
            // ImportType.HARVEST vs. ImportType.HARVEST_WITH_FILES
            logger.fine("importing DDI " + metadataFile.getAbsolutePath());
            dsDTO = importDDIService.doImport(ImportType.HARVEST_WITH_FILES, xmlToParse);
        } catch (IOException | XMLStreamException | ImportException e) {
            throw new ImportException("Failed to process DDI XML record: " + e.getClass() + " (" + e.getMessage() + ")");
        }
    } else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) {
        logger.fine("importing DC " + metadataFile.getAbsolutePath());
        try {
            String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
            dsDTO = importGenericService.processOAIDCxml(xmlToParse);
        } catch (IOException | XMLStreamException e) {
            throw new ImportException("Failed to process Dublin Core XML record: " + e.getClass() + " (" + e.getMessage() + ")");
        }
    } else if ("dataverse_json".equals(metadataFormat)) {
        // This is Dataverse metadata already formatted in JSON.
        // Simply read it into a string, and pass to the final import further down:
        logger.fine("Attempting to import custom dataverse metadata from file " + metadataFile.getAbsolutePath());
        json = new String(Files.readAllBytes(metadataFile.toPath()));
    } else {
        throw new ImportException("Unsupported import metadata format: " + metadataFormat);
    }
    if (json == null) {
        if (dsDTO != null) {
            // convert DTO to Json,
            Gson gson = new GsonBuilder().setPrettyPrinting().create();
            json = gson.toJson(dsDTO);
            logger.fine("JSON produced for the metadata harvested: " + json);
        } else {
            throw new ImportException("Failed to transform XML metadata format " + metadataFormat + " into a DatasetDTO");
        }
    }
    JsonReader jsonReader = Json.createReader(new StringReader(json));
    JsonObject obj = jsonReader.readObject();
    // and call parseDataset to read it into a Dataset object
    try {
        JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
        parser.setLenient(true);
        Dataset ds = parser.parseDataset(obj);
        // For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol
        // we support, it should be rejected.
        // (TODO: ! - add some way of keeping track of supported protocols!)
        // if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
        // throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
        // }
        ds.setOwner(owner);
        ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
        // Check data against required constraints
        List<ConstraintViolation<DatasetField>> violations = ds.getVersions().get(0).validateRequired();
        if (!violations.isEmpty()) {
            // For migration and harvest, add NA for missing required values
            for (ConstraintViolation<DatasetField> v : violations) {
                DatasetField f = v.getRootBean();
                f.setSingleValue(DatasetField.NA_VALUE);
            }
        }
        // Check data against validation constraints.
        // If we are migrating and "scrub migration data" is true, we attempt to fix invalid data;
        // if the fix fails, stop processing this file by throwing an exception.
        Set<ConstraintViolation> invalidViolations = ds.getVersions().get(0).validate();
        ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
        Validator validator = factory.getValidator();
        if (!invalidViolations.isEmpty()) {
            for (ConstraintViolation<DatasetFieldValue> v : invalidViolations) {
                DatasetFieldValue f = v.getRootBean();
                boolean fixed = false;
                boolean converted = false;
                // TODO: Is this scrubbing something we want to continue doing?
                if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
                    fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName());
                    converted = true;
                    if (fixed) {
                        Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
                        if (!scrubbedViolations.isEmpty()) {
                            fixed = false;
                        }
                    }
                }
                if (!fixed) {
                    String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; " + "Invalid value:  '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
                    cleanupLog.println(msg);
                    f.setValue(DatasetField.NA_VALUE);
                }
            }
        }
        // A global ID is required, in order for us to be able to harvest and import this dataset:
        if (StringUtils.isEmpty(ds.getGlobalId())) {
            throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global unique identifier that we could recognize, skipping.");
        }
        ds.setHarvestedFrom(harvestingClient);
        ds.setHarvestIdentifier(harvestIdentifier);
        Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
        if (existingDs != null) {
            // If this dataset already exists in ANOTHER dataverse, we are just going to skip it!
            if (existingDs.getOwner() != null && !owner.getId().equals(existingDs.getOwner().getId())) {
                throw new ImportException("The dataset with the global id " + ds.getGlobalId() + " already exists, in the dataverse " + existingDs.getOwner().getAlias() + ", skipping.");
            }
            // And if we already have a LOCAL dataset with this same global id in this dataverse, we're going to skip it also:
            if (!existingDs.isHarvested()) {
                throw new ImportException("A LOCAL dataset with the global id " + ds.getGlobalId() + " already exists in this dataverse; skipping.");
            }
            // We will replace the current version with the imported version.
            if (existingDs.getVersions().size() != 1) {
                throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
            }
            // Purge all the SOLR documents associated with this dataset from the
            // index server:
            indexService.deleteHarvestedDocuments(existingDs);
            // Files from harvested datasets are removed directly in the database; no need to call DeleteFileCommand on them:
            for (DataFile harvestedFile : existingDs.getFiles()) {
                DataFile merged = em.merge(harvestedFile);
                em.remove(merged);
            }
            // TODO:
            // Verify what happens with the indexed files in SOLR?
            // are they going to be overwritten by the reindexing of the dataset?
            existingDs.setFiles(null);
            Dataset merged = em.merge(existingDs);
            engineSvc.submit(new DestroyDatasetCommand(merged, dataverseRequest));
            importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
        } else {
            importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
        }
    } catch (JsonParseException | ImportException | CommandException ex) {
        logger.fine("Failed to import harvested dataset: " + ex.getClass() + ": " + ex.getMessage());
        try (FileOutputStream savedJsonFileStream = new FileOutputStream(new File(metadataFile.getAbsolutePath() + ".json"))) {
            byte[] jsonBytes = json.getBytes();
            int i = 0;
            while (i < jsonBytes.length) {
                int chunkSize = Math.min(8192, jsonBytes.length - i);
                savedJsonFileStream.write(jsonBytes, i, chunkSize);
                i += chunkSize;
            }
            savedJsonFileStream.flush();
        }
        logger.info("JSON produced saved in " + metadataFile.getAbsolutePath() + ".json");
        throw new ImportException("Failed to import harvested dataset: " + ex.getClass() + " (" + ex.getMessage() + ")", ex);
    }
    return importedDataset;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) CreateDatasetCommand(edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand) Gson(com.google.gson.Gson) JsonObject(javax.json.JsonObject) JsonParseException(edu.harvard.iq.dataverse.util.json.JsonParseException) DatasetDTO(edu.harvard.iq.dataverse.api.dto.DatasetDTO) DataFile(edu.harvard.iq.dataverse.DataFile) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) StringReader(java.io.StringReader) JsonReader(javax.json.JsonReader) JsonParser(edu.harvard.iq.dataverse.util.json.JsonParser) ValidatorFactory(javax.validation.ValidatorFactory) GsonBuilder(com.google.gson.GsonBuilder) Dataset(edu.harvard.iq.dataverse.Dataset) IOException(java.io.IOException) CommandException(edu.harvard.iq.dataverse.engine.command.exception.CommandException) Dataverse(edu.harvard.iq.dataverse.Dataverse) XMLStreamException(javax.xml.stream.XMLStreamException) ConstraintViolation(javax.validation.ConstraintViolation) FileOutputStream(java.io.FileOutputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File) Validator(javax.validation.Validator) DestroyDatasetCommand(edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand) TransactionAttribute(javax.ejb.TransactionAttribute)
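
For context, here is a minimal sketch of how a harvesting job might call this method. The bean field importService, the importOneRecord wrapper, and the log messages are assumptions for illustration, not taken from the Dataverse source:

@EJB
ImportServiceBean importService;

public void importOneRecord(DataverseRequest request, HarvestingClient client, String oaiIdentifier, File record, PrintWriter cleanupLog) {
    try {
        // "oai_dc" routes the call through the Dublin Core branch above
        Dataset imported = importService.doImportHarvestedDataset(request, client, oaiIdentifier, "oai_dc", record, cleanupLog);
        cleanupLog.println("Imported " + imported.getGlobalId());
    } catch (ImportException | IOException ex) {
        // REQUIRES_NEW means a failed record only rolls back its own
        // transaction; the rest of the harvest batch can proceed
        cleanupLog.println("Skipped " + oaiIdentifier + ": " + ex.getMessage());
    }
}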

Example 7 with DatasetFieldValue

Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS, from the class JsonPrinterTest, method constructPrimitive:

DatasetField constructPrimitive(String datasetFieldTypeName, String value) {
    DatasetField retVal = new DatasetField();
    retVal.setDatasetFieldType(datasetFieldTypeSvc.findByName(datasetFieldTypeName));
    retVal.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(retVal, value)));
    return retVal;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue)
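
A hypothetical call site for this helper, combined with the JsonPrinter serialization shown in Example 9 below; the "title" field name and value are assumptions:

// build a single-valued primitive field and serialize it (hypothetical test code)
DatasetField title = constructPrimitive("title", "Sample Dataset");
JsonObject json = JsonPrinter.json(title);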

Example 8 with DatasetFieldValue

Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS, from the class ForeignMetadataImportServiceBean, method createDatasetFieldValue:

private DatasetFieldCompoundValue createDatasetFieldValue(DatasetFieldType dsft, DatasetFieldCompoundValue savedCompoundValue, String elementText, DatasetVersion datasetVersion) {
    if (dsft.isPrimitive()) {
        if (!dsft.isHasParent()) {
            // simple primitive:
            DatasetField dsf = null;
            for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
                if (existingDsf.getDatasetFieldType().equals(dsft)) {
                    dsf = existingDsf;
                }
            }
            // if doesn't exist, create a new one:
            if (dsf == null) {
                dsf = new DatasetField();
                dsf.setDatasetFieldType(dsft);
                datasetVersion.getDatasetFields().add(dsf);
                dsf.setDatasetVersion(datasetVersion);
            }
            String dsfName = dsft.getName();
            if (!dsft.isControlledVocabulary()) {
                logger.fine("Creating a new value for field " + dsfName + ": " + elementText);
                DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
                newDsfv.setValue(elementText);
                dsf.getDatasetFieldValues().add(newDsfv);
            } else {
            // A controlled vocabulary entry:
            // first, let's see if it's a legit control vocab. entry:
            /* not supported yet; though I expect the commented-out code
                       below to work;
                    ControlledVocabularyValue legitControlledVocabularyValue = null;
                    Collection<ControlledVocabularyValue> definedVocabularyValues = dsft.getControlledVocabularyValues();
                    if (definedVocabularyValues != null) {
                        for (ControlledVocabularyValue definedVocabValue : definedVocabularyValues) {
                            if (elementText.equals(definedVocabValue.getStrValue())) {
                                logger.fine("Yes, " + elementText + " is a valid controlled vocabulary value for the field " + dsfName);
                                legitControlledVocabularyValue = definedVocabValue;
                                break;
                            }
                        }
                    }
                    if (legitControlledVocabularyValue != null) {
                        logger.fine("Adding controlled vocabulary value " + elementText + " to field " + dsfName);
                        dsf.getControlledVocabularyValues().add(legitControlledVocabularyValue);
                    }
                    */
            }
            // No compound values had to be created; returning null:
            return null;
        } else {
            // a primitive that is part of a compound value:
            // first, let's create the field and the value, for the
            // primitive node itself:
            DatasetField childField = new DatasetField();
            childField.setDatasetFieldType(dsft);
            DatasetFieldValue childValue = new DatasetFieldValue(childField);
            childValue.setValue(elementText);
            childField.getDatasetFieldValues().add(childValue);
            // see if a compound value of the right type has already been
            // created and passed to us:
            DatasetFieldCompoundValue parentCompoundValue = null;
            DatasetFieldType parentFieldType = dsft.getParentDatasetFieldType();
            if (parentFieldType == null) {
                logger.severe("Child field type with no parent field type defined!");
                // could just skip this field and try to continue - ?
                return null;
            }
            if (savedCompoundValue != null) {
                if (parentFieldType.equals(savedCompoundValue.getParentDatasetField().getDatasetFieldType())) {
                    parentCompoundValue = savedCompoundValue;
                }
            }
            if (parentCompoundValue == null) {
                // and to do that, we need to find or create the "parent"
                // dataset field for this compound value:
                // (I put quotes around "parent", because I really feel it
                // is a misnomer, and that the relationship between the compound value
                // and the corresponding dataset field should be called
                // "CompoundDatasetField", not "ParentDatasetField") (discuss?)
                DatasetField parentField = null;
                for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
                    if (existingDsf.getDatasetFieldType().equals(parentFieldType)) {
                        parentField = existingDsf;
                    }
                }
                // if doesn't exist, create a new one:
                if (parentField == null) {
                    parentField = new DatasetField();
                    parentField.setDatasetFieldType(parentFieldType);
                    datasetVersion.getDatasetFields().add(parentField);
                    parentField.setDatasetVersion(datasetVersion);
                }
                // and then create new compound value:
                parentCompoundValue = new DatasetFieldCompoundValue();
                parentCompoundValue.setParentDatasetField(parentField);
                parentField.getDatasetFieldCompoundValues().add(parentCompoundValue);
            }
            childField.setParentDatasetFieldCompoundValue(parentCompoundValue);
            parentCompoundValue.getChildDatasetFields().add(childField);
            return parentCompoundValue;
        }
    }
    return null;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) DatasetFieldCompoundValue(edu.harvard.iq.dataverse.DatasetFieldCompoundValue) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType)
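
The bidirectional wiring above is easy to lose track of, so here is a minimal sketch of the object graph the method builds for one compound value, using only setters that appear in the method itself; authorType and authorNameType are hypothetical DatasetFieldType instances:

DatasetField parentField = new DatasetField();
parentField.setDatasetFieldType(authorType); // the compound type

// one compound value, linked both ways to its parent field
DatasetFieldCompoundValue compound = new DatasetFieldCompoundValue();
compound.setParentDatasetField(parentField);
parentField.getDatasetFieldCompoundValues().add(compound);

// a primitive child field, linked both ways to the compound value
DatasetField childField = new DatasetField();
childField.setDatasetFieldType(authorNameType);
childField.setParentDatasetFieldCompoundValue(compound);
compound.getChildDatasetFields().add(childField);

// the actual value lives on the child field
DatasetFieldValue childValue = new DatasetFieldValue(childField);
childValue.setValue("Smith, Jane");
childField.getDatasetFieldValues().add(childValue);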

Example 9 with DatasetFieldValue

Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS, from the class JsonParserTest, method testPrimitiveRepeatesFieldRoundTrip:

@Test
public void testPrimitiveRepeatesFieldRoundTrip() throws JsonParseException {
    DatasetField expected = new DatasetField();
    expected.setDatasetFieldType(datasetFieldTypeSvc.findByName("keyword"));
    expected.setDatasetFieldValues(Arrays.asList(new DatasetFieldValue(expected, "kw1"), new DatasetFieldValue(expected, "kw2"), new DatasetFieldValue(expected, "kw3")));
    JsonObject json = JsonPrinter.json(expected);
    DatasetField actual = sut.parseField(json);
    assertFieldsEqual(actual, expected);
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) JsonObject(javax.json.JsonObject) Test(org.junit.Test)

Example 10 with DatasetFieldValue

Use of edu.harvard.iq.dataverse.DatasetFieldValue in project dataverse by IQSS, from the class JsonParser, method parseField:

public DatasetField parseField(JsonObject json) throws JsonParseException {
    if (json == null) {
        return null;
    }
    DatasetField ret = new DatasetField();
    DatasetFieldType type = datasetFieldSvc.findByNameOpt(json.getString("typeName", ""));
    if (type == null) {
        throw new JsonParseException("Can't find type '" + json.getString("typeName", "") + "'");
    }
    if (type.isAllowMultiples() != json.getBoolean("multiple")) {
        throw new JsonParseException("incorrect multiple for field " + json.getString("typeName", ""));
    }
    if (type.isCompound() && !json.getString("typeClass").equals("compound")) {
        throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be compound");
    }
    if (!type.isControlledVocabulary() && type.isPrimitive() && !json.getString("typeClass").equals("primitive")) {
        throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be primitive");
    }
    if (type.isControlledVocabulary() && !json.getString("typeClass").equals("controlledVocabulary")) {
        throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be controlledVocabulary");
    }
    ret.setDatasetFieldType(type);
    if (type.isCompound()) {
        List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json);
        for (DatasetFieldCompoundValue dsfcv : vals) {
            dsfcv.setParentDatasetField(ret);
        }
        ret.setDatasetFieldCompoundValues(vals);
    } else if (type.isControlledVocabulary()) {
        List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json);
        for (ControlledVocabularyValue cvv : vals) {
            cvv.setDatasetFieldType(type);
        }
        ret.setControlledVocabularyValues(vals);
    } else {
        // primitive
        List<DatasetFieldValue> values = parsePrimitiveValue(json);
        for (DatasetFieldValue val : values) {
            val.setDatasetField(ret);
        }
        ret.setDatasetFieldValues(values);
    }
    return ret;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) DatasetFieldType(edu.harvard.iq.dataverse.DatasetFieldType) DatasetFieldCompoundValue(edu.harvard.iq.dataverse.DatasetFieldCompoundValue) ControlledVocabularyValue(edu.harvard.iq.dataverse.ControlledVocabularyValue)
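
From the checks above, parseField expects a JSON object with typeName, multiple, and typeClass keys. Here is a sketch of a primitive input that should parse, reusing the "keyword" type from Example 9; the "value" key is an assumption about what parsePrimitiveValue (not shown here) reads:

String fieldJson = "{"
        + "\"typeName\": \"keyword\","
        + "\"multiple\": true,"
        + "\"typeClass\": \"primitive\","
        + "\"value\": [\"kw1\", \"kw2\", \"kw3\"]"
        + "}";
JsonObject obj = Json.createReader(new StringReader(fieldJson)).readObject();
DatasetField field = parser.parseField(obj); // parser: a configured JsonParser; throws JsonParseException on mismatch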

Aggregations

DatasetFieldValue (edu.harvard.iq.dataverse.DatasetFieldValue): 12 uses
DatasetField (edu.harvard.iq.dataverse.DatasetField): 11 uses
DatasetFieldCompoundValue (edu.harvard.iq.dataverse.DatasetFieldCompoundValue): 5 uses
ControlledVocabularyValue (edu.harvard.iq.dataverse.ControlledVocabularyValue): 4 uses
DatasetFieldType (edu.harvard.iq.dataverse.DatasetFieldType): 4 uses
JsonObject (javax.json.JsonObject): 4 uses
LinkedList (java.util.LinkedList): 3 uses
Gson (com.google.gson.Gson): 2 uses
GsonBuilder (com.google.gson.GsonBuilder): 2 uses
Dataset (edu.harvard.iq.dataverse.Dataset): 2 uses
DatasetDTO (edu.harvard.iq.dataverse.api.dto.DatasetDTO): 2 uses
CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException): 2 uses
CreateDatasetCommand (edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand): 2 uses
DestroyDatasetCommand (edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand): 2 uses
JsonParseException (edu.harvard.iq.dataverse.util.json.JsonParseException): 2 uses
JsonParser (edu.harvard.iq.dataverse.util.json.JsonParser): 2 uses
StringReader (java.io.StringReader): 2 uses
List (java.util.List): 2 uses
JsonReader (javax.json.JsonReader): 2 uses
ConstraintViolation (javax.validation.ConstraintViolation): 2 uses