Search in sources :

Example 6 with CreateDatasetCommand

use of edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand in project dataverse by IQSS.

the class ImportServiceBean method doImportHarvestedDataset.

@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException {
    if (harvestingClient == null || harvestingClient.getDataverse() == null) {
        throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient.");
    }
    Dataverse owner = harvestingClient.getDataverse();
    Dataset importedDataset = null;
    DatasetDTO dsDTO = null;
    String json = null;
    if ("ddi".equalsIgnoreCase(metadataFormat) || "oai_ddi".equals(metadataFormat) || metadataFormat.toLowerCase().matches("^oai_ddi.*")) {
        try {
            String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
            // TODO:
            // import type should be configurable - it should be possible to
            // select whether you want to harvest with or without files,
            // ImportType.HARVEST vs. ImportType.HARVEST_WITH_FILES
            logger.fine("importing DDI " + metadataFile.getAbsolutePath());
            dsDTO = importDDIService.doImport(ImportType.HARVEST_WITH_FILES, xmlToParse);
        } catch (IOException | XMLStreamException | ImportException e) {
            throw new ImportException("Failed to process DDI XML record: " + e.getClass() + " (" + e.getMessage() + ")");
        }
    } else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) {
        logger.fine("importing DC " + metadataFile.getAbsolutePath());
        try {
            String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
            dsDTO = importGenericService.processOAIDCxml(xmlToParse);
        } catch (IOException | XMLStreamException e) {
            throw new ImportException("Failed to process Dublin Core XML record: " + e.getClass() + " (" + e.getMessage() + ")");
        }
    } else if ("dataverse_json".equals(metadataFormat)) {
        // This is Dataverse metadata already formatted in JSON.
        // Simply read it into a string, and pass to the final import further down:
        logger.fine("Attempting to import custom dataverse metadata from file " + metadataFile.getAbsolutePath());
        json = new String(Files.readAllBytes(metadataFile.toPath()));
    } else {
        throw new ImportException("Unsupported import metadata format: " + metadataFormat);
    }
    if (json == null) {
        if (dsDTO != null) {
            // convert DTO to Json,
            Gson gson = new GsonBuilder().setPrettyPrinting().create();
            json = gson.toJson(dsDTO);
            logger.fine("JSON produced for the metadata harvested: " + json);
        } else {
            throw new ImportException("Failed to transform XML metadata format " + metadataFormat + " into a DatasetDTO");
        }
    }
    JsonReader jsonReader = Json.createReader(new StringReader(json));
    JsonObject obj = jsonReader.readObject();
    // and call parse Json to read it into a dataset
    try {
        JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
        parser.setLenient(true);
        Dataset ds = parser.parseDataset(obj);
        // For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol
        // we support, it should be rejected.
        // (TODO: ! - add some way of keeping track of supported protocols!)
        // if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
        // throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
        // }
        ds.setOwner(owner);
        ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
        // Check data against required contraints
        List<ConstraintViolation<DatasetField>> violations = ds.getVersions().get(0).validateRequired();
        if (!violations.isEmpty()) {
            // For migration and harvest, add NA for missing required values
            for (ConstraintViolation<DatasetField> v : violations) {
                DatasetField f = v.getRootBean();
                f.setSingleValue(DatasetField.NA_VALUE);
            }
        }
        // Check data against validation constraints
        // If we are migrating and "scrub migration data" is true we attempt to fix invalid data
        // if the fix fails stop processing of this file by throwing exception
        Set<ConstraintViolation> invalidViolations = ds.getVersions().get(0).validate();
        ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
        Validator validator = factory.getValidator();
        if (!invalidViolations.isEmpty()) {
            for (ConstraintViolation<DatasetFieldValue> v : invalidViolations) {
                DatasetFieldValue f = v.getRootBean();
                boolean fixed = false;
                boolean converted = false;
                // TODO: Is this scrubbing something we want to continue doing?
                if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
                    fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName());
                    converted = true;
                    if (fixed) {
                        Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
                        if (!scrubbedViolations.isEmpty()) {
                            fixed = false;
                        }
                    }
                }
                if (!fixed) {
                    String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; " + "Invalid value:  '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
                    cleanupLog.println(msg);
                    f.setValue(DatasetField.NA_VALUE);
                }
            }
        }
        // this dataset:
        if (StringUtils.isEmpty(ds.getGlobalId())) {
            throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global unique identifier that we could recognize, skipping.");
        }
        ds.setHarvestedFrom(harvestingClient);
        ds.setHarvestIdentifier(harvestIdentifier);
        Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
        if (existingDs != null) {
            // we are just going to skip it!
            if (existingDs.getOwner() != null && !owner.getId().equals(existingDs.getOwner().getId())) {
                throw new ImportException("The dataset with the global id " + ds.getGlobalId() + " already exists, in the dataverse " + existingDs.getOwner().getAlias() + ", skipping.");
            }
            // skip it also:
            if (!existingDs.isHarvested()) {
                throw new ImportException("A LOCAL dataset with the global id " + ds.getGlobalId() + " already exists in this dataverse; skipping.");
            }
            // We will replace the current version with the imported version.
            if (existingDs.getVersions().size() != 1) {
                throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
            }
            // Purge all the SOLR documents associated with this client from the
            // index server:
            indexService.deleteHarvestedDocuments(existingDs);
            // DeleteFileCommand on them.
            for (DataFile harvestedFile : existingDs.getFiles()) {
                DataFile merged = em.merge(harvestedFile);
                em.remove(merged);
                harvestedFile = null;
            }
            // TODO:
            // Verify what happens with the indexed files in SOLR?
            // are they going to be overwritten by the reindexing of the dataset?
            existingDs.setFiles(null);
            Dataset merged = em.merge(existingDs);
            engineSvc.submit(new DestroyDatasetCommand(merged, dataverseRequest));
            importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
        } else {
            importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
        }
    } catch (JsonParseException | ImportException | CommandException ex) {
        logger.fine("Failed to import harvested dataset: " + ex.getClass() + ": " + ex.getMessage());
        FileOutputStream savedJsonFileStream = new FileOutputStream(new File(metadataFile.getAbsolutePath() + ".json"));
        byte[] jsonBytes = json.getBytes();
        int i = 0;
        while (i < jsonBytes.length) {
            int chunkSize = i + 8192 <= jsonBytes.length ? 8192 : jsonBytes.length - i;
            savedJsonFileStream.write(jsonBytes, i, chunkSize);
            i += chunkSize;
            savedJsonFileStream.flush();
        }
        savedJsonFileStream.close();
        logger.info("JSON produced saved in " + metadataFile.getAbsolutePath() + ".json");
        throw new ImportException("Failed to import harvested dataset: " + ex.getClass() + " (" + ex.getMessage() + ")", ex);
    }
    return importedDataset;
}
Also used : DatasetField(edu.harvard.iq.dataverse.DatasetField) CreateDatasetCommand(edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand) Gson(com.google.gson.Gson) JsonObject(javax.json.JsonObject) JsonParseException(edu.harvard.iq.dataverse.util.json.JsonParseException) DatasetDTO(edu.harvard.iq.dataverse.api.dto.DatasetDTO) DataFile(edu.harvard.iq.dataverse.DataFile) DatasetFieldValue(edu.harvard.iq.dataverse.DatasetFieldValue) StringReader(java.io.StringReader) JsonReader(javax.json.JsonReader) JsonParser(edu.harvard.iq.dataverse.util.json.JsonParser) ValidatorFactory(javax.validation.ValidatorFactory) GsonBuilder(com.google.gson.GsonBuilder) Dataset(edu.harvard.iq.dataverse.Dataset) IOException(java.io.IOException) CommandException(edu.harvard.iq.dataverse.engine.command.exception.CommandException) Dataverse(edu.harvard.iq.dataverse.Dataverse) XMLStreamException(javax.xml.stream.XMLStreamException) ConstraintViolation(javax.validation.ConstraintViolation) FileOutputStream(java.io.FileOutputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File) Validator(javax.validation.Validator) DestroyDatasetCommand(edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand) TransactionAttribute(javax.ejb.TransactionAttribute)

Example 7 with CreateDatasetCommand

use of edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand in project dataverse by IQSS.

the class Dataverses method createDataset.

@POST
@Path("{identifier}/datasets")
public Response createDataset(String jsonBody, @PathParam("identifier") String parentIdtf) {
    try {
        User u = findUserOrDie();
        Dataverse owner = findDataverseOrDie(parentIdtf);
        JsonObject json;
        try (StringReader rdr = new StringReader(jsonBody)) {
            json = Json.createReader(rdr).readObject();
        } catch (JsonParsingException jpe) {
            LOGGER.log(Level.SEVERE, "Json: {0}", jsonBody);
            return error(Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage());
        }
        Dataset ds = new Dataset();
        ds.setOwner(owner);
        JsonObject jsonVersion = json.getJsonObject("datasetVersion");
        if (jsonVersion == null) {
            return error(Status.BAD_REQUEST, "Json POST data are missing datasetVersion object.");
        }
        try {
            try {
                DatasetVersion version = new DatasetVersion();
                version.setDataset(ds);
                // Use the two argument version so that the version knows which dataset it's associated with.
                version = jsonParser().parseDatasetVersion(jsonVersion, version);
                // force "initial version" properties
                version.setMinorVersionNumber(null);
                version.setVersionNumber(null);
                version.setVersionState(DatasetVersion.VersionState.DRAFT);
                LinkedList<DatasetVersion> versions = new LinkedList<>();
                versions.add(version);
                version.setDataset(ds);
                ds.setVersions(versions);
            } catch (javax.ejb.TransactionRolledbackLocalException rbe) {
                throw rbe.getCausedByException();
            }
        } catch (JsonParseException ex) {
            LOGGER.log(Level.INFO, "Error parsing dataset version from Json", ex);
            return error(Status.BAD_REQUEST, "Error parsing datasetVersion: " + ex.getMessage());
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Error parsing dataset version from Json", e);
            return error(Status.INTERNAL_SERVER_ERROR, "Error parsing datasetVersion: " + e.getMessage());
        }
        Dataset managedDs = execCommand(new CreateDatasetCommand(ds, createDataverseRequest(u)));
        return created("/datasets/" + managedDs.getId(), Json.createObjectBuilder().add("id", managedDs.getId()).add("persistentId", managedDs.getGlobalId()));
    } catch (WrappedResponse ex) {
        return ex.getResponse();
    }
}
Also used : AuthenticatedUser(edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) User(edu.harvard.iq.dataverse.authorization.users.User) Dataset(edu.harvard.iq.dataverse.Dataset) CreateDatasetCommand(edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand) JsonObject(javax.json.JsonObject) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) JsonParseException(edu.harvard.iq.dataverse.util.json.JsonParseException) Dataverse(edu.harvard.iq.dataverse.Dataverse) LinkedList(java.util.LinkedList) JsonParseException(edu.harvard.iq.dataverse.util.json.JsonParseException) EJBException(javax.ejb.EJBException) JsonParsingException(javax.json.stream.JsonParsingException) ConstraintViolationException(javax.validation.ConstraintViolationException) StringReader(java.io.StringReader) JsonParsingException(javax.json.stream.JsonParsingException) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST)

Aggregations

CreateDatasetCommand (edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand)7 Dataset (edu.harvard.iq.dataverse.Dataset)4 CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException)4 ConstraintViolation (javax.validation.ConstraintViolation)4 DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion)3 Dataverse (edu.harvard.iq.dataverse.Dataverse)3 AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser)3 IllegalCommandException (edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException)3 JsonParseException (edu.harvard.iq.dataverse.util.json.JsonParseException)3 StringReader (java.io.StringReader)3 EJBException (javax.ejb.EJBException)3 JsonObject (javax.json.JsonObject)3 Gson (com.google.gson.Gson)2 GsonBuilder (com.google.gson.GsonBuilder)2 DatasetField (edu.harvard.iq.dataverse.DatasetField)2 DatasetFieldValue (edu.harvard.iq.dataverse.DatasetFieldValue)2 DatasetDTO (edu.harvard.iq.dataverse.api.dto.DatasetDTO)2 DestroyDatasetCommand (edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand)2 JsonParser (edu.harvard.iq.dataverse.util.json.JsonParser)2 JsonReader (javax.json.JsonReader)2