Search in sources :

Example 6 with OrgDisambiguatedEntity

use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.

the class LoadFundRefData method createDisambiguatedOrg.

/**
     * Builds an {@link OrgDisambiguatedEntity} from the given RDF organization,
     * persists it through {@code orgDisambiguatedDao} and returns it.
     *
     * @param organization
     *            the organization read from the RDF file
     * @return the entity that was handed to {@code orgDisambiguatedDao.persist}
     * */
private OrgDisambiguatedEntity createDisambiguatedOrg(RDFOrganization organization) {
    LOGGER.info("Creating disambiguated org {}", organization.name);

    // The org type is "<type>" or "<type>/<subtype>" when a subtype is present.
    String subtypeSuffix = "";
    if (!StringUtils.isEmpty(organization.subtype)) {
        subtypeSuffix = "/" + organization.subtype;
    }
    String fullOrgType = organization.type + subtypeSuffix;

    // A blank country is stored as null rather than being parsed.
    Iso3166Country parsedCountry = null;
    if (StringUtils.isNotBlank(organization.country)) {
        parsedCountry = Iso3166Country.fromValue(organization.country);
    }

    OrgDisambiguatedEntity newOrg = new OrgDisambiguatedEntity();

    // Descriptive data
    newOrg.setName(organization.name);
    newOrg.setOrgType(fullOrgType);

    // Location data
    newOrg.setCountry(parsedCountry);
    newOrg.setRegion(organization.stateCode);
    newOrg.setCity(organization.city);

    // Source data: the DOI is used both as the source id and the source URL
    newOrg.setSourceType(FUNDREF_SOURCE_TYPE);
    newOrg.setSourceId(organization.doi);
    newOrg.setSourceUrl(organization.doi);

    // NOTE(review): any non-empty status is recorded as DEPRECATED here,
    // regardless of its actual value - confirm this is intended.
    boolean hasStatus = !PojoUtil.isEmpty(organization.status);
    if (hasStatus) {
        newOrg.setStatus(OrganizationStatus.DEPRECATED.name());
    }

    orgDisambiguatedDao.persist(newOrg);
    return newOrg;
}
Also used : OrgDisambiguatedEntity(org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity) Iso3166Country(org.orcid.jaxb.model.message.Iso3166Country)

Example 7 with OrgDisambiguatedEntity

use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.

the class LoadFundRefData method execute.

/**
     * Executes the import process.
     *
     * Parses {@code fileToLoad} as an XML document, evaluates
     * {@code conceptsExpression} against it and, for every resulting node,
     * builds an {@code RDFOrganization} and reconciles it with the database:
     * <ol>
     * <li>looks for an existing org via {@code findByDetails};</li>
     * <li>if one exists and its details changed, updates and merges it;</li>
     * <li>else if its ID changed, creates an external identifier;</li>
     * <li>else if its status changed, updates the status and merges it;</li>
     * <li>if none exists, creates a new disambiguated org.</li>
     * </ol>
     * Parsing and I/O failures are logged (with their cause) and end the run;
     * they are not rethrown. The counters are always logged on exit.
     * */
private void execute() {
    // try-with-resources: the input stream is closed on every exit path
    // (the original never closed it).
    try (FileInputStream file = new FileInputStream(fileToLoad)) {
        long start = System.currentTimeMillis();
        // NOTE(review): the parser is used with its default configuration
        // (DTDs / external entities are not explicitly disabled) - confirm the
        // input file is trusted or harden the factory.
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        Document xmlDocument = builder.parse(file);
        // Parent node
        NodeList nodeList = (NodeList) xPath.compile(conceptsExpression).evaluate(xmlDocument, XPathConstants.NODESET);
        for (int i = 0; i < nodeList.getLength(); i++) {
            RDFOrganization rdfOrganization = getOrganization(xmlDocument, nodeList.item(i).getAttributes());
            LOGGER.info("Processing organization from RDF, doi:{}, name:{}, country:{}, state:{}, stateCode:{}, type:{}, subtype:{}, status:{}", new Object[] { rdfOrganization.doi, rdfOrganization.name, rdfOrganization.country, rdfOrganization.state, rdfOrganization.stateCode, rdfOrganization.type, rdfOrganization.subtype, rdfOrganization.status });
            // #1: Look for an existing org
            OrgDisambiguatedEntity existingEntity = findByDetails(rdfOrganization);
            if (existingEntity != null) {
                // #2: If the name, city or region changed, update those values
                if (entityChanged(rdfOrganization, existingEntity)) {
                    existingEntity.setCity(rdfOrganization.city);
                    Iso3166Country country = StringUtils.isNotBlank(rdfOrganization.country) ? Iso3166Country.fromValue(rdfOrganization.country) : null;
                    existingEntity.setCountry(country);
                    existingEntity.setName(rdfOrganization.name);
                    String orgType = rdfOrganization.type + (StringUtils.isNotBlank(rdfOrganization.subtype) ? ('/' + rdfOrganization.subtype) : "");
                    existingEntity.setOrgType(orgType);
                    existingEntity.setRegion(rdfOrganization.stateCode);
                    existingEntity.setSourceId(rdfOrganization.doi);
                    existingEntity.setSourceType(FUNDREF_SOURCE_TYPE);
                    existingEntity.setSourceUrl(rdfOrganization.doi);
                    existingEntity.setLastModified(new Date());
                    // Flag the org so it gets picked up for re-indexing
                    existingEntity.setIndexingStatus(IndexingStatus.PENDING);
                    existingEntity.setStatus(rdfOrganization.status);
                    orgDisambiguatedDao.merge(existingEntity);
                    updatedOrgs += 1;
                } else if (idChanged(rdfOrganization, existingEntity)) {
                    // #3: If the ID changed, create an external identifier
                    createExternalIdentifier(existingEntity, rdfOrganization.doi);
                    addedExternalIdentifiers += 1;
                } else if (statusChanged(rdfOrganization, existingEntity)) {
                    // If the status changed, update the status
                    existingEntity.setStatus(rdfOrganization.status);
                    existingEntity.setLastModified(new Date());
                    existingEntity.setIndexingStatus(IndexingStatus.PENDING);
                    orgDisambiguatedDao.merge(existingEntity);
                }
            } else {
                // #4: Else, create the new org
                createDisambiguatedOrg(rdfOrganization);
                addedDisambiguatedOrgs += 1;
            }
        }
        long end = System.currentTimeMillis();
        LOGGER.info("Time taken to process the files: {}", (end - start));
    } catch (FileNotFoundException fne) {
        // The exception is passed as the last argument so the cause and stack
        // trace are logged instead of being silently dropped.
        LOGGER.error("Unable to read file {}", fileToLoad, fne);
    } catch (ParserConfigurationException pce) {
        LOGGER.error("Unable to initialize the DocumentBuilder", pce);
    } catch (IOException | SAXException e) {
        LOGGER.error("Unable to parse document {}", fileToLoad, e);
    } catch (XPathExpressionException xpe) {
        LOGGER.error("XPathExpressionException {}", xpe.getMessage(), xpe);
    } finally {
        // The original message had three placeholders for four arguments, so
        // the value of getTotal() was never printed.
        LOGGER.info("Number new Disambiguated Orgs={}, Updated Orgs={}, new External Identifiers={}, Total={}", new Object[] { addedDisambiguatedOrgs, updatedOrgs, addedExternalIdentifiers, getTotal() });
    }
}
Also used : DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) XPathExpressionException(javax.xml.xpath.XPathExpressionException) NodeList(org.w3c.dom.NodeList) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Document(org.w3c.dom.Document) Iso3166Country(org.orcid.jaxb.model.message.Iso3166Country) FileInputStream(java.io.FileInputStream) Date(java.util.Date) SAXException(org.xml.sax.SAXException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) OrgDisambiguatedEntity(org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity) ParserConfigurationException(javax.xml.parsers.ParserConfigurationException)

Example 8 with OrgDisambiguatedEntity

use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.

the class LoadRinggoldData method processDeletedIdsLine.

/**
 * Processes one row of deleted-ID data: {@code line[0]} is read as the
 * deleted source id and {@code line[1]} as its replacement source id.
 *
 * If the deleted id is not in the database nothing happens. If it is, but
 * the replacement is not, the deletion is skipped and counted in
 * {@code numDeletionsSkipped}. Otherwise the deleted org is removed from
 * Solr, replaced by the replacement org in the database, removed from the
 * database, and counted in {@code numDeleted}.
 *
 * @param line
 *            the row to process; must contain at least two elements
 */
private void processDeletedIdsLine(String[] line) {
    String deletedSourceId = line[0];
    String replacementSourceId = line[1];
    OrgDisambiguatedEntity deletedEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(deletedSourceId, RINGGOLD_SOURCE_TYPE);
    if (deletedEntity != null) {
        LOGGER.info("Deleted ID exists in DB, id={}", deletedSourceId);
        Long deletedEntityId = deletedEntity.getId();
        OrgDisambiguatedEntity replacementEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(replacementSourceId, RINGGOLD_SOURCE_TYPE);
        if (replacementEntity == null) {
            // Log the id that was looked up; the entity itself is always null
            // in this branch, so logging it carried no information.
            LOGGER.warn("Replacement does not exist, id={}", replacementSourceId);
            numDeletionsSkipped++;
        } else {
            Long replacementEntityId = replacementEntity.getId();
            // Statement order is kept exactly as in the original: Solr removal,
            // then replace, then DB removal.
            orgDisambiguatedSolrDao.remove(deletedEntityId);
            orgDisambiguatedDao.replace(deletedEntityId, replacementEntityId);
            orgDisambiguatedDao.remove(deletedEntityId);
            numDeleted++;
        }
    }
}
Also used : OrgDisambiguatedEntity(org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity)

Example 9 with OrgDisambiguatedEntity

use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.

the class LoadRinggoldData method checkForDuplicates.

/**
 * Logs one tab-separated line for every entity returned by
 * {@code orgDisambiguatedDao.findDuplicates()}. Nothing is modified.
 */
private void checkForDuplicates() {
    LOGGER.info("Checking for duplicates");
    for (OrgDisambiguatedEntity org : orgDisambiguatedDao.findDuplicates()) {
        // Source type, source id, name, city, region, country, org type
        Object[] fields = {
                org.getSourceType(),
                org.getSourceId(),
                org.getName(),
                org.getCity(),
                org.getRegion(),
                org.getCountry(),
                org.getOrgType() };
        LOGGER.info("Found duplicate: {}\t{}\t{}\t{}\t{}\t{}\t{}", fields);
    }
    LOGGER.info("Finished checking for duplicates");
}
Also used : OrgDisambiguatedEntity(org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity)

Example 10 with OrgDisambiguatedEntity

use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.

the class OrgDisambiguatedManagerImpl method processOrgsForIndexing.

/**
 * Repeatedly fetches chunks of up to {@code INDEXING_CHUNK_SIZE} orgs whose
 * indexing status is PENDING and passes each one to
 * {@code processDisambiguatedOrgInTransaction}, stopping once a fetch
 * returns an empty chunk.
 *
 * NOTE(review): every fetch starts at offset 0, so termination presumably
 * relies on processing moving each org out of PENDING - confirm.
 */
@Override
public synchronized void processOrgsForIndexing() {
    LOGGER.info("About to process disambiguated orgs for indexing");
    while (true) {
        List<OrgDisambiguatedEntity> chunk = orgDisambiguatedDaoReadOnly.findOrgsByIndexingStatus(IndexingStatus.PENDING, 0, INDEXING_CHUNK_SIZE);
        LOGGER.info("Found chunk of {} disambiguated orgs for indexing", chunk.size());
        if (chunk.isEmpty()) {
            // Nothing left pending: done
            return;
        }
        for (OrgDisambiguatedEntity pendingOrg : chunk) {
            processDisambiguatedOrgInTransaction(pendingOrg);
        }
    }
}
Also used : OrgDisambiguatedEntity(org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity)

Aggregations

OrgDisambiguatedEntity (org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity): 15, Iso3166Country (org.orcid.jaxb.model.message.Iso3166Country): 3, FuzzyDateEntity (org.orcid.persistence.jpa.entities.FuzzyDateEntity): 2, CSVWriter (au.com.bytecode.opencsv.CSVWriter): 1, FileInputStream (java.io.FileInputStream): 1, FileNotFoundException (java.io.FileNotFoundException): 1, IOException (java.io.IOException): 1, Date (java.util.Date): 1, DocumentBuilder (javax.xml.parsers.DocumentBuilder): 1, DocumentBuilderFactory (javax.xml.parsers.DocumentBuilderFactory): 1, ParserConfigurationException (javax.xml.parsers.ParserConfigurationException): 1, XPathExpressionException (javax.xml.xpath.XPathExpressionException): 1, FundingExternalIdentifiers (org.orcid.core.adapter.impl.jsonidentifiers.FundingExternalIdentifiers): 1, AmbiguousOrgEntity (org.orcid.persistence.jpa.entities.AmbiguousOrgEntity): 1, OrgEntity (org.orcid.persistence.jpa.entities.OrgEntity): 1, SourceEntity (org.orcid.persistence.jpa.entities.SourceEntity): 1, OrgDisambiguated (org.orcid.pojo.OrgDisambiguated): 1, Document (org.w3c.dom.Document): 1, NodeList (org.w3c.dom.NodeList): 1, SAXException (org.xml.sax.SAXException): 1