use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.
the class LoadFundRefData method createDisambiguatedOrg.
/**
 * Builds a new {@link OrgDisambiguatedEntity} from an organization read from
 * the RDF file, persists it through {@code orgDisambiguatedDao} and returns it.
 *
 * <p>The organization's DOI is stored as both the source id and the source
 * URL, and the source type is always {@code FUNDREF_SOURCE_TYPE}.
 *
 * @param organization the organization whose values are copied to the new entity
 * @return the entity that was handed to {@code orgDisambiguatedDao.persist}
 */
private OrgDisambiguatedEntity createDisambiguatedOrg(RDFOrganization organization) {
    LOGGER.info("Creating disambiguated org {}", organization.name);

    // Org type is "<type>" or "<type>/<subtype>" when a subtype is present.
    StringBuilder typeBuilder = new StringBuilder().append(organization.type);
    if (!StringUtils.isEmpty(organization.subtype)) {
        typeBuilder.append("/").append(organization.subtype);
    }

    // A blank country is stored as null rather than being parsed.
    Iso3166Country parsedCountry = null;
    if (StringUtils.isNotBlank(organization.country)) {
        parsedCountry = Iso3166Country.fromValue(organization.country);
    }

    OrgDisambiguatedEntity newOrg = new OrgDisambiguatedEntity();
    newOrg.setSourceType(FUNDREF_SOURCE_TYPE);
    newOrg.setSourceId(organization.doi);
    newOrg.setSourceUrl(organization.doi);
    newOrg.setName(organization.name);
    newOrg.setOrgType(typeBuilder.toString());
    newOrg.setCountry(parsedCountry);
    newOrg.setRegion(organization.stateCode);
    newOrg.setCity(organization.city);

    // NOTE(review): any non-empty status is recorded as DEPRECATED here, while
    // the update path in execute() stores the raw status value - confirm that
    // this difference is intended.
    boolean hasStatus = !PojoUtil.isEmpty(organization.status);
    if (hasStatus) {
        newOrg.setStatus(OrganizationStatus.DEPRECATED.name());
    }

    orgDisambiguatedDao.persist(newOrg);
    return newOrg;
}
use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.
the class LoadFundRefData method execute.
/**
 * Executes the import process.
 *
 * <p>Parses {@code fileToLoad} as XML, evaluates {@code conceptsExpression}
 * against the document and, for each resulting node, builds an
 * {@code RDFOrganization} and then:
 * <ol>
 * <li>looks for an existing entity with {@code findByDetails};</li>
 * <li>if one exists and {@code entityChanged} is true, copies the RDF values
 * onto it, marks it {@code PENDING} for indexing and merges it;</li>
 * <li>otherwise, if {@code idChanged} is true, creates an external identifier
 * for the DOI;</li>
 * <li>otherwise, if {@code statusChanged} is true, updates only the status,
 * marks the entity {@code PENDING} and merges it;</li>
 * <li>if no entity exists, creates a new one.</li>
 * </ol>
 *
 * <p>Parsing and XPath failures are logged (with their cause) and end the
 * import; they are not rethrown. A summary of the counters is always logged.
 */
private void execute() {
    long start = System.currentTimeMillis();
    // try-with-resources: the stream is closed on every exit path, including
    // parse failures. A failure in close() is an IOException and is handled below.
    try (FileInputStream file = new FileInputStream(fileToLoad)) {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        Document xmlDocument = builder.parse(file);
        // Parent node
        NodeList nodeList = (NodeList) xPath.compile(conceptsExpression).evaluate(xmlDocument, XPathConstants.NODESET);
        for (int i = 0; i < nodeList.getLength(); i++) {
            RDFOrganization rdfOrganization = getOrganization(xmlDocument, nodeList.item(i).getAttributes());
            LOGGER.info("Processing organization from RDF, doi:{}, name:{}, country:{}, state:{}, stateCode:{}, type:{}, subtype:{}, status:{}",
                    new Object[] { rdfOrganization.doi, rdfOrganization.name, rdfOrganization.country, rdfOrganization.state, rdfOrganization.stateCode,
                            rdfOrganization.type, rdfOrganization.subtype, rdfOrganization.status });
            // #1: Look for an existing org
            OrgDisambiguatedEntity existingEntity = findByDetails(rdfOrganization);
            if (existingEntity == null) {
                // #4: No existing org, create the new one
                createDisambiguatedOrg(rdfOrganization);
                addedDisambiguatedOrgs += 1;
                continue;
            }

            if (entityChanged(rdfOrganization, existingEntity)) {
                // #2: If the name, city or region changed, update those values
                Iso3166Country country = StringUtils.isNotBlank(rdfOrganization.country) ? Iso3166Country.fromValue(rdfOrganization.country) : null;
                String orgType = rdfOrganization.type + (StringUtils.isNotBlank(rdfOrganization.subtype) ? ('/' + rdfOrganization.subtype) : "");
                existingEntity.setCity(rdfOrganization.city);
                existingEntity.setCountry(country);
                existingEntity.setName(rdfOrganization.name);
                existingEntity.setOrgType(orgType);
                existingEntity.setRegion(rdfOrganization.stateCode);
                existingEntity.setSourceId(rdfOrganization.doi);
                existingEntity.setSourceType(FUNDREF_SOURCE_TYPE);
                existingEntity.setSourceUrl(rdfOrganization.doi);
                existingEntity.setLastModified(new Date());
                // Flag the entity so it gets picked up for re-indexing
                existingEntity.setIndexingStatus(IndexingStatus.PENDING);
                existingEntity.setStatus(rdfOrganization.status);
                orgDisambiguatedDao.merge(existingEntity);
                updatedOrgs += 1;
            } else if (idChanged(rdfOrganization, existingEntity)) {
                // #3: If the ID changed, create an external identifier
                createExternalIdentifier(existingEntity, rdfOrganization.doi);
                addedExternalIdentifiers += 1;
            } else if (statusChanged(rdfOrganization, existingEntity)) {
                // If the status changed, update the status
                existingEntity.setStatus(rdfOrganization.status);
                existingEntity.setLastModified(new Date());
                existingEntity.setIndexingStatus(IndexingStatus.PENDING);
                orgDisambiguatedDao.merge(existingEntity);
            }
        }
        long end = System.currentTimeMillis();
        LOGGER.info("Time taken to process the files: {}", (end - start));
    } catch (FileNotFoundException fne) {
        // The exception is passed as the last argument so the cause is logged too
        LOGGER.error("Unable to read file {}", fileToLoad, fne);
    } catch (ParserConfigurationException pce) {
        LOGGER.error("Unable to initialize the DocumentBuilder", pce);
    } catch (IOException | SAXException e) {
        LOGGER.error("Unable to parse document {}", fileToLoad, e);
    } catch (XPathExpressionException xpe) {
        LOGGER.error("XPathExpressionException {}", xpe.getMessage(), xpe);
    } finally {
        // One placeholder per argument: the total was previously passed but never printed
        LOGGER.info("Number new Disambiguated Orgs={}, Updated Orgs={}, new External Identifiers={}, Total={}",
                new Object[] { addedDisambiguatedOrgs, updatedOrgs, addedExternalIdentifiers, getTotal() });
    }
}
use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.
the class LoadRinggoldData method processDeletedIdsLine.
/**
 * Processes one line of the deleted-ids file.
 *
 * <p>The first column is the source id that was deleted and the second is the
 * source id that replaces it; both are looked up with
 * {@code RINGGOLD_SOURCE_TYPE}. When the deleted org is not in the database
 * nothing happens. When the replacement org is missing, the deletion is
 * skipped and counted in {@code numDeletionsSkipped}. Otherwise the deleted
 * org is removed from Solr, replaced by the replacement org via
 * {@code orgDisambiguatedDao.replace}, removed from the database and counted
 * in {@code numDeleted}.
 *
 * @param line the columns of the line; assumed to hold at least two elements
 */
private void processDeletedIdsLine(String[] line) {
    String deletedSourceId = line[0];
    String replacementSourceId = line[1];
    OrgDisambiguatedEntity deletedEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(deletedSourceId, RINGGOLD_SOURCE_TYPE);
    if (deletedEntity == null) {
        // Nothing to delete: the id never made it into the database
        return;
    }
    LOGGER.info("Deleted ID exists in DB, id={}", deletedSourceId);
    Long deletedEntityId = deletedEntity.getId();
    OrgDisambiguatedEntity replacementEntity = orgDisambiguatedDao.findBySourceIdAndSourceType(replacementSourceId, RINGGOLD_SOURCE_TYPE);
    if (replacementEntity == null) {
        // Log the id that was looked up; the entity itself is null in this branch
        LOGGER.warn("Replacement does not exist, id={}", replacementSourceId);
        numDeletionsSkipped++;
        return;
    }
    Long replacementEntityId = replacementEntity.getId();
    // Order matters: drop from the index, re-point references, then delete the row
    orgDisambiguatedSolrDao.remove(deletedEntityId);
    orgDisambiguatedDao.replace(deletedEntityId, replacementEntityId);
    orgDisambiguatedDao.remove(deletedEntityId);
    numDeleted++;
}
use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.
the class LoadRinggoldData method checkForDuplicates.
/**
 * Logs one tab-separated line for every entity returned by
 * {@code orgDisambiguatedDao.findDuplicates()}: source type, source id, name,
 * city, region, country and org type. Nothing is modified.
 */
private void checkForDuplicates() {
    LOGGER.info("Checking for duplicates");
    for (OrgDisambiguatedEntity org : orgDisambiguatedDao.findDuplicates()) {
        Object[] columns = new Object[] {
                org.getSourceType(),
                org.getSourceId(),
                org.getName(),
                org.getCity(),
                org.getRegion(),
                org.getCountry(),
                org.getOrgType()
        };
        LOGGER.info("Found duplicate: {}\t{}\t{}\t{}\t{}\t{}\t{}", columns);
    }
    LOGGER.info("Finished checking for duplicates");
}
use of org.orcid.persistence.jpa.entities.OrgDisambiguatedEntity in project ORCID-Source by ORCID.
the class OrgDisambiguatedManagerImpl method processOrgsForIndexing.
/**
 * Processes disambiguated orgs whose indexing status is {@code PENDING}, one
 * chunk of at most {@code INDEXING_CHUNK_SIZE} at a time, until a query
 * returns no more orgs.
 *
 * <p>Every query starts at offset 0, so the loop only ends once processed
 * orgs stop being returned as {@code PENDING}.
 * NOTE(review): presumably {@code processDisambiguatedOrgInTransaction}
 * changes the indexing status - confirm, otherwise this never terminates.
 */
@Override
public synchronized void processOrgsForIndexing() {
    LOGGER.info("About to process disambiguated orgs for indexing");
    while (true) {
        List<OrgDisambiguatedEntity> pendingChunk = orgDisambiguatedDaoReadOnly.findOrgsByIndexingStatus(IndexingStatus.PENDING, 0, INDEXING_CHUNK_SIZE);
        LOGGER.info("Found chunk of {} disambiguated orgs for indexing", pendingChunk.size());
        if (pendingChunk.isEmpty()) {
            // Nothing left to index
            return;
        }
        for (OrgDisambiguatedEntity pendingOrg : pendingChunk) {
            processDisambiguatedOrgInTransaction(pendingOrg);
        }
    }
}
Aggregations