use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.
the class LinkerTermMatcher method link.
/**
 * Walks every node in the "taxons" node index and hands the nodes to
 * {@code handleBatch} in groups, keyed by node id.
 *
 * <p>A batch is flushed whenever the running counter reaches a multiple of
 * {@code BATCH_SIZE}; whatever remains after the last hit is flushed once more
 * at the end. The index hits are closed in a {@code finally} block so the
 * underlying index resources are released even when wrapping a node or
 * processing a batch throws.
 */
@Override
public void link() {
    Index<Node> taxons = graphDb.index().forNodes("taxons");
    IndexHits<Node> hits = taxons.query("*:*");
    try {
        final Map<Long, TaxonNode> nodeMap = new HashMap<Long, TaxonNode>();
        // Starts at 1, so the flush check fires before the BATCH_SIZE-th node is added.
        int counter = 1;
        for (Node hit : hits) {
            if (counter % BATCH_SIZE == 0) {
                handleBatch(graphDb, termMatcher, nodeMap, counter);
            }
            TaxonNode node = new TaxonNode(hit);
            nodeMap.put(node.getNodeID(), node);
            counter++;
        }
        // Flush the nodes collected since the last full batch.
        handleBatch(graphDb, termMatcher, nodeMap, counter);
    } finally {
        // Previously never closed explicitly; an exception mid-iteration leaked the hits.
        hits.close();
    }
}
use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.
the class LinkerTermMatcher method handleBatch.
/**
 * Sends the names of the collected taxon nodes to the term matcher and connects
 * every usable match back to its originating node.
 *
 * <p>Each entry is submitted as {@code "<nodeId>|<name>"}. A match is linked via
 * {@code NodeUtil.connectTaxa} only when the node id is still known to this batch,
 * the relation type is not {@code NameType.NONE}, and
 * {@code TaxonUtil.likelyHomonym} does not flag the pair. A
 * {@code PropertyEnricherException} from the matcher is logged and the batch is
 * abandoned; it is not rethrown.
 *
 * @param graphDb     graph database passed through to {@code NodeUtil.connectTaxa}
 * @param termMatcher matcher queried for the batch's names
 * @param nodeMap     taxon nodes of this batch keyed by node id; cleared before returning normally
 * @param counter     running node counter, used only to derive the batch number in log messages
 */
private void handleBatch(final GraphDatabaseService graphDb, TermMatcher termMatcher, final Map<Long, TaxonNode> nodeMap, int counter) {
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    String msgPrefix = "batch #" + counter / BATCH_SIZE;
    LOG.info(msgPrefix + " preparing...");
    List<String> nodeIdAndNames = new ArrayList<String>();
    for (Map.Entry<Long, TaxonNode> entry : nodeMap.entrySet()) {
        String name = entry.getKey() + "|" + entry.getValue().getName();
        nodeIdAndNames.add(name);
    }
    try {
        if (nodeIdAndNames.size() > 0) {
            termMatcher.findTermsForNames(nodeIdAndNames, new TermMatchListener() {
                @Override
                public void foundTaxonForName(Long nodeId, String name, Taxon taxon, NameType relType) {
                    TaxonNode taxonNode = nodeMap.get(nodeId);
                    if (taxonNode != null && NameType.NONE != relType && !TaxonUtil.likelyHomonym(taxon, taxonNode)) {
                        NodeUtil.connectTaxa(taxon, taxonNode, graphDb, RelTypes.forType(relType));
                    }
                }
            });
        }
    } catch (PropertyEnricherException ex) {
        LOG.error(msgPrefix + " problem matching terms", ex);
    }
    stopWatch.stop();
    // Average over the names actually submitted: the final batch is usually smaller
    // than BATCH_SIZE, so dividing by BATCH_SIZE under-reported its per-name cost.
    int nameCount = nodeIdAndNames.size();
    double msPerName = nameCount == 0 ? 0.0 : (1.0 * stopWatch.getTime() / nameCount);
    LOG.info(msgPrefix + " completed in [" + stopWatch.getTime() + "] ms (" + msPerName + " ms/name )");
    nodeMap.clear();
}
use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.
the class LinkerTrustyNanoPubs method generateOrganisms.
/**
 * Appends the organism statements for an interaction to {@code builder}.
 *
 * <p>First, one {@code obo:RO_0000057 :Organism_N} line is written per participant,
 * where N is the participant's position in iteration order; the last line ends with
 * "." and the others with ";". Then, for every participant whose first classification
 * resolves to a non-blank NCBI taxon id, a {@code :Organism_N a NCBITaxon:<id>} statement
 * is written, followed by one {@code <IRI> :Organism_M} line per outgoing, non-inverted
 * interaction relationship that carries a non-blank IRI.
 *
 * <p>N in the second pass is the same position-based number as in the first pass, so a
 * participant without an NCBI taxon id no longer shifts the numbers of the participants
 * after it. Relationships whose end node is not one of the interaction's participants
 * are skipped rather than rendered as {@code :Organism_null}.
 *
 * @param builder     target that receives the generated text
 * @param interaction interaction whose participants are described
 */
public static void generateOrganisms(StringBuilder builder, InteractionNode interaction) {
    Collection<Specimen> participants = interaction.getParticipants();
    Map<Long, Integer> nodeIdParticipantMap = new TreeMap<>();
    int participantNumber = 0;
    for (Specimen participant : participants) {
        builder.append(String.format("\n obo:RO_0000057 :Organism_%d ", participantNumber));
        builder.append(participants.size() - 1 == participantNumber ? "." : ";");
        nodeIdParticipantMap.put(((NodeBacked) participant).getNodeID(), participantNumber);
        participantNumber++;
    }
    participantNumber = 0;
    for (Specimen participant : participants) {
        Iterable<Relationship> classification = NodeUtil.getClassifications(participant);
        if (classification != null && classification.iterator().hasNext()) {
            TaxonNode taxonNode = new TaxonNode(classification.iterator().next().getEndNode());
            String ncbiTaxonId = resolveNCBITaxonId(taxonNode);
            if (StringUtils.isNotBlank(ncbiTaxonId)) {
                builder.append(String.format("\n :Organism_%d a NCBITaxon:%s ", participantNumber, ncbiTaxonId));
                Iterable<Relationship> interactRel = ((NodeBacked) participant).getUnderlyingNode().getRelationships(Direction.OUTGOING, NodeUtil.asNeo4j(InteractType.values()));
                for (Relationship relationship : interactRel) {
                    if (!relationship.hasProperty(PropertyAndValueDictionary.INVERTED)
                            && relationship.hasProperty(PropertyAndValueDictionary.IRI)) {
                        String interactIRI = relationship.getProperty(PropertyAndValueDictionary.IRI).toString();
                        Integer targetNumber = nodeIdParticipantMap.get(relationship.getEndNode().getId());
                        // A target outside this interaction has no declared :Organism_N to point at.
                        if (StringUtils.isNotBlank(interactIRI) && targetNumber != null) {
                            builder.append(";\n");
                            builder.append(String.format(" <%s> :Organism_%d ", interactIRI, targetNumber));
                        }
                    }
                }
                builder.append(".");
            }
        }
        // Advance for every participant, described or not, to stay aligned with the
        // numbering declared in the first loop and stored in nodeIdParticipantMap.
        participantNumber++;
    }
}
use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.
the class NonResolvingTaxonIndex method findTaxonByKey.
/**
 * Looks up the first taxon in the {@code taxons} index whose {@code key} field
 * matches {@code value} as a quoted, escaped phrase.
 *
 * @param key   index field to query
 * @param value value to look for; a blank value short-circuits without querying the index
 * @return the first matching node wrapped as a {@code TaxonNode}, or {@code null} when the
 *         value is blank or nothing matches
 */
private TaxonNode findTaxonByKey(String key, String value) {
    TaxonNode firstMatchingTaxon = null;
    if (StringUtils.isNotBlank(value)) {
        String query = key + ":\"" + QueryParser.escape(value) + "\"";
        IndexHits<Node> matchingTaxa = taxons.query(query);
        try {
            if (matchingTaxa.hasNext()) {
                Node matchingTaxon = matchingTaxa.next();
                if (matchingTaxon != null) {
                    firstMatchingTaxon = new TaxonNode(matchingTaxon);
                }
            }
        } finally {
            // Only the first hit is consumed, so the hits must always be closed explicitly,
            // including when reading or wrapping the hit throws.
            matchingTaxa.close();
        }
    }
    return firstMatchingTaxon;
}
use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.
the class LinkerTaxonIndex method link.
/**
 * Visits every node in the "taxons" index and stores aggregated id/name strings on it.
 *
 * <p>For each taxon node and each node it points to through an outgoing
 * {@code SAME_AS} relationship, the helpers {@code addTaxonId},
 * {@code addPathIdAndNames} and {@code addToFuzzyIndex} are invoked. Then, in one
 * transaction per taxon node, the collected values are joined with
 * {@code CharsetConstant.SEPARATOR} and written to:
 * <ul>
 *   <li>the full-text index {@code INDEX_TAXON_NAMES_AND_IDS} under the {@code PATH} key
 *       (path ids/names followed by taxon ids),</li>
 *   <li>the node's {@code EXTERNAL_IDS} property (same joined value),</li>
 *   <li>the node's {@code NAME_IDS} property (taxon ids only).</li>
 * </ul>
 *
 * <p>The index hits are closed in a {@code finally} block so they are released even
 * when processing a node fails part-way through.
 */
public void link() {
    Index<Node> taxons = graphDb.index().forNodes("taxons");
    Index<Node> ids = graphDb.index().forNodes(INDEX_TAXON_NAMES_AND_IDS, MapUtil.stringMap(IndexManager.PROVIDER, "lucene", "type", "fulltext"));
    TaxonFuzzySearchIndex fuzzySearchIndex = new TaxonFuzzySearchIndex(graphDb);
    IndexHits<Node> hits = taxons.query("*:*");
    try {
        for (Node hit : hits) {
            List<String> taxonIds = new ArrayList<>();
            List<String> taxonPathIdsAndNames = new ArrayList<>();
            TaxonNode taxonNode = new TaxonNode(hit);
            addTaxonId(taxonIds, taxonNode);
            addPathIdAndNames(taxonPathIdsAndNames, taxonNode);
            addToFuzzyIndex(graphDb, fuzzySearchIndex, hit, taxonNode);
            Iterable<Relationship> rels = hit.getRelationships(Direction.OUTGOING, NodeUtil.asNeo4j(RelTypes.SAME_AS));
            for (Relationship rel : rels) {
                TaxonNode sameAsTaxon = new TaxonNode(rel.getEndNode());
                addTaxonId(taxonIds, sameAsTaxon);
                addPathIdAndNames(taxonPathIdsAndNames, sameAsTaxon);
                // Linked taxa are registered against the original hit, not their own node.
                addToFuzzyIndex(graphDb, fuzzySearchIndex, hit, sameAsTaxon);
            }
            Transaction tx = graphDb.beginTx();
            try {
                taxonPathIdsAndNames.addAll(taxonIds);
                String aggregateIds = StringUtils.join(taxonPathIdsAndNames, CharsetConstant.SEPARATOR);
                ids.add(hit, PropertyAndValueDictionary.PATH, aggregateIds);
                hit.setProperty(PropertyAndValueDictionary.EXTERNAL_IDS, aggregateIds);
                String aggregateTaxonIds = StringUtils.join(taxonIds, CharsetConstant.SEPARATOR);
                hit.setProperty(PropertyAndValueDictionary.NAME_IDS, aggregateTaxonIds);
                tx.success();
            } finally {
                tx.finish();
            }
        }
    } finally {
        // Previously closed only after a fully successful loop; any exception leaked the hits.
        hits.close();
    }
}
Aggregations