Search in sources :

Example 1 with TaxonNode

use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.

the class LinkerTermMatcher method link.

/**
 * Iterates over every node returned by the {@code "*:*"} query on the "taxons" index,
 * wraps each one in a {@link TaxonNode}, and passes the accumulated nodes to
 * {@code handleBatch} in groups governed by {@code BATCH_SIZE}.
 *
 * <p>The index hits are closed in a {@code finally} block so the underlying search
 * result is released even when wrapping a node or processing a batch throws.
 * Nodes left over after the loop are flushed with one final {@code handleBatch} call.
 */
@Override
public void link() {
    Index<Node> taxons = graphDb.index().forNodes("taxons");
    IndexHits<Node> hits = taxons.query("*:*");
    final Map<Long, TaxonNode> nodeMap = new HashMap<Long, TaxonNode>();
    int counter = 1;
    try {
        for (Node hit : hits) {
            // Flush what has been collected so far before adding the current node.
            if (counter % BATCH_SIZE == 0) {
                handleBatch(graphDb, termMatcher, nodeMap, counter);
            }
            TaxonNode node = new TaxonNode(hit);
            nodeMap.put(node.getNodeID(), node);
            counter++;
        }
    } finally {
        // Release the search result regardless of how iteration ended.
        hits.close();
    }
    // Process whatever remains after the last full batch.
    handleBatch(graphDb, termMatcher, nodeMap, counter);
}
Also used : TaxonNode(org.eol.globi.domain.TaxonNode) HashMap(java.util.HashMap) Node(org.neo4j.graphdb.Node) TaxonNode(org.eol.globi.domain.TaxonNode)

Example 2 with TaxonNode

use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.

the class LinkerTermMatcher method handleBatch.

/**
 * Sends the names of all taxon nodes currently held in {@code nodeMap} to the term
 * matcher and connects each reported match to its originating node.
 *
 * <p>Each name is submitted as {@code "<nodeId>|<name>"}. A reported match is linked
 * only when the node id is still present in {@code nodeMap}, the relation type is not
 * {@code NameType.NONE}, and {@code TaxonUtil.likelyHomonym} returns false for the pair.
 * A {@link PropertyEnricherException} is logged and not rethrown. The map is cleared
 * before returning.
 *
 * @param graphDb     graph database passed on to {@code NodeUtil.connectTaxa}
 * @param termMatcher matcher asked to resolve the batch of names
 * @param nodeMap     taxon nodes keyed by node id; emptied at the end of this call
 * @param counter     running node counter, used only to derive the batch number in log messages
 */
private void handleBatch(final GraphDatabaseService graphDb, TermMatcher termMatcher, final Map<Long, TaxonNode> nodeMap, int counter) {
    StopWatch timer = new StopWatch();
    timer.start();
    final String batchLabel = "batch #" + counter / BATCH_SIZE;
    LOG.info(batchLabel + " preparing...");

    // Encode every pending node as "<nodeId>|<name>".
    List<String> idNamePairs = new ArrayList<String>();
    for (Map.Entry<Long, TaxonNode> pending : nodeMap.entrySet()) {
        idNamePairs.add(pending.getKey() + "|" + pending.getValue().getName());
    }

    try {
        if (!idNamePairs.isEmpty()) {
            TermMatchListener linker = new TermMatchListener() {

                @Override
                public void foundTaxonForName(Long nodeId, String name, Taxon taxon, NameType relType) {
                    TaxonNode candidate = nodeMap.get(nodeId);
                    if (candidate == null) {
                        return;
                    }
                    if (NameType.NONE == relType) {
                        return;
                    }
                    if (TaxonUtil.likelyHomonym(taxon, candidate)) {
                        return;
                    }
                    NodeUtil.connectTaxa(taxon, candidate, graphDb, RelTypes.forType(relType));
                }
            };
            termMatcher.findTermsForNames(idNamePairs, linker);
        }
    } catch (PropertyEnricherException ex) {
        LOG.error(batchLabel + " problem matching terms", ex);
    }

    timer.stop();
    long elapsedMs = timer.getTime();
    LOG.info(batchLabel + " completed in [" + elapsedMs + "] ms (" + (1.0 * elapsedMs / BATCH_SIZE) + " ms/name )");
    nodeMap.clear();
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException) TaxonNode(org.eol.globi.domain.TaxonNode) Taxon(org.eol.globi.domain.Taxon) ArrayList(java.util.ArrayList) NameType(org.eol.globi.domain.NameType) StopWatch(org.apache.commons.lang.time.StopWatch) HashMap(java.util.HashMap) Map(java.util.Map) TermMatchListener(org.eol.globi.taxon.TermMatchListener)

Example 3 with TaxonNode

use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.

the class LinkerTrustyNanoPubs method generateOrganisms.

/**
 * Appends the organism statements for an interaction to {@code builder}.
 *
 * <p>First pass: emits one {@code obo:RO_0000057 :Organism_N} line per participant
 * (terminated by "." for the last participant, ";" otherwise) and records the number
 * assigned to each participant's node id.
 *
 * <p>Second pass: for every participant whose first classification resolves to a
 * non-blank NCBI taxon id, emits {@code :Organism_N a NCBITaxon:<id>} followed by one
 * line per outgoing, non-inverted interaction relationship that carries a non-blank IRI.
 *
 * <p>The organism number used in the second pass is the participant's position in the
 * iteration, so it always agrees with the number assigned in the first pass, even when
 * earlier participants are skipped for lack of an NCBI taxon id.
 *
 * @param builder     receives the generated text
 * @param interaction interaction whose participants are described
 */
public static void generateOrganisms(StringBuilder builder, InteractionNode interaction) {
    Collection<Specimen> participants = interaction.getParticipants();
    Map<Long, Integer> nodeIdParticipantMap = new TreeMap<>();
    int participantNumber = 0;
    for (Specimen participant : participants) {
        builder.append(String.format("\n    obo:RO_0000057 :Organism_%d ", participantNumber));
        builder.append(participants.size() - 1 == participantNumber ? "." : ";");
        nodeIdParticipantMap.put(((NodeBacked) participant).getNodeID(), participantNumber);
        participantNumber++;
    }
    participantNumber = 0;
    for (Specimen participant : participants) {
        // Advance for every participant so the number stays aligned with the first pass,
        // whether or not this participant ends up being written out.
        final int organismNumber = participantNumber++;
        Iterable<Relationship> classification = NodeUtil.getClassifications(participant);
        if (classification != null && classification.iterator().hasNext()) {
            TaxonNode taxonNode = new TaxonNode(classification.iterator().next().getEndNode());
            String ncbiTaxonId = resolveNCBITaxonId(taxonNode);
            if (StringUtils.isNotBlank(ncbiTaxonId)) {
                builder.append(String.format("\n  :Organism_%d a NCBITaxon:%s ", organismNumber, ncbiTaxonId));
                Iterable<Relationship> interactRel = ((NodeBacked) participant).getUnderlyingNode().getRelationships(Direction.OUTGOING, NodeUtil.asNeo4j(InteractType.values()));
                for (Relationship relationship : interactRel) {
                    // Only relationships not flagged as inverted and carrying an IRI are emitted.
                    if (!relationship.hasProperty(PropertyAndValueDictionary.INVERTED)) {
                        if (relationship.hasProperty(PropertyAndValueDictionary.IRI)) {
                            String interactIRI = relationship.getProperty(PropertyAndValueDictionary.IRI).toString();
                            if (StringUtils.isNotBlank(interactIRI)) {
                                builder.append(";\n");
                                // NOTE(review): the lookup yields null (printed as "null") if the end
                                // node is not one of this interaction's participants - confirm that cannot happen.
                                builder.append(String.format("    <%s> :Organism_%d ", interactIRI, nodeIdParticipantMap.get(relationship.getEndNode().getId())));
                            }
                        }
                    }
                }
                builder.append(".");
            }
        }
    }
}
Also used : Specimen(org.eol.globi.domain.Specimen) TaxonNode(org.eol.globi.domain.TaxonNode) Relationship(org.neo4j.graphdb.Relationship) TreeMap(java.util.TreeMap)

Example 4 with TaxonNode

use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.

the class NonResolvingTaxonIndex method findTaxonByKey.

/**
 * Looks up the first taxon in the {@code taxons} index whose {@code key} field
 * exactly matches {@code value}.
 *
 * <p>The value is escaped with {@code QueryParser.escape} and quoted before being
 * placed in the query. The index hits are closed in a {@code finally} block so the
 * search result is released even if reading the first hit throws.
 *
 * @param key   index field to query
 * @param value value to match; a blank value skips the query entirely
 * @return the first matching taxon wrapped in a {@link TaxonNode}, or {@code null}
 *         when {@code value} is blank or nothing matches
 */
private TaxonNode findTaxonByKey(String key, String value) {
    TaxonNode firstMatchingTaxon = null;
    if (StringUtils.isNotBlank(value)) {
        String query = key + ":\"" + QueryParser.escape(value) + "\"";
        IndexHits<Node> matchingTaxa = taxons.query(query);
        try {
            if (matchingTaxa.hasNext()) {
                Node matchingTaxon = matchingTaxa.next();
                if (matchingTaxon != null) {
                    firstMatchingTaxon = new TaxonNode(matchingTaxon);
                }
            }
        } finally {
            // Only the first hit is consumed, so the result must be closed explicitly.
            matchingTaxa.close();
        }
    }
    return firstMatchingTaxon;
}
Also used : TaxonNode(org.eol.globi.domain.TaxonNode) Node(org.neo4j.graphdb.Node) TaxonNode(org.eol.globi.domain.TaxonNode)

Example 5 with TaxonNode

use of org.eol.globi.domain.TaxonNode in project eol-globi-data by jhpoelen.

the class LinkerTaxonIndex method link.

/**
 * For every node in the "taxons" index, collects the ids and path ids/names of the
 * taxon itself and of every taxon reachable through an outgoing {@code SAME_AS}
 * relationship, then stores the aggregated values on the node and in the
 * {@code INDEX_TAXON_NAMES_AND_IDS} fulltext index. Each taxon is also passed to
 * {@code addToFuzzyIndex}.
 *
 * <p>Writes for each node happen in their own transaction. The index hits are closed
 * in a {@code finally} block so the search result is released even when processing a
 * node throws.
 */
public void link() {
    Index<Node> taxons = graphDb.index().forNodes("taxons");
    Index<Node> ids = graphDb.index().forNodes(INDEX_TAXON_NAMES_AND_IDS, MapUtil.stringMap(IndexManager.PROVIDER, "lucene", "type", "fulltext"));
    TaxonFuzzySearchIndex fuzzySearchIndex = new TaxonFuzzySearchIndex(graphDb);
    IndexHits<Node> hits = taxons.query("*:*");
    try {
        for (Node hit : hits) {
            List<String> taxonIds = new ArrayList<>();
            List<String> taxonPathIdsAndNames = new ArrayList<>();
            TaxonNode taxonNode = new TaxonNode(hit);
            addTaxonId(taxonIds, taxonNode);
            addPathIdAndNames(taxonPathIdsAndNames, taxonNode);
            addToFuzzyIndex(graphDb, fuzzySearchIndex, hit, taxonNode);
            // Fold in the ids and paths of every taxon linked via SAME_AS.
            Iterable<Relationship> rels = hit.getRelationships(Direction.OUTGOING, NodeUtil.asNeo4j(RelTypes.SAME_AS));
            for (Relationship rel : rels) {
                TaxonNode sameAsTaxon = new TaxonNode(rel.getEndNode());
                addTaxonId(taxonIds, sameAsTaxon);
                addPathIdAndNames(taxonPathIdsAndNames, sameAsTaxon);
                addToFuzzyIndex(graphDb, fuzzySearchIndex, hit, sameAsTaxon);
            }
            Transaction tx = graphDb.beginTx();
            try {
                // Path ids/names come first, followed by the plain taxon ids.
                taxonPathIdsAndNames.addAll(taxonIds);
                String aggregateIds = StringUtils.join(taxonPathIdsAndNames, CharsetConstant.SEPARATOR);
                ids.add(hit, PropertyAndValueDictionary.PATH, aggregateIds);
                hit.setProperty(PropertyAndValueDictionary.EXTERNAL_IDS, aggregateIds);
                String aggregateTaxonIds = StringUtils.join(taxonIds, CharsetConstant.SEPARATOR);
                hit.setProperty(PropertyAndValueDictionary.NAME_IDS, aggregateTaxonIds);
                tx.success();
            } finally {
                tx.finish();
            }
        }
    } finally {
        // Release the search result regardless of how iteration ended.
        hits.close();
    }
}
Also used : TaxonNode(org.eol.globi.domain.TaxonNode) Transaction(org.neo4j.graphdb.Transaction) Node(org.neo4j.graphdb.Node) TaxonNode(org.eol.globi.domain.TaxonNode) Relationship(org.neo4j.graphdb.Relationship) ArrayList(java.util.ArrayList) TaxonFuzzySearchIndex(org.eol.globi.taxon.TaxonFuzzySearchIndex)

Aggregations

TaxonNode (org.eol.globi.domain.TaxonNode) 31, Taxon (org.eol.globi.domain.Taxon) 13, Test (org.junit.Test) 13, Node (org.neo4j.graphdb.Node) 12, Relationship (org.neo4j.graphdb.Relationship) 12, TaxonImpl (org.eol.globi.domain.TaxonImpl) 11, Study (org.eol.globi.domain.Study) 6, ArrayList (java.util.ArrayList) 5, HashMap (java.util.HashMap) 5, Specimen (org.eol.globi.domain.Specimen) 5, SpecimenNode (org.eol.globi.domain.SpecimenNode) 5, Transaction (org.neo4j.graphdb.Transaction) 4, HashSet (java.util.HashSet) 3, Map (java.util.Map) 3, TreeMap (java.util.TreeMap) 3, PropertyEnricherException (org.eol.globi.service.PropertyEnricherException) 3, StudyImpl (org.eol.globi.domain.StudyImpl) 2, StudyNode (org.eol.globi.domain.StudyNode) 2, PropertyEnricher (org.eol.globi.service.PropertyEnricher) 2, TaxonFuzzySearchIndex (org.eol.globi.taxon.TaxonFuzzySearchIndex) 2