Search in sources :

Example 1 with StudyNode

use of org.eol.globi.domain.StudyNode in project eol-globi-data by jhpoelen.

the class LinkerDOI method link.

/**
 * Walks every node returned by the "studies" index for the query {@code *:*},
 * collects the studies for which {@code shouldResolve} returns true, and hands
 * them to {@code resolveBatch} in groups of at least {@code BATCH_SIZE},
 * keyed by citation. Any remainder is resolved after the loop.
 *
 * <p>Progress and a final summary are written to the log.
 */
@Override
public void link() {
    Index<Node> studyIndex = this.graphDb.index().forNodes("studies");
    IndexHits<Node> hits = studyIndex.query("*:*");
    int counter = 0;
    int counterResolved = 0;
    String msg = "linking study citations to DOIs";
    LOG.info(msg + " started...");
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    // Keyed by citation: studies sharing a citation collapse to one entry (last one wins).
    Map<String, StudyNode> batch = new HashMap<>();
    try {
        for (Node hit : hits) {
            counter++;
            StudyNode study = new StudyNode(hit);
            if (shouldResolve(study)) {
                counterResolved++;
                batch.put(study.getCitation(), study);
            }
            if (batch.size() >= BATCH_SIZE) {
                LOG.info(logProgress(counterResolved, stopWatch));
                resolveBatch(doiResolver, batch);
                batch.clear();
            }
        }
    } finally {
        // Index hits hold underlying index resources; release them even when
        // iteration is cut short by an exception.
        hits.close();
    }
    // Flush whatever did not fill a complete batch.
    resolveBatch(doiResolver, batch);
    LOG.info(msg + " complete. Out of [" + counter + "] references, [" + counterResolved + "] needed resolving.");
    // NOTE(review): the modulus of 100 is independent of BATCH_SIZE; kept as-is, confirm intent.
    if (counter % 100 != 0) {
        LOG.info(logProgress(counterResolved, stopWatch));
    }
    stopWatch.stop();
}
Also used : HashMap(java.util.HashMap) StudyNode(org.eol.globi.domain.StudyNode) Node(org.neo4j.graphdb.Node) StopWatch(org.apache.commons.lang3.time.StopWatch) StudyNode(org.eol.globi.domain.StudyNode)

Example 2 with StudyNode

use of org.eol.globi.domain.StudyNode in project eol-globi-data by jhpoelen.

the class ExporterAggregateUtil method collectDistinctInteractions.

/**
 * Adds the distinct target taxon external ids of a study's non-inverted
 * interactions to {@code studyOccAggregate}.
 *
 * <p>Entries are keyed by (study node id, source taxon external id, interaction
 * type name). Interactions for which either the source or the target taxon
 * external id is {@code null} are skipped.
 *
 * @param aStudy            study whose specimen interactions are collected
 * @param studyOccAggregate aggregate map that is updated in place
 */
public static void collectDistinctInteractions(StudyNode aStudy, Map<Fun.Tuple3<Long, String, String>, List<String>> studyOccAggregate) {
    for (Relationship specimenRel : NodeUtil.getSpecimens(aStudy)) {
        final Iterable<Relationship> outgoing = specimenRel.getEndNode().getRelationships(Direction.OUTGOING, NodeUtil.asNeo4j());
        for (Relationship interactionRel : outgoing) {
            if (interactionRel.hasProperty(PropertyAndValueDictionary.INVERTED)) {
                // Inverted interactions are not aggregated.
                continue;
            }
            final Node target = interactionRel.getEndNode();
            final Node source = interactionRel.getStartNode();
            final String sourceId = getExternalIdForTaxonOf(source);
            final String targetId = getExternalIdForTaxonOf(target);
            if (sourceId == null || targetId == null) {
                continue;
            }
            final Fun.Tuple3<Long, String, String> aggregateKey =
                    new Fun.Tuple3<Long, String, String>(aStudy.getNodeID(), sourceId, interactionRel.getType().name());
            final List<String> existing = studyOccAggregate.get(aggregateKey);
            final List<String> targetIds = existing == null ? new ArrayList<String>() : existing;
            if (!targetIds.contains(targetId)) {
                targetIds.add(targetId);
            }
            // The list is always written back, also when it already existed;
            // presumably required by map implementations that hand out copies - confirm.
            studyOccAggregate.put(aggregateKey, targetIds);
        }
    }
}
Also used : Relationship(org.neo4j.graphdb.Relationship) StudyNode(org.eol.globi.domain.StudyNode) Node(org.neo4j.graphdb.Node) TaxonNode(org.eol.globi.domain.TaxonNode) Fun(org.mapdb.Fun)

Example 3 with StudyNode

use of org.eol.globi.domain.StudyNode in project eol-globi-data by jhpoelen.

the class IndexInteractionsTest method indexInteractions.

/**
 * Creates a study with two classified specimens (donald ate mickey), runs
 * {@code IndexInteractions#link()}, and then checks that both specimens are
 * participants of the same single node and that this node has outgoing
 * DERIVED_FROM and ACCESSED_AT relationships.
 */
@Test
public void indexInteractions() throws NodeFactoryException {
    TaxonIndex taxa = getOrCreateTaxonIndex();
    // see https://github.com/jhpoelen/eol-globi-data/wiki/Nanopubs
    StudyImpl studyTemplate = new StudyImpl("some study", "some source", "http://doi.org/123.23/222", "some study citation");
    DatasetImpl dataset = new DatasetImpl("some/namespace", URI.create("https://some.uri"));
    NodeFactoryWithDatasetContext datasetFactory = new NodeFactoryWithDatasetContext(nodeFactory, dataset);
    Study createdStudy = datasetFactory.getOrCreateStudy(studyTemplate);

    // Source specimen.
    TaxonImpl donaldTaxon = new TaxonImpl("donald duck", "NCBI:1234");
    Specimen donald = datasetFactory.createSpecimen(createdStudy, donaldTaxon);
    donald.classifyAs(taxa.getOrCreateTaxon(donaldTaxon));

    // Target specimen, whose taxon is additionally connected via SAME_AS to a second taxon.
    TaxonImpl mickeyTaxon = new TaxonImpl("mickey mouse", "NCBI:4444");
    Taxon mickeySameAsTaxon = taxa.getOrCreateTaxon(new TaxonImpl("mickey mouse", "EOL:567"));
    NodeUtil.connectTaxa(mickeyTaxon, (TaxonNode) mickeySameAsTaxon, getGraphDb(), RelTypes.SAME_AS);
    Specimen mickey = datasetFactory.createSpecimen(createdStudy, mickeyTaxon);
    mickey.classifyAs(taxa.getOrCreateTaxon(mickeyTaxon));

    donald.ate(mickey);

    new IndexInteractions(getGraphDb()).link();

    // Look the study up again through a plain Neo4j node factory.
    NodeFactoryNeo4j plainFactory = new NodeFactoryNeo4j(getGraphDb());
    StudyImpl lookupTemplate = new StudyImpl("some study", "some source", null, "come citation");
    lookupTemplate.setOriginatingDataset(new DatasetImpl("some/namespace", URI.create("some:uri")));
    StudyNode lookedUpStudy = plainFactory.getOrCreateStudy(lookupTemplate);
    assertThat(createdStudy.getOriginatingDataset().getNamespace(), is(lookedUpStudy.getOriginatingDataset().getNamespace()));
    assertThat(createdStudy.getTitle(), is(lookedUpStudy.getTitle()));

    Iterable<Relationship> specimenRels = NodeUtil.getSpecimens(lookedUpStudy);
    RelationshipType hasParticipant = NodeUtil.asNeo4j(RelTypes.HAS_PARTICIPANT);
    Set<Long> distinctParticipantOwnerIds = new HashSet<>();
    List<Long> participantOwnerIds = new ArrayList<>();
    for (Relationship specimenRel : specimenRels) {
        Node specimenNode = specimenRel.getEndNode();
        assertThat(specimenNode.hasRelationship(Direction.INCOMING, hasParticipant), Is.is(true));
        Relationship participation = specimenNode.getSingleRelationship(hasParticipant, Direction.INCOMING);
        long ownerId = participation.getStartNode().getId();
        distinctParticipantOwnerIds.add(ownerId);
        participantOwnerIds.add(ownerId);
    }
    // Two specimens, both pointing back at one and the same node.
    assertThat(distinctParticipantOwnerIds.size(), Is.is(1));
    assertThat(participantOwnerIds.size(), Is.is(2));

    Node interactionNode = getGraphDb().getNodeById(participantOwnerIds.get(0));
    assertTrue(interactionNode.hasRelationship(Direction.OUTGOING, NodeUtil.asNeo4j(RelTypes.DERIVED_FROM)));
    assertTrue(interactionNode.hasRelationship(Direction.OUTGOING, NodeUtil.asNeo4j(RelTypes.ACCESSED_AT)));
}
Also used : Study(org.eol.globi.domain.Study) TaxonImpl(org.eol.globi.domain.TaxonImpl) Taxon(org.eol.globi.domain.Taxon) Node(org.neo4j.graphdb.Node) StudyNode(org.eol.globi.domain.StudyNode) TaxonNode(org.eol.globi.domain.TaxonNode) StudyImpl(org.eol.globi.domain.StudyImpl) RelationshipType(org.neo4j.graphdb.RelationshipType) ArrayList(java.util.ArrayList) DatasetImpl(org.eol.globi.service.DatasetImpl) NodeFactoryNeo4j(org.eol.globi.data.NodeFactoryNeo4j) NodeFactoryWithDatasetContext(org.eol.globi.data.NodeFactoryWithDatasetContext) StudyNode(org.eol.globi.domain.StudyNode) Specimen(org.eol.globi.domain.Specimen) Relationship(org.neo4j.graphdb.Relationship) TaxonIndex(org.eol.globi.data.TaxonIndex) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with StudyNode

use of org.eol.globi.domain.StudyNode in project eol-globi-data by jhpoelen.

the class LinkerDOITest method doLink.

/**
 * Creates a study without a DOI, runs {@code LinkerDOI#link()}, and asserts
 * that neither the original study nor the re-fetched one has a DOI afterwards.
 */
@Test
public void doLink() throws NodeFactoryException, PropertyEnricherException {
    StudyImpl template = new StudyImpl("title", "some source", null, "some citation");
    StudyNode createdStudy = getNodeFactory().getOrCreateStudy(template);

    LinkerDOI linker = new LinkerDOI(getGraphDb());
    linker.link();

    Study refetchedStudy = nodeFactory.getOrCreateStudy(createdStudy);
    assertThat(refetchedStudy.getDOI(), is(nullValue()));
    assertThat(createdStudy.getDOI(), is(nullValue()));
}
Also used : Study(org.eol.globi.domain.Study) StudyImpl(org.eol.globi.domain.StudyImpl) StudyNode(org.eol.globi.domain.StudyNode) Test(org.junit.Test)

Example 5 with StudyNode

use of org.eol.globi.domain.StudyNode in project eol-globi-data by jhpoelen.

the class LinkerDOITest method addDOIToStudy.

/**
 * Links one study with a stub resolver that always answers "doi:1234" and
 * checks the resulting DOI, external id and citation; then links a second
 * study with {@code DOIResolverThatExplodes} and checks that DOI and external
 * id stay unset while the citation is unchanged.
 */
@Test
public void addDOIToStudy() throws NodeFactoryException {
    // Stub resolver: every reference maps to the same fixed DOI.
    DOIResolver fixedDoiResolver = new DOIResolver() {

        @Override
        public Map<String, String> resolveDoiFor(Collection<String> references) throws IOException {
            Map<String, String> resolved = new HashMap<>();
            for (String ref : references) {
                resolved.put(ref, resolveDoiFor(ref));
            }
            return resolved;
        }

        @Override
        public String resolveDoiFor(String reference) throws IOException {
            return "doi:1234";
        }
    };

    // Case 1: the resolver succeeds.
    StudyImpl firstTemplate = new StudyImpl("my title", "some source", null, ExternalIdUtil.toCitation("my contr", "some description", null));
    StudyNode resolvedStudy = getNodeFactory().getOrCreateStudy(firstTemplate);
    new LinkerDOI(getGraphDb()).linkStudy(fixedDoiResolver, resolvedStudy);
    assertThat(resolvedStudy.getDOI(), is("doi:1234"));
    assertThat(resolvedStudy.getExternalId(), is("http://dx.doi.org/1234"));
    assertThat(resolvedStudy.getCitation(), is("my contr. some description"));

    // Case 2: the resolver is DOIResolverThatExplodes.
    StudyImpl secondTemplate = new StudyImpl("my other title", "some source", null, ExternalIdUtil.toCitation("my contr", "some description", null));
    StudyNode unresolvedStudy = getNodeFactory().getOrCreateStudy(secondTemplate);
    new LinkerDOI(getGraphDb()).linkStudy(new DOIResolverThatExplodes(), unresolvedStudy);
    assertThat(unresolvedStudy.getDOI(), nullValue());
    assertThat(unresolvedStudy.getExternalId(), nullValue());
    assertThat(unresolvedStudy.getCitation(), is("my contr. some description"));
}
Also used : HashMap(java.util.HashMap) StudyImpl(org.eol.globi.domain.StudyImpl) Collection(java.util.Collection) DOIResolver(org.eol.globi.service.DOIResolver) StudyNode(org.eol.globi.domain.StudyNode) Test(org.junit.Test)

Aggregations

StudyNode (org.eol.globi.domain.StudyNode): 15; Node (org.neo4j.graphdb.Node): 8; StudyImpl (org.eol.globi.domain.StudyImpl): 7; TaxonNode (org.eol.globi.domain.TaxonNode): 6; Test (org.junit.Test): 6; Relationship (org.neo4j.graphdb.Relationship): 4; HashSet (java.util.HashSet): 3; StudyNodeListener (org.eol.globi.util.StudyNodeListener): 3; Transaction (org.neo4j.graphdb.Transaction): 3; ArrayList (java.util.ArrayList): 2; HashMap (java.util.HashMap): 2; Study (org.eol.globi.domain.Study): 2; DatasetImpl (org.eol.globi.service.DatasetImpl): 2; Collection (java.util.Collection): 1; List (java.util.List): 1; Map (java.util.Map): 1; StopWatch (org.apache.commons.lang3.time.StopWatch): 1; ObjectMapper (org.codehaus.jackson.map.ObjectMapper): 1; ObjectNode (org.codehaus.jackson.node.ObjectNode): 1; NodeFactoryNeo4j (org.eol.globi.data.NodeFactoryNeo4j): 1