Search in sources:

Example 1 with NonResolvingTaxonIndex

use of org.eol.globi.taxon.NonResolvingTaxonIndex in project eol-globi-data by jhpoelen.

In the class ExporterSiteMapForNamesTest, method writeSiteMapWithNames.

/**
 * Checks the site map that {@link ExporterSiteMapForNames} writes for taxon names.
 *
 * <p>Creates one study in which a "Homo sapiens" specimen ate a "Canis familiaris" specimen,
 * resolves the names, exports into {@code target/sitemap/names} and hands the export directory,
 * the expected page URL and the expected site map location to {@code assertSiteMap}.
 *
 * @throws StudyImporterException declared by the test setup/export calls
 * @throws IOException declared by the file-based export/assert calls
 */
@Test
public void writeSiteMapWithNames() throws StudyImporterException, IOException {
    // replace the shared taxon index with one backed by the test graph database
    taxonIndex = new NonResolvingTaxonIndex(getGraphDb());

    final StudyImpl studyDescription = new StudyImpl("title", "source", null, "citation 123");
    final Study study = nodeFactory.getOrCreateStudy(studyDescription);

    final TaxonImpl humanTaxon = new TaxonImpl("Homo sapiens", "homoSapiensId");
    humanTaxon.setPath("one two three");
    final Specimen predator = nodeFactory.createSpecimen(study, humanTaxon);

    final TaxonImpl caninePreyTaxon = new TaxonImpl("Canis familiaris", null);
    final Specimen prey = nodeFactory.createSpecimen(study, caninePreyTaxon);

    predator.ate(prey);
    resolveNames();

    final File exportDir = createBaseDir("target/sitemap/names");
    final GraphExporter exporter = new ExporterSiteMapForNames();
    exporter.export(getGraphDb(), exportDir.getAbsolutePath());

    final String expectedPageUrl = "http://www.globalbioticinteractions.org/?interactionType=interactsWith&sourceTaxon=Homo%20sapiens";
    final String expectedSiteMapUrl = "https://depot.globalbioticinteractions.org/snapshot/target/data/sitemap/names/sitemap.xml.gz";
    assertSiteMap(exportDir, expectedPageUrl, expectedSiteMapUrl);
}
Also used : Study(org.eol.globi.domain.Study) Specimen(org.eol.globi.domain.Specimen) NonResolvingTaxonIndex(org.eol.globi.taxon.NonResolvingTaxonIndex) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) File(java.io.File) Test(org.junit.Test)

Example 2 with NonResolvingTaxonIndex

use of org.eol.globi.taxon.NonResolvingTaxonIndex in project eol-globi-data by jhpoelen.

In the class LinkerTaxonIndexTest, method findByStringWithWhitespaces.

/**
 * Checks that linking taxa creates the {@code taxonNameSuggestions} node index and that a series
 * of queries on its {@code name} key return the expected number of hits.
 *
 * <p>Each query result is passed to {@link #assertHitCountAndClose(IndexHits, int)} so that the
 * underlying {@link IndexHits} is released even though it is never iterated.
 *
 * @throws NodeFactoryException declared by the taxon creation/resolution calls
 */
@Test
public void findByStringWithWhitespaces() throws NodeFactoryException {
    NonResolvingTaxonIndex taxonService = new NonResolvingTaxonIndex(getGraphDb());
    taxonService.getOrCreateTaxon(setTaxonProps(new TaxonImpl("Homo sapiens")));
    resolveNames();
    new LinkerTaxonIndex(getGraphDb()).link();
    assertThat(getGraphDb().index().existsForNodes("taxonNameSuggestions"), is(true));
    Index<Node> index = getGraphDb().index().forNodes("taxonNameSuggestions");

    // exact term lookup via a Lucene query object
    Query query = new TermQuery(new Term("name", "name"));
    assertHitCountAndClose(index.query(query), 1);

    // key/value style queries
    assertHitCountAndClose(index.query("name", "s nme~"), 1);
    assertHitCountAndClose(index.query("name", "geRman~"), 1);

    // full query strings combining two clauses on the same key
    assertHitCountAndClose(index.query("name:geRman~ AND name:som~"), 1);
    assertHitCountAndClose(index.query("name:hmo~ AND name:SApiens~"), 1);
    assertHitCountAndClose(index.query("name:hmo~ AND name:sapiens~"), 1);

    // queries are case sensitive . . . should all be lower cased.
    assertHitCountAndClose(index.query("name:HMO~ AND name:saPIENS~"), 0);
}

/**
 * Asserts the number of hits and then closes them.
 *
 * <p>The hits are closed in a {@code finally} block so they are released whether or not the
 * size assertion passes.
 *
 * @param hits query result to check and release
 * @param expectedCount number of hits the query is expected to return
 */
private static void assertHitCountAndClose(IndexHits<Node> hits, int expectedCount) {
    try {
        assertThat(hits.size(), is(expectedCount));
    } finally {
        hits.close();
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) NonResolvingTaxonIndex(org.eol.globi.taxon.NonResolvingTaxonIndex) TaxonImpl(org.eol.globi.domain.TaxonImpl) Node(org.neo4j.graphdb.Node) TaxonNode(org.eol.globi.domain.TaxonNode) Term(org.apache.lucene.index.Term) ResolvingTaxonIndexTest(org.eol.globi.taxon.ResolvingTaxonIndexTest) Test(org.junit.Test)

Example 3 with NonResolvingTaxonIndex

use of org.eol.globi.taxon.NonResolvingTaxonIndex in project eol-globi-data by jhpoelen.

In the class NameResolverTest, method iNaturalistTaxon.

/**
 * Checks that a taxon carrying an {@code INAT_TAXON:} id can be looked up through
 * {@code taxonIndex} by id and by name after a {@link NameResolver} run that uses a
 * {@link NonResolvingTaxonIndex}.
 *
 * @throws NodeFactoryException declared by the specimen/study creation and lookup calls
 */
@Test
public void iNaturalistTaxon() throws NodeFactoryException {
    final String expectedName = "Blaus bla";
    final String expectedId = "INAT_TAXON:58831";

    Specimen consumer = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl(expectedName, expectedId));
    Specimen consumed = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl("Redus rha", "INAT_TAXON:126777"));
    consumer.ate(consumed);

    final NameResolver resolver = new NameResolver(getGraphDb(), new NonResolvingTaxonIndex(getGraphDb()));
    resolver.setBatchSize(1L);
    resolver.resolve();

    // lookup by external id
    Taxon foundById = taxonIndex.findTaxonById(expectedId);
    assertThat(foundById, is(notNullValue()));
    assertThat(foundById.getExternalId(), is(expectedId));
    assertThat(foundById.getName(), is(expectedName));

    // lookup by name yields the same external id
    Taxon foundByName = taxonIndex.findTaxonByName(expectedName);
    assertThat(foundByName, is(notNullValue()));
    assertThat(foundByName.getExternalId(), is(expectedId));
}
Also used : Specimen(org.eol.globi.domain.Specimen) NonResolvingTaxonIndex(org.eol.globi.taxon.NonResolvingTaxonIndex) TaxonImpl(org.eol.globi.domain.TaxonImpl) Taxon(org.eol.globi.domain.Taxon) StudyImpl(org.eol.globi.domain.StudyImpl) Test(org.junit.Test)

Example 4 with NonResolvingTaxonIndex

use of org.eol.globi.taxon.NonResolvingTaxonIndex in project eol-globi-data by jhpoelen.

In the class Normalizer, method resolveAndLinkTaxa.

/**
 * Runs the citation, taxon-cache, name-resolving, linking and thumbnail stages in that order.
 *
 * <p>A stage is skipped only when {@code cmdLine} is non-null and carries the stage's
 * {@code OPTION_SKIP_*} option; a {@code null} command line therefore runs every stage.
 *
 * @param cmdLine parsed command line, or {@code null} to run all stages
 * @param graphService graph database the stages operate on
 */
private void resolveAndLinkTaxa(CommandLine cmdLine, GraphDatabaseService graphService) {
    // stage 1: citations -> DOIs
    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_RESOLVE_CITATIONS)) {
        LOG.info("skipping citation resolving ...");
    } else {
        LOG.info("resolving citations to DOIs ...");
        // first pass uses the DOI resolver cache, second pass the linker's default resolver
        final LinkerDOI cacheBackedDoiLinker = new LinkerDOI(graphService, new DOIResolverCache());
        cacheBackedDoiLinker.link();
        final LinkerDOI defaultDoiLinker = new LinkerDOI(graphService);
        defaultDoiLinker.link();
    }

    // stage 2: resolve names against the taxon cache and link related terms
    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_TAXON_CACHE)) {
        LOG.info("skipping taxon cache ...");
    } else {
        LOG.info("resolving names with taxon cache ...");
        final TaxonCacheService cacheService = new TaxonCacheService("/taxa/taxonCache.tsv.gz", "/taxa/taxonMap.tsv.gz");
        try {
            ResolvingTaxonIndex cacheBackedIndex = new ResolvingTaxonIndex(cacheService, graphService);
            cacheBackedIndex.setIndexResolvedTaxaOnly(true);
            TaxonFilter cacheFilter = new TaxonFilter() {

                private KnownBadNameFilter knownBadNameFilter = new KnownBadNameFilter();

                @Override
                public boolean shouldInclude(Taxon taxon) {
                    // reject missing taxa outright, otherwise defer to the known-bad-name filter
                    if (taxon == null) {
                        return false;
                    }
                    return knownBadNameFilter.shouldInclude(taxon);
                }
            };
            final NameResolver cacheResolver = new NameResolver(graphService, cacheBackedIndex, cacheFilter);
            cacheResolver.resolve();

            LOG.info("adding same and similar terms for resolved taxa...");
            List<Linker> termLinkers = new ArrayList<>();
            termLinkers.add(new LinkerTermMatcher(graphService, cacheService));
            appendOpenTreeTaxonLinker(graphService, termLinkers);
            for (Linker termLinker : termLinkers) {
                LinkUtil.doTimedLink(termLinker);
            }
            LOG.info("adding same and similar terms for resolved taxa done.");
        } finally {
            // always release the cache service, even when resolving or linking fails
            cacheService.shutdown();
        }
        LOG.info("resolving names with taxon cache done.");
    }

    // stage 3: resolve remaining names without a resolver, then index taxon interactions
    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_RESOLVE)) {
        LOG.info("skipping taxa resolving ...");
    } else {
        final NameResolver plainResolver = new NameResolver(graphService, new NonResolvingTaxonIndex(graphService));
        plainResolver.resolve();
        final TaxonInteractionIndexer interactionIndexer = new TaxonInteractionIndexer(graphService);
        interactionIndexer.index();
    }

    // stage 4: taxon index linking
    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_LINK)) {
        LOG.info("skipping linking ...");
    } else {
        LinkUtil.doTimedLink(new LinkerTaxonIndex(graphService));
    }

    // stage 5: thumbnails
    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_LINK_THUMBNAILS)) {
        LOG.info("skipping linking of taxa to thumbnails ...");
    } else {
        LinkUtil.doTimedLink(new ImageLinker(graphService, null));
    }
}
Also used : TaxonCacheService(org.eol.globi.taxon.TaxonCacheService) DOIResolverCache(org.eol.globi.service.DOIResolverCache) NonResolvingTaxonIndex(org.eol.globi.taxon.NonResolvingTaxonIndex) Taxon(org.eol.globi.domain.Taxon) ArrayList(java.util.ArrayList) ResolvingTaxonIndex(org.eol.globi.taxon.ResolvingTaxonIndex)

Example 5 with NonResolvingTaxonIndex

use of org.eol.globi.taxon.NonResolvingTaxonIndex in project eol-globi-data by jhpoelen.

In the class TaxonInteractionIndexerTest, method indexNoNameNoMatch.

/**
 * Checks what {@code taxonIndex} can find before and after a {@link NameResolver} run when the
 * specimens' taxa carry {@code PropertyAndValueDictionary.NO_MATCH} as id.
 *
 * <p>Before resolving, neither the {@code NO_MATCH} id nor "Homo sapiens" is found. Afterwards
 * "Homo sapiens" is found by name, while the {@code NO_MATCH} id and the {@code NO_NAME} name
 * are still not found.
 *
 * @throws NodeFactoryException declared by the specimen/study creation and lookup calls
 */
@Test
public void indexNoNameNoMatch() throws NodeFactoryException {
    final String humanName = "Homo sapiens";
    final String unmatchedId = PropertyAndValueDictionary.NO_MATCH;

    Specimen predator = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl(humanName, unmatchedId));
    Specimen firstPrey = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl("Canis lupus", unmatchedId));
    predator.ate(firstPrey);

    // ten more prey specimens, each in its own study and without an external id
    for (int remaining = 10; remaining > 0; remaining--) {
        Specimen catfish = nodeFactory.createSpecimen(
                nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
                new TaxonImpl("Arius felis", null));
        predator.ate(catfish);
    }

    // nothing is findable before the resolver has run
    assertNull(taxonIndex.findTaxonById(unmatchedId));
    assertNull(taxonIndex.findTaxonByName(humanName));

    final NameResolver resolver = new NameResolver(getGraphDb(), new NonResolvingTaxonIndex(getGraphDb()));
    resolver.resolve();

    assertNotNull(taxonIndex.findTaxonByName(humanName));
    assertNull(taxonIndex.findTaxonById(unmatchedId));
    assertNull(taxonIndex.findTaxonByName(PropertyAndValueDictionary.NO_NAME));
}
Also used : Specimen(org.eol.globi.domain.Specimen) NonResolvingTaxonIndex(org.eol.globi.taxon.NonResolvingTaxonIndex) TaxonImpl(org.eol.globi.domain.TaxonImpl) StudyImpl(org.eol.globi.domain.StudyImpl) Test(org.junit.Test)

Aggregations

NonResolvingTaxonIndex (org.eol.globi.taxon.NonResolvingTaxonIndex)8 TaxonImpl (org.eol.globi.domain.TaxonImpl)7 Test (org.junit.Test)7 Specimen (org.eol.globi.domain.Specimen)5 StudyImpl (org.eol.globi.domain.StudyImpl)5 Taxon (org.eol.globi.domain.Taxon)5 ArrayList (java.util.ArrayList)2 File (java.io.File)1 Term (org.apache.lucene.index.Term)1 Query (org.apache.lucene.search.Query)1 TermQuery (org.apache.lucene.search.TermQuery)1 Study (org.eol.globi.domain.Study)1 TaxonNode (org.eol.globi.domain.TaxonNode)1 DOIResolverCache (org.eol.globi.service.DOIResolverCache)1 ResolvingTaxonIndex (org.eol.globi.taxon.ResolvingTaxonIndex)1 ResolvingTaxonIndexTest (org.eol.globi.taxon.ResolvingTaxonIndexTest)1 TaxonCacheService (org.eol.globi.taxon.TaxonCacheService)1 Node (org.neo4j.graphdb.Node)1 Relationship (org.neo4j.graphdb.Relationship)1