Use of org.eol.globi.taxon.NonResolvingTaxonIndex in the project eol-globi-data by jhpoelen.
From the class ExporterSiteMapForNamesTest, method writeSiteMapWithNames.
/**
 * Builds a graph with one study in which a "Homo sapiens" specimen ate a
 * "Canis familiaris" specimen, exports the names site map into
 * {@code target/sitemap/names} and verifies the output through {@code assertSiteMap}.
 */
@Test
public void writeSiteMapWithNames() throws StudyImporterException, IOException {
    taxonIndex = new NonResolvingTaxonIndex(getGraphDb());

    Study citedStudy = nodeFactory.getOrCreateStudy(new StudyImpl("title", "source", null, "citation 123"));

    // consumer: has an external id and a path
    TaxonImpl consumerTaxon = new TaxonImpl("Homo sapiens", "homoSapiensId");
    consumerTaxon.setPath("one two three");
    final Specimen consumer = nodeFactory.createSpecimen(citedStudy, consumerTaxon);

    // food: name only, no external id
    TaxonImpl foodTaxon = new TaxonImpl("Canis familiaris", null);
    final Specimen food = nodeFactory.createSpecimen(citedStudy, foodTaxon);

    consumer.ate(food);
    resolveNames();

    final File siteMapDir = createBaseDir("target/sitemap/names");
    final GraphExporter exporter = new ExporterSiteMapForNames();
    exporter.export(getGraphDb(), siteMapDir.getAbsolutePath());

    assertSiteMap(
            siteMapDir,
            "http://www.globalbioticinteractions.org/?interactionType=interactsWith&sourceTaxon=Homo%20sapiens",
            "https://depot.globalbioticinteractions.org/snapshot/target/data/sitemap/names/sitemap.xml.gz");
}
Use of org.eol.globi.taxon.NonResolvingTaxonIndex in the project eol-globi-data by jhpoelen.
From the class LinkerTaxonIndexTest, method findByStringWithWhitespaces.
/**
 * Creates a "Homo sapiens" taxon, runs {@code LinkerTaxonIndex} and then queries the
 * "taxonNameSuggestions" node index with an exact term query followed by several
 * fuzzy ({@code ~}) queries, checking the hit count of each.
 */
@Test
public void findByStringWithWhitespaces() throws NodeFactoryException {
    NonResolvingTaxonIndex nonResolvingIndex = new NonResolvingTaxonIndex(getGraphDb());
    nonResolvingIndex.getOrCreateTaxon(setTaxonProps(new TaxonImpl("Homo sapiens")));
    resolveNames();
    new LinkerTaxonIndex(getGraphDb()).link();

    assertThat(getGraphDb().index().existsForNodes("taxonNameSuggestions"), is(true));
    Index<Node> suggestions = getGraphDb().index().forNodes("taxonNameSuggestions");

    // exact term lookup on the "name" key
    Query exactNameQuery = new TermQuery(new Term("name", "name"));
    IndexHits<Node> found = suggestions.query(exactNameQuery);
    assertThat(found.size(), is(1));

    // fuzzy lookups given as (key, value) pairs; each is expected to hit exactly once
    final String[] fuzzyValues = {"s nme~", "geRman~"};
    for (String fuzzyValue : fuzzyValues) {
        found = suggestions.query("name", fuzzyValue);
        assertThat(found.size(), is(1));
    }

    // fuzzy lookups given as full query expressions; each is expected to hit exactly once
    final String[] fuzzyExpressions = {
            "name:geRman~ AND name:som~",
            "name:hmo~ AND name:SApiens~",
            "name:hmo~ AND name:sapiens~"
    };
    for (String fuzzyExpression : fuzzyExpressions) {
        found = suggestions.query(fuzzyExpression);
        assertThat(found.size(), is(1));
    }

    // queries are case sensitive, so query terms should all be lower cased:
    // this mostly upper-cased variant is expected to find nothing.
    found = suggestions.query("name:HMO~ AND name:saPIENS~");
    assertThat(found.size(), is(0));
}
Use of org.eol.globi.taxon.NonResolvingTaxonIndex in the project eol-globi-data by jhpoelen.
From the class NameResolverTest, method iNaturalistTaxon.
/**
 * Creates two specimens whose taxa carry {@code INAT_TAXON:} external ids, runs a
 * {@code NameResolver} backed by a {@code NonResolvingTaxonIndex} with a batch size of one,
 * and checks that the first taxon can afterwards be found both by id and by name.
 */
@Test
public void iNaturalistTaxon() throws NodeFactoryException {
    Specimen consumer = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl("Blaus bla", "INAT_TAXON:58831"));
    Specimen food = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl("Redus rha", "INAT_TAXON:126777"));
    consumer.ate(food);

    final NameResolver resolver = new NameResolver(
            getGraphDb(),
            new NonResolvingTaxonIndex(getGraphDb()));
    resolver.setBatchSize(1L);
    resolver.resolve();

    // lookup by external id
    Taxon foundById = taxonIndex.findTaxonById("INAT_TAXON:58831");
    assertThat(foundById, is(notNullValue()));
    assertThat(foundById.getExternalId(), is("INAT_TAXON:58831"));
    assertThat(foundById.getName(), is("Blaus bla"));

    // lookup by name
    Taxon foundByName = taxonIndex.findTaxonByName("Blaus bla");
    assertThat(foundByName, is(notNullValue()));
    assertThat(foundByName.getExternalId(), is("INAT_TAXON:58831"));
}
Use of org.eol.globi.taxon.NonResolvingTaxonIndex in the project eol-globi-data by jhpoelen.
From the class Normalizer, method resolveAndLinkTaxa.
/**
 * Runs the post-import stages in a fixed order: citation-to-DOI linking, name resolution
 * against the taxon cache (followed by term matching linkers), name resolution with a
 * {@code NonResolvingTaxonIndex} plus interaction indexing, taxon index linking, and
 * image linking.
 *
 * A stage is skipped, with a log message, only when {@code cmdLine} is non-null and
 * carries the corresponding {@code OPTION_SKIP_*} option.
 *
 * @param cmdLine      parsed command line options; {@code null} runs every stage
 * @param graphService graph database handed to every resolver, indexer and linker
 */
private void resolveAndLinkTaxa(CommandLine cmdLine, GraphDatabaseService graphService) {
    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_RESOLVE_CITATIONS)) {
        LOG.info("skipping citation resolving ...");
    } else {
        LOG.info("resolving citations to DOIs ...");
        new LinkerDOI(graphService, new DOIResolverCache()).link();
        new LinkerDOI(graphService).link();
    }

    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_TAXON_CACHE)) {
        LOG.info("skipping taxon cache ...");
    } else {
        LOG.info("resolving names with taxon cache ...");
        final TaxonCacheService cacheService = new TaxonCacheService("/taxa/taxonCache.tsv.gz", "/taxa/taxonMap.tsv.gz");
        try {
            ResolvingTaxonIndex cacheBackedIndex = new ResolvingTaxonIndex(cacheService, graphService);
            cacheBackedIndex.setIndexResolvedTaxaOnly(true);

            // rejects null taxa before delegating to the known-bad-name filter
            TaxonFilter nullSafeBadNameFilter = new TaxonFilter() {
                private KnownBadNameFilter badNames = new KnownBadNameFilter();

                @Override
                public boolean shouldInclude(Taxon candidate) {
                    if (candidate == null) {
                        return false;
                    }
                    return badNames.shouldInclude(candidate);
                }
            };
            NameResolver cacheBackedResolver = new NameResolver(graphService, cacheBackedIndex, nullSafeBadNameFilter);
            cacheBackedResolver.resolve();

            LOG.info("adding same and similar terms for resolved taxa...");
            List<Linker> termLinkers = new ArrayList<>();
            termLinkers.add(new LinkerTermMatcher(graphService, cacheService));
            appendOpenTreeTaxonLinker(graphService, termLinkers);
            for (Linker termLinker : termLinkers) {
                LinkUtil.doTimedLink(termLinker);
            }
            LOG.info("adding same and similar terms for resolved taxa done.");
        } finally {
            // always release the cache service, even when resolving or linking fails
            cacheService.shutdown();
        }
        LOG.info("resolving names with taxon cache done.");
    }

    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_RESOLVE)) {
        LOG.info("skipping taxa resolving ...");
    } else {
        NameResolver plainResolver = new NameResolver(graphService, new NonResolvingTaxonIndex(graphService));
        plainResolver.resolve();
        new TaxonInteractionIndexer(graphService).index();
    }

    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_LINK)) {
        LOG.info("skipping linking ...");
    } else {
        LinkUtil.doTimedLink(new LinkerTaxonIndex(graphService));
    }

    if (cmdLine != null && cmdLine.hasOption(OPTION_SKIP_LINK_THUMBNAILS)) {
        LOG.info("skipping linking of taxa to thumbnails ...");
    } else {
        LinkUtil.doTimedLink(new ImageLinker(graphService, null));
    }
}
Use of org.eol.globi.taxon.NonResolvingTaxonIndex in the project eol-globi-data by jhpoelen.
From the class TaxonInteractionIndexerTest, method indexNoNameNoMatch.
/**
 * Creates specimens whose taxa use {@code PropertyAndValueDictionary.NO_MATCH} as their
 * external id (plus ten "Arius felis" specimens without an id), and checks which lookups
 * on {@code taxonIndex} succeed before and after running a {@code NameResolver}
 * backed by a {@code NonResolvingTaxonIndex}.
 */
@Test
public void indexNoNameNoMatch() throws NodeFactoryException {
    Specimen consumer = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl("Homo sapiens", PropertyAndValueDictionary.NO_MATCH));
    Specimen firstFood = nodeFactory.createSpecimen(
            nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
            new TaxonImpl("Canis lupus", PropertyAndValueDictionary.NO_MATCH));
    consumer.ate(firstFood);

    // ten more food specimens, each created against its own "bla" study
    int remaining = 10;
    while (remaining > 0) {
        Specimen catfish = nodeFactory.createSpecimen(
                nodeFactory.createStudy(new StudyImpl("bla", null, null, null)),
                new TaxonImpl("Arius felis", null));
        consumer.ate(catfish);
        remaining--;
    }

    // before resolving: neither the NO_MATCH id nor the name can be looked up
    assertNull(taxonIndex.findTaxonById(PropertyAndValueDictionary.NO_MATCH));
    assertNull(taxonIndex.findTaxonByName("Homo sapiens"));

    NameResolver resolver = new NameResolver(getGraphDb(), new NonResolvingTaxonIndex(getGraphDb()));
    resolver.resolve();

    // after resolving: the name is found, NO_MATCH and NO_NAME still are not
    assertNotNull(taxonIndex.findTaxonByName("Homo sapiens"));
    assertNull(taxonIndex.findTaxonById(PropertyAndValueDictionary.NO_MATCH));
    assertNull(taxonIndex.findTaxonByName(PropertyAndValueDictionary.NO_NAME));
}
Aggregations