Use of org.eol.globi.taxon.TaxonFuzzySearchIndex in the project eol-globi-data by jhpoelen.
Class LinkerTaxonIndex, method link.
/**
 * Walks every node returned by the "taxons" index and, for each one, aggregates
 * the ids and path ids/names gathered from the node itself and from the end node
 * of each of its outgoing SAME_AS relationships.
 *
 * <p>For each taxon node the aggregated values are then, inside a dedicated transaction:
 * <ul>
 *   <li>added to the fulltext index {@code INDEX_TAXON_NAMES_AND_IDS} under the
 *       {@code PropertyAndValueDictionary.PATH} key,</li>
 *   <li>stored on the node as {@code PropertyAndValueDictionary.EXTERNAL_IDS}
 *       (path ids/names followed by the taxon ids), and</li>
 *   <li>stored on the node as {@code PropertyAndValueDictionary.NAME_IDS}
 *       (taxon ids only).</li>
 * </ul>
 *
 * <p>The index hits are always closed, even when processing a node fails.
 */
public void link() {
    Index<Node> taxons = graphDb.index().forNodes("taxons");
    Index<Node> ids = graphDb.index().forNodes(INDEX_TAXON_NAMES_AND_IDS, MapUtil.stringMap(IndexManager.PROVIDER, "lucene", "type", "fulltext"));
    TaxonFuzzySearchIndex fuzzySearchIndex = new TaxonFuzzySearchIndex(graphDb);
    IndexHits<Node> hits = taxons.query("*:*");
    try {
        for (Node hit : hits) {
            List<String> taxonIds = new ArrayList<>();
            List<String> taxonPathIdsAndNames = new ArrayList<>();

            // Contribution of the taxon node itself.
            TaxonNode taxonNode = new TaxonNode(hit);
            addTaxonId(taxonIds, taxonNode);
            addPathIdAndNames(taxonPathIdsAndNames, taxonNode);
            addToFuzzyIndex(graphDb, fuzzySearchIndex, hit, taxonNode);

            // Contribution of every taxon linked through an outgoing SAME_AS relationship.
            // Note that the fuzzy index entry is still registered against the original hit.
            Iterable<Relationship> rels = hit.getRelationships(Direction.OUTGOING, NodeUtil.asNeo4j(RelTypes.SAME_AS));
            for (Relationship rel : rels) {
                TaxonNode sameAsTaxon = new TaxonNode(rel.getEndNode());
                addTaxonId(taxonIds, sameAsTaxon);
                addPathIdAndNames(taxonPathIdsAndNames, sameAsTaxon);
                addToFuzzyIndex(graphDb, fuzzySearchIndex, hit, sameAsTaxon);
            }

            // One transaction per taxon node for the index write and property updates.
            Transaction tx = graphDb.beginTx();
            try {
                // EXTERNAL_IDS / PATH hold path ids and names first, then the taxon ids.
                taxonPathIdsAndNames.addAll(taxonIds);
                String aggregateIds = StringUtils.join(taxonPathIdsAndNames, CharsetConstant.SEPARATOR);
                ids.add(hit, PropertyAndValueDictionary.PATH, aggregateIds);
                hit.setProperty(PropertyAndValueDictionary.EXTERNAL_IDS, aggregateIds);
                String aggregateTaxonIds = StringUtils.join(taxonIds, CharsetConstant.SEPARATOR);
                hit.setProperty(PropertyAndValueDictionary.NAME_IDS, aggregateTaxonIds);
                tx.success();
            } finally {
                tx.finish();
            }
        }
    } finally {
        // Release the underlying index resources even if a node failed to process.
        hits.close();
    }
}
Use of org.eol.globi.taxon.TaxonFuzzySearchIndex in the project eol-globi-data by jhpoelen.
Class LinkerTaxonIndexTest, method linking.
/**
 * Creates a "Homo sapiens" taxon with a SAME_AS-connected alias plus one unrelated
 * taxon, runs {@code LinkerTaxonIndex#link()}, and then checks the names-and-ids
 * index, the aggregated node properties and the fuzzy search index.
 */
@Test
public void linking() throws NodeFactoryException {
    // Taxon under test, with a name path.
    Taxon humanSeed = new TaxonImpl("Homo sapiens", "Bar:123");
    humanSeed.setPath("Animalia | Mammalia | Homo sapiens");
    Taxon human = taxonIndex.getOrCreateTaxon(humanSeed);

    // Alias carrying only path ids, connected to the taxon above via SAME_AS.
    TaxonImpl humanAlias = new TaxonImpl("Homo sapiens also", "FOO:444");
    humanAlias.setPathIds("BARZ:111 | FOOZ:777");
    NodeUtil.connectTaxa(humanAlias, (TaxonNode) human, getGraphDb(), RelTypes.SAME_AS);

    // A second, unrelated taxon.
    Taxon unrelated = taxonIndex.getOrCreateTaxon(new TaxonImpl("Bla blaus", null));
    unrelated.setExternalId("FOO 1234");

    resolveNames();
    new LinkerTaxonIndex(getGraphDb()).link();

    // The names-and-ids index lists "Homo sapiens" first and has at least one more entry.
    IndexHits<Node> allIndexed = getGraphDb().index().forNodes(LinkerTaxonIndex.INDEX_TAXON_NAMES_AND_IDS).query("*:*");
    Node firstIndexed = allIndexed.next();
    assertThat(new TaxonNode(firstIndexed).getName(), is("Homo sapiens"));
    assertThat(allIndexed.hasNext(), is(true));
    allIndexed.close();

    // Queries against the PATH field by id, id combination, id prefix, and name.
    final String path = PropertyAndValueDictionary.PATH;
    assertSingleHit(path + ":BAR\\:123");
    assertSingleHit(path + ":FOO\\:444");
    assertSingleHit(path + ":FOO\\:444 " + path + ":BAR\\:123");
    assertSingleHit(path + ":BAR\\:*");
    assertSingleHit(path + ":Homo");
    assertSingleHit(path + ":\"Homo sapiens\"");

    // Aggregated properties written onto the taxon node by the linker.
    Taxon found = taxonIndex.findTaxonByName("Homo sapiens");
    Node foundNode = ((NodeBacked) found).getUnderlyingNode();
    assertThat(foundNode.getProperty(PropertyAndValueDictionary.EXTERNAL_IDS).toString(), is("Animalia | Mammalia | Homo sapiens | BARZ:111 | FOOZ:777 | Bar:123 | FOO:444"));
    assertThat(foundNode.getProperty(PropertyAndValueDictionary.NAME_IDS).toString(), is("Bar:123 | FOO:444"));

    // The misspelling matches only when the fuzzy operator (~) is used.
    TaxonFuzzySearchIndex fuzzyLookup = new TaxonFuzzySearchIndex(getGraphDb());
    assertThat(fuzzyLookup.query("name:sapienz~").size(), is(1));
    TaxonFuzzySearchIndex exactLookup = new TaxonFuzzySearchIndex(getGraphDb());
    assertThat(exactLookup.query("name:sapienz").size(), is(0));
}
Aggregations