use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.
the class LinkerTermMatcher method handleBatch.
/**
 * Submits one batch of taxon nodes to the term matcher and links every accepted match.
 *
 * Each node is sent as an {@code "<node id>|<name>"} string. For every reported match the
 * node is looked up again by id and connected to the matched taxon, unless the node is
 * unknown to this batch, the match type is {@link NameType#NONE}, or the pair is flagged by
 * {@code TaxonUtil.likelyHomonym}. A {@link PropertyEnricherException} raised by the matcher
 * is logged and not propagated. The batch map is emptied before returning.
 *
 * @param graphDb     graph database handed to {@code NodeUtil.connectTaxa}
 * @param termMatcher matcher queried with the batch's id/name pairs
 * @param nodeMap     taxon nodes of this batch keyed by node id; cleared on exit
 * @param counter     running count; used here only to derive the batch number in log messages
 */
private void handleBatch(final GraphDatabaseService graphDb, TermMatcher termMatcher, final Map<Long, TaxonNode> nodeMap, int counter) {
    final StopWatch timer = new StopWatch();
    timer.start();
    final String batchLabel = "batch #" + counter / BATCH_SIZE;
    LOG.info(batchLabel + " preparing...");

    final List<String> idNamePairs = new ArrayList<String>();
    for (Map.Entry<Long, TaxonNode> nodeById : nodeMap.entrySet()) {
        idNamePairs.add(nodeById.getKey() + "|" + nodeById.getValue().getName());
    }

    if (!idNamePairs.isEmpty()) {
        final TermMatchListener linkingListener = new TermMatchListener() {
            @Override
            public void foundTaxonForName(Long nodeId, String name, Taxon taxon, NameType relType) {
                final TaxonNode candidate = nodeMap.get(nodeId);
                if (candidate == null || NameType.NONE == relType) {
                    return;
                }
                if (TaxonUtil.likelyHomonym(taxon, candidate)) {
                    return;
                }
                NodeUtil.connectTaxa(taxon, candidate, graphDb, RelTypes.forType(relType));
            }
        };
        try {
            termMatcher.findTermsForNames(idNamePairs, linkingListener);
        } catch (PropertyEnricherException ex) {
            // a failed batch is reported but does not abort the caller
            LOG.error(batchLabel + " problem matching terms", ex);
        }
    }

    timer.stop();
    final long elapsedMs = timer.getTime();
    LOG.info(batchLabel + " completed in [" + elapsedMs + "] ms (" + (1.0 * elapsedMs / BATCH_SIZE) + " ms/name )");
    nodeMap.clear();
}
use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.
the class TaxonCacheService method initTaxonIdMap.
/**
 * Creates and starts the taxon lookup service backed by an index in the {@code lucene}
 * sub-directory of the cache directory, then publishes it in {@code this.taxonLookupService}.
 *
 * When the cache is not temporary and the index directory already existed, the existing index
 * is reused. Otherwise every row of {@code taxonMapResource} is read and three lookups are
 * registered per row, all pointing at the resolved external id: provided external id,
 * provided name and resolved name.
 *
 * @throws PropertyEnricherException when an {@link IOException} occurs while preparing or
 *                                   populating the index
 */
private void initTaxonIdMap() throws PropertyEnricherException {
    try {
        LOG.info("taxon lookup service instantiating...");
        File luceneDir = new File(getCacheDir().getAbsolutePath(), "lucene");
        // must be sampled before createCacheDir, which may create the directory
        boolean preexisting = luceneDir.exists();
        createCacheDir(luceneDir, isTemporary());
        TaxonLookupServiceImpl taxonLookupService = new TaxonLookupServiceImpl(new SimpleFSDirectory(luceneDir));
        taxonLookupService.setMaxHits(getMaxTaxonLinks());
        taxonLookupService.start();
        if (!isTemporary() && preexisting) {
            LOG.info("pre-existing taxon lookup index found, no need to re-index...");
        } else {
            LOG.info("no pre-existing taxon lookup index found, re-indexing...");
            int count = 0;
            LOG.info("taxon map loading [" + taxonMapResource + "] ...");
            StopWatch watch = new StopWatch();
            watch.start();
            BufferedReader reader = createBufferedReader(taxonMapResource);
            try {
                final LabeledCSVParser labeledCSVParser = CSVTSVUtil.createLabeledTSVParser(reader);
                while (labeledCSVParser.getLine() != null) {
                    Taxon provided = TaxonMapParser.parseProvidedTaxon(labeledCSVParser);
                    Taxon resolved = TaxonMapParser.parseResolvedTaxon(labeledCSVParser);
                    addIfNeeded(taxonLookupService, provided.getExternalId(), resolved.getExternalId());
                    addIfNeeded(taxonLookupService, provided.getName(), resolved.getExternalId());
                    addIfNeeded(taxonLookupService, resolved.getName(), resolved.getExternalId());
                    count++;
                }
            } finally {
                // release the taxon map stream whether or not indexing succeeded;
                // previously the reader was never closed
                if (reader != null) {
                    reader.close();
                }
            }
            watch.stop();
            logCacheLoadStats(watch.getTime(), count);
            LOG.info("taxon map loading [" + taxonMapResource + "] done.");
        }
        taxonLookupService.finish();
        // only expose the service once it is fully initialized
        this.taxonLookupService = taxonLookupService;
        LOG.info("taxon lookup service instantiating done.");
    } catch (IOException e) {
        throw new PropertyEnricherException("problem initiating taxon cache index", e);
    }
}
use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.
the class TaxonEnricherImpl method enrichTaxon.
/**
 * Asks a single enricher to enrich the given properties while keeping its error count current.
 *
 * On success the error count for the service is reset and the enriched properties are
 * returned. On failure the problem is logged as a warning, the error count for the service is
 * incremented, and the failure is re-thrown wrapped in a new exception.
 *
 * @param errorCounts error counters passed on to the reset/increment helpers
 * @param service     enricher to delegate to
 * @param errorCount  current error count, passed on to {@code incrementErrorCount}
 * @param properties  properties to enrich
 * @return the properties as returned by {@code service.enrich}
 * @throws PropertyEnricherException wrapping the exception thrown by the service
 */
private Map<String, String> enrichTaxon(Map<Class, Integer> errorCounts, PropertyEnricher service, Integer errorCount, Map<String, String> properties) throws PropertyEnricherException {
    try {
        final Map<String, String> enriched = service.enrich(properties);
        resetErrorCount(errorCounts, service);
        return enriched;
    } catch (PropertyEnricherException cause) {
        final String warning = "failed to find a match for [" + properties + "] in [" + service.getClass().getSimpleName() + "]";
        LOG.warn(warning, cause);
        incrementErrorCount(errorCounts, service, errorCount);
        throw new PropertyEnricherException("re-throwing", cause);
    }
}
use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.
the class ResolvingTaxonIndexTest method synonymsAddedToIndexOnce.
/**
 * Checks that a non-preferred name is enriched only once: the stub enricher maps
 * "not pref" to "preferred" on its first call and fails the test on any later call for the
 * same name. Repeated lookups of either name must resolve to the node created first.
 */
@Test
public final void synonymsAddedToIndexOnce() throws NodeFactoryException {
    ResolvingTaxonIndex index = createTaxonService(getGraphDb());
    index.setEnricher(new PropertyEnricher() {
        private boolean alreadyEnriched = false;

        @Override
        public Map<String, String> enrich(Map<String, String> properties) throws PropertyEnricherException {
            Taxon taxon = TaxonUtil.mapToTaxon(properties);
            if (!"not pref".equals(taxon.getName())) {
                // any other name passes through untouched
                return TaxonUtil.taxonToMap(taxon);
            }
            if (alreadyEnriched) {
                fail("should already have indexed [" + taxon.getName() + "]...");
            }
            taxon.setName("preferred");
            taxon.setExternalId("bla:123");
            taxon.setPath("one | two | three");
            taxon.setPathIds("1 | 2 | 3");
            alreadyEnriched = true;
            return TaxonUtil.taxonToMap(taxon);
        }

        @Override
        public void shutdown() {
        }
    });
    this.taxonService = index;

    // first request triggers enrichment and creates the node
    Taxon firstRequest = new TaxonImpl("not pref", null);
    firstRequest.setPath(null);
    TaxonNode first = this.taxonService.getOrCreateTaxon(firstRequest);
    assertThat(first.getName(), is("preferred"));
    assertThat(first.getPath(), is("one | two | three"));
    assertThat(first.getPathIds(), is("1 | 2 | 3"));

    // subsequent requests must reuse the node without calling the enricher again
    Taxon secondRequest = new TaxonImpl("not pref", null);
    secondRequest.setPath(null);
    TaxonNode second = this.taxonService.getOrCreateTaxon(secondRequest);
    assertThat(second.getNodeID(), is(first.getNodeID()));

    TaxonNode third = this.taxonService.getOrCreateTaxon(new TaxonImpl("not pref"));
    assertThat(third.getNodeID(), is(first.getNodeID()));

    // both the original and the preferred name resolve to the same node
    TaxonNode byOriginalName = this.taxonService.findTaxonByName("not pref");
    assertThat(byOriginalName.getNodeID(), is(first.getNodeID()));

    TaxonNode byPreferredName = this.taxonService.findTaxonByName("preferred");
    assertThat(byPreferredName.getNodeID(), is(first.getNodeID()));
}
use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.
the class EOLService method getEOLPageId.
/**
 * Determines the EOL page id for a taxon from its external id or, failing that, its name.
 *
 * If a taxonomy provider is recognized for {@code externalId}: an EOL id is parsed directly
 * as a number, and an id of a provider listed in {@code EOL_TAXON_PROVIDER_MAP} is resolved
 * through {@code getPageIdFromProvider}. Ids of other recognized providers are not resolved.
 * If no provider is recognized, the page id is looked up by name, provided the name is not
 * blank and not the {@code NO_NAME} placeholder.
 *
 * @param name       taxon name; only consulted when no provider is recognized for the id
 * @param externalId external taxon id, possibly carrying a provider prefix
 * @return the page id, or {@code null} when none of the branches above produced one
 * @throws PropertyEnricherException when an EOL id is not a valid number (the
 *                                   {@link NumberFormatException} is attached as cause), or
 *                                   when thrown by one of the delegated lookups
 */
private Long getEOLPageId(String name, String externalId) throws PropertyEnricherException {
    Long eolPageId = null;
    TaxonomyProvider taxonomyProvider = ExternalIdUtil.taxonomyProviderFor(externalId);
    if (taxonomyProvider != null) {
        String idNoPrefix = ExternalIdUtil.stripPrefix(taxonomyProvider, externalId);
        if (taxonomyProvider == TaxonomyProvider.EOL) {
            try {
                eolPageId = Long.parseLong(idNoPrefix);
            } catch (NumberFormatException ex) {
                // keep the original exception as cause so the stack trace is not lost
                throw new PropertyEnricherException("failed to parse eol id [" + idNoPrefix + "]", ex);
            }
        } else if (EOL_TAXON_PROVIDER_MAP.containsKey(taxonomyProvider)) {
            eolPageId = getPageIdFromProvider(EOL_TAXON_PROVIDER_MAP.get(taxonomyProvider), idNoPrefix);
        }
    } else if (StringUtils.isNotBlank(name) && !PropertyAndValueDictionary.NO_NAME.equals(name)) {
        eolPageId = getPageId(name, true);
    }
    return eolPageId;
}
Aggregations