Search in sources :

Example 1 with PropertyEnricherException

use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.

the class LinkerTermMatcher method handleBatch.

/**
 * Runs one batch of taxon nodes through the term matcher and links the matches it reports.
 *
 * <p>Every entry of {@code nodeMap} is sent to the matcher as a {@code "<node id>|<name>"} string.
 * For each reported match, the node is looked up again by id and connected to the matched taxon,
 * unless the node is unknown, the relation type is {@link NameType#NONE}, or the pair is flagged
 * as a likely homonym. A {@link PropertyEnricherException} from the matcher is logged and not
 * rethrown. The map is cleared before returning.
 *
 * @param graphDb     graph database handed to {@code NodeUtil.connectTaxa}
 * @param termMatcher matcher that resolves the batched names
 * @param nodeMap     node id to taxon node for this batch; emptied by this method
 * @param counter     running counter; used only to derive the batch number in log messages
 */
private void handleBatch(final GraphDatabaseService graphDb, TermMatcher termMatcher, final Map<Long, TaxonNode> nodeMap, int counter) {
    StopWatch timer = new StopWatch();
    timer.start();
    String batchLabel = "batch #" + counter / BATCH_SIZE;
    LOG.info(batchLabel + " preparing...");

    // Encode each node as "<id>|<name>" so the listener can map results back by id.
    List<String> lookupKeys = new ArrayList<String>();
    for (Map.Entry<Long, TaxonNode> idAndNode : nodeMap.entrySet()) {
        lookupKeys.add(idAndNode.getKey() + "|" + idAndNode.getValue().getName());
    }

    if (!lookupKeys.isEmpty()) {
        try {
            termMatcher.findTermsForNames(lookupKeys, new TermMatchListener() {

                @Override
                public void foundTaxonForName(Long nodeId, String name, Taxon taxon, NameType relType) {
                    TaxonNode candidate = nodeMap.get(nodeId);
                    if (candidate == null || NameType.NONE == relType) {
                        return;
                    }
                    if (TaxonUtil.likelyHomonym(taxon, candidate)) {
                        return;
                    }
                    NodeUtil.connectTaxa(taxon, candidate, graphDb, RelTypes.forType(relType));
                }
            });
        } catch (PropertyEnricherException ex) {
            // Best effort: a failed batch is reported, and the method still finishes normally.
            LOG.error(batchLabel + " problem matching terms", ex);
        }
    }

    timer.stop();
    long elapsedMs = timer.getTime();
    LOG.info(batchLabel + " completed in [" + elapsedMs + "] ms (" + (1.0 * elapsedMs / BATCH_SIZE) + " ms/name )");
    nodeMap.clear();
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException) TaxonNode(org.eol.globi.domain.TaxonNode) Taxon(org.eol.globi.domain.Taxon) ArrayList(java.util.ArrayList) NameType(org.eol.globi.domain.NameType) StopWatch(org.apache.commons.lang.time.StopWatch) HashMap(java.util.HashMap) Map(java.util.Map) TermMatchListener(org.eol.globi.taxon.TermMatchListener)

Example 2 with PropertyEnricherException

use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.

the class TaxonCacheService method initTaxonIdMap.

/**
 * Creates and starts the taxon lookup service, indexing the taxon map when needed.
 *
 * <p>The index lives in a {@code lucene} directory under the cache directory. If that directory
 * already existed and the cache is not temporary, the existing index is reused. Otherwise the
 * taxon map resource is read line by line, and for each line the provided id, provided name and
 * resolved name are each passed to {@code addIfNeeded} together with the resolved external id.
 *
 * <p>The reader opened on the taxon map resource is always closed, also when parsing fails.
 *
 * @throws PropertyEnricherException if an {@link IOException} occurs while setting up or
 *                                   populating the index
 */
private void initTaxonIdMap() throws PropertyEnricherException {
    try {
        LOG.info("taxon lookup service instantiating...");
        File luceneDir = new File(getCacheDir().getAbsolutePath(), "lucene");
        // Must be sampled before createCacheDir, which may create the directory.
        boolean preexisting = luceneDir.exists();
        createCacheDir(luceneDir, isTemporary());
        TaxonLookupServiceImpl taxonLookupService = new TaxonLookupServiceImpl(new SimpleFSDirectory(luceneDir));
        taxonLookupService.setMaxHits(getMaxTaxonLinks());
        taxonLookupService.start();
        if (!isTemporary() && preexisting) {
            LOG.info("pre-existing taxon lookup index found, no need to re-index...");
        } else {
            LOG.info("no pre-existing taxon lookup index found, re-indexing...");
            int count = 0;
            LOG.info("taxon map loading [" + taxonMapResource + "] ...");
            StopWatch watch = new StopWatch();
            watch.start();
            BufferedReader reader = createBufferedReader(taxonMapResource);
            try {
                final LabeledCSVParser labeledCSVParser = CSVTSVUtil.createLabeledTSVParser(reader);
                while (labeledCSVParser.getLine() != null) {
                    Taxon provided = TaxonMapParser.parseProvidedTaxon(labeledCSVParser);
                    Taxon resolved = TaxonMapParser.parseResolvedTaxon(labeledCSVParser);
                    addIfNeeded(taxonLookupService, provided.getExternalId(), resolved.getExternalId());
                    addIfNeeded(taxonLookupService, provided.getName(), resolved.getExternalId());
                    addIfNeeded(taxonLookupService, resolved.getName(), resolved.getExternalId());
                    count++;
                }
            } finally {
                // Release the underlying stream even if parsing or indexing failed midway.
                if (reader != null) {
                    reader.close();
                }
            }
            watch.stop();
            logCacheLoadStats(watch.getTime(), count);
            LOG.info("taxon map loading [" + taxonMapResource + "] done.");
        }
        taxonLookupService.finish();
        // Publish the service only after it has been fully initialized.
        this.taxonLookupService = taxonLookupService;
        LOG.info("taxon lookup service instantiating done.");
    } catch (IOException e) {
        throw new PropertyEnricherException("problem initiating taxon cache index", e);
    }
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException) Taxon(org.eol.globi.domain.Taxon) BufferedReader(java.io.BufferedReader) LabeledCSVParser(com.Ostermiller.util.LabeledCSVParser) IOException(java.io.IOException) File(java.io.File) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) StopWatch(org.apache.commons.lang3.time.StopWatch)

Example 3 with PropertyEnricherException

use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.

the class TaxonEnricherImpl method enrichTaxon.

/**
 * Enriches the given properties with a single enricher and updates that enricher's error count.
 *
 * <p>On success the error count for the service is reset and the enriched properties are
 * returned. On failure a warning is logged, the error count is incremented, and the failure is
 * rethrown wrapped in a new {@link PropertyEnricherException}.
 *
 * @param errorCounts error counts passed to the reset and increment helpers
 * @param service     enricher to delegate to
 * @param errorCount  current error count, passed to the increment helper on failure
 * @param properties  taxon properties to enrich
 * @return the properties as returned by {@code service.enrich}
 * @throws PropertyEnricherException if the enricher fails
 */
private Map<String, String> enrichTaxon(Map<Class, Integer> errorCounts, PropertyEnricher service, Integer errorCount, Map<String, String> properties) throws PropertyEnricherException {
    Map<String, String> enriched;
    try {
        enriched = service.enrich(properties);
        resetErrorCount(errorCounts, service);
    } catch (PropertyEnricherException cause) {
        String serviceName = service.getClass().getSimpleName();
        LOG.warn("failed to find a match for [" + properties + "] in [" + serviceName + "]", cause);
        incrementErrorCount(errorCounts, service, errorCount);
        throw new PropertyEnricherException("re-throwing", cause);
    }
    return enriched;
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException)

Example 4 with PropertyEnricherException

use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.

the class ResolvingTaxonIndexTest method synonymsAddedToIndexOnce.

/**
 * Checks that a non-preferred name is sent to the enricher only once.
 *
 * <p>The stub enricher maps "not pref" to "preferred" on its first call and fails the test if it
 * sees "not pref" again. Later lookups by either name must return the node created first.
 */
@Test
public final void synonymsAddedToIndexOnce() throws NodeFactoryException {
    ResolvingTaxonIndex index = createTaxonService(getGraphDb());
    index.setEnricher(new PropertyEnricher() {

        // Set once "not pref" has been enriched; a second request for it fails the test.
        private boolean alreadyEnriched = false;

        @Override
        public Map<String, String> enrich(Map<String, String> properties) throws PropertyEnricherException {
            Taxon taxon = TaxonUtil.mapToTaxon(properties);
            if (!"not pref".equals(taxon.getName())) {
                return TaxonUtil.taxonToMap(taxon);
            }
            if (alreadyEnriched) {
                fail("should already have indexed [" + taxon.getName() + "]...");
            }
            taxon.setName("preferred");
            taxon.setExternalId("bla:123");
            taxon.setPath("one | two | three");
            taxon.setPathIds("1 | 2 | 3");
            alreadyEnriched = true;
            return TaxonUtil.taxonToMap(taxon);
        }

        @Override
        public void shutdown() {
        }
    });
    this.taxonService = index;

    // First request: goes through the enricher and yields the preferred taxon.
    Taxon firstRequest = new TaxonImpl("not pref", null);
    firstRequest.setPath(null);
    TaxonNode first = this.taxonService.getOrCreateTaxon(firstRequest);
    assertThat(first.getName(), is("preferred"));
    assertThat(first.getPath(), is("one | two | three"));
    assertThat(first.getPathIds(), is("1 | 2 | 3"));

    // Repeated requests for the same name must return the same node.
    Taxon secondRequest = new TaxonImpl("not pref", null);
    secondRequest.setPath(null);
    TaxonNode second = this.taxonService.getOrCreateTaxon(secondRequest);
    assertThat(second.getNodeID(), is(first.getNodeID()));

    TaxonNode third = this.taxonService.getOrCreateTaxon(new TaxonImpl("not pref"));
    assertThat(third.getNodeID(), is(first.getNodeID()));

    // Both the original and the preferred name must lead to the first node.
    TaxonNode byOriginalName = this.taxonService.findTaxonByName("not pref");
    assertThat(byOriginalName.getNodeID(), is(first.getNodeID()));
    TaxonNode byPreferredName = this.taxonService.findTaxonByName("preferred");
    assertThat(byPreferredName.getNodeID(), is(first.getNodeID()));
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException) TaxonNode(org.eol.globi.domain.TaxonNode) PropertyEnricher(org.eol.globi.service.PropertyEnricher) Taxon(org.eol.globi.domain.Taxon) TaxonImpl(org.eol.globi.domain.TaxonImpl) TreeMap(java.util.TreeMap) Map(java.util.Map) Test(org.junit.Test)

Example 5 with PropertyEnricherException

use of org.eol.globi.service.PropertyEnricherException in project eol-globi-data by jhpoelen.

the class EOLService method getEOLPageId.

/**
 * Determines the EOL page id for a taxon from its external id or, failing that, its name.
 *
 * <p>If a taxonomy provider can be derived from {@code externalId}:
 * <ul>
 *   <li>for the EOL provider, the id without its prefix is parsed as the page id;</li>
 *   <li>for a provider present in {@code EOL_TAXON_PROVIDER_MAP}, the page id is looked up
 *       via {@code getPageIdFromProvider};</li>
 *   <li>for any other provider, no lookup is attempted.</li>
 * </ul>
 * If no provider can be derived, a lookup by name is done, provided the name is not blank and
 * is not the {@code NO_NAME} placeholder.
 *
 * @param name       taxon name; used only when no provider is derived from {@code externalId}
 * @param externalId external taxon id, expected to carry a provider prefix
 * @return the EOL page id, or {@code null} if none of the branches above produced one
 * @throws PropertyEnricherException if an EOL id is not a valid number (the original
 *                                   {@link NumberFormatException} is attached as the cause),
 *                                   or if a delegated lookup fails
 */
private Long getEOLPageId(String name, String externalId) throws PropertyEnricherException {
    Long eolPageId = null;
    TaxonomyProvider taxonomyProvider = ExternalIdUtil.taxonomyProviderFor(externalId);
    if (taxonomyProvider != null) {
        String idNoPrefix = ExternalIdUtil.stripPrefix(taxonomyProvider, externalId);
        if (taxonomyProvider == TaxonomyProvider.EOL) {
            try {
                eolPageId = Long.parseLong(idNoPrefix);
            } catch (NumberFormatException ex) {
                // Keep the parse failure as the cause so the stack trace is not lost.
                throw new PropertyEnricherException("failed to parse eol id [" + idNoPrefix + "]", ex);
            }
        } else if (EOL_TAXON_PROVIDER_MAP.containsKey(taxonomyProvider)) {
            eolPageId = getPageIdFromProvider(EOL_TAXON_PROVIDER_MAP.get(taxonomyProvider), idNoPrefix);
        }
    } else if (StringUtils.isNotBlank(name) && !PropertyAndValueDictionary.NO_NAME.equals(name)) {
        eolPageId = getPageId(name, true);
    }
    return eolPageId;
}
Also used : PropertyEnricherException(org.eol.globi.service.PropertyEnricherException) TaxonomyProvider(org.eol.globi.domain.TaxonomyProvider)

Aggregations

PropertyEnricherException (org.eol.globi.service.PropertyEnricherException)12 IOException (java.io.IOException)6 Taxon (org.eol.globi.domain.Taxon)4 URI (java.net.URI)3 URISyntaxException (java.net.URISyntaxException)3 TaxonNode (org.eol.globi.domain.TaxonNode)3 LabeledCSVParser (com.Ostermiller.util.LabeledCSVParser)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 StopWatch (org.apache.commons.lang3.time.StopWatch)2 JsonProcessingException (org.codehaus.jackson.JsonProcessingException)2 TaxonImpl (org.eol.globi.domain.TaxonImpl)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 InputStream (java.io.InputStream)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 HashMap (java.util.HashMap)1 TreeMap (java.util.TreeMap)1 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)1 XPathExpressionException (javax.xml.xpath.XPathExpressionException)1