use of org.opensextant.extraction.ExtractionException in project Xponents by OpenSextant.
the class PlaceGeocoder method parseKnownNonPlaces.
/**
 * Poor-man's named-entity extraction: tags spans that are known NOT to be places
 * (person names, organizations, nationalities) so they can be weighed against the
 * place candidates. This runs even if no geo matches were found, provided person
 * name matching is enabled.
 *
 * <p>This step is best-effort. If the underlying matcher fails, the error is logged
 * and the method returns without touching {@code candidates} or {@code matches};
 * no exception is propagated for that failure.
 *
 * @param input      the text input; its {@code buffer} is passed to the person matcher
 * @param candidates place candidates handed to the person-name rule for evaluation
 * @param matches    output list; person and organization matches found here are appended
 */
private void parseKnownNonPlaces(TextInput input, List<PlaceCandidate> candidates, List<TextMatch> matches) {
    if (!isPersonNameMatchingEnabled()) {
        return;
    }

    // If this step fails miserably, do not raise error. Log the error and return nothing found.
    List<TextMatch> nonPlaces;
    try {
        nonPlaces = personMatcher.extract(input.buffer);
    } catch (Exception err) {
        // Pass the throwable itself so the stack trace and cause are preserved in the log.
        log.error("Person/org name matching failed; no non-place entities reported", err);
        return;
    }
    if (nonPlaces == null || nonPlaces.isEmpty()) {
        return;
    }

    List<TaxonMatch> persons = new ArrayList<>();
    List<TaxonMatch> orgs = new ArrayList<>();
    log.debug("Matched {}", nonPlaces.size());

    for (TextMatch tm : nonPlaces) {
        if (!(tm instanceof TaxonMatch)) {
            continue;
        }
        TaxonMatch tag = (TaxonMatch) tm;

        //
        // For the purposes of geocoding/geoparsing filter out ALL
        // TaxonMatches. Any place names should reside back in
        // gazetteer. If XTax does have place or location data, that would be new.
        //
        tm.setFilteredOut(true);

        for (Taxon taxon : tag.getTaxons()) {
            // Locale.ROOT keeps the prefix tests below independent of the JVM default
            // locale (e.g. Turkish casing rules for the letter 'I').
            String node = taxon.name.toLowerCase(java.util.Locale.ROOT);

            // name spans that are not places.
            if (node.startsWith("person.")) {
                persons.add(tag);
                break;
            } else if (node.startsWith("org.")) {
                // A mixed/lower-case span that only matched an acronym entry is skipped;
                // keep looking at the remaining taxons for this tag.
                if (taxon.isAcronym && !tm.isUpper()) {
                    continue;
                }
                orgs.add(tag);
                break;
            } else if (node.startsWith("nationality.")) {
                // NOTE(review): there is no break in this branch, so the remaining taxons
                // are still examined and the same tag can be added to persons more than
                // once. Confirm whether duplicates are acceptable downstream.
                persons.add(tag);

                // Defensive: skip taxons that carry no tag set at all.
                if (taxon.tagset == null) {
                    continue;
                }
                // The tag may be absent as some ethnicities may be mixed in and indicate no country.
                for (String t : taxon.tagset) {
                    int x = t.indexOf("cc+");
                    if (x >= 0) {
                        // Everything after the "cc+" marker is taken as the country code.
                        String isocode = t.substring(x + 3);
                        this.countryInScope(isocode);
                        nationalities.put(tag.getText(), isocode);
                    }
                }
            }
        }
    }

    personNameRule.evaluateNamedEntities(candidates, persons, orgs);
    matches.addAll(persons);
    matches.addAll(orgs);
}
use of org.opensextant.extraction.ExtractionException in project Xponents by OpenSextant.
the class XtractorGroup method process.
/**
 * Process one input. If you have no need for formatting output at this time
 * use this. If you have complex ExtractionResults where you want to add
 * meta attributes, then you would use this approach.
 *
 * <p>Every configured extractor is run over the input and all non-empty results
 * are pooled into a single list. An extractor that throws
 * {@code ExtractionException} is logged and recorded in {@code currErrors};
 * the remaining extractors still run.
 *
 * @param input the input handed to each extractor
 * @return the combined matches of all extractors, in extractor order; never null,
 *         possibly empty
 */
public List<TextMatch> process(TextInput input) {
    List<TextMatch> oneResultSet = new ArrayList<>();
    progressMonitor.setNumberOfSteps(extractors.size());

    // Process all extraction and compile on a single list.
    for (Extractor x : extractors) {
        try {
            List<TextMatch> results = x.extract(input);
            if (results != null && !results.isEmpty()) {
                oneResultSet.addAll(results);
            }
        } catch (ExtractionException loopErr) {
            // One failing extractor must not abort the others: log it, remember it, move on.
            log.error("Extractor=" + x.getName() + " on Input=" + input.id, loopErr);
            currErrors.add("Extractor=" + x.getName() + " ERR=" + loopErr.getMessage());
        }
    }

    progressMonitor.completeDocument();
    return oneResultSet;
}
Aggregations