Use of org.opensextant.extraction.MatchFilter in project Xponents by OpenSextant.
Class XlayerRestlet, method configure.
/**
 * Builds and configures the {@code PlaceGeocoder} tagger, then attaches an
 * optional user-supplied exclusion filter when one is present on the classpath.
 *
 * <p>The filter is looked up as the classpath resource
 * {@code /filters/non-placenames,user.csv}. When the resource is absent an
 * informational message is logged and the tagger is left without a user filter.
 *
 * @throws ConfigException if the filter resource exists but loading it raises
 *         an I/O error; tagger setup may also raise it (not visible from here)
 */
public void configure() throws ConfigException {
    // Default - process place/country mentions in document texts.
    tagger = new PlaceGeocoder();
    // tagger.setParameters(this.params); See Xponents concept of Parameters
    tagger.enablePersonNameMatching(true);
    tagger.configure();

    // TODO: refine this filter list. Use "/filters/non-placenames,user.csv" going forward.
    final String userFilterPath = "/filters/non-placenames,user.csv";
    final URL filterResource = getClass().getResource(userFilterPath);

    // The user filter is optional: without it, just report and finish.
    if (filterResource == null) {
        info("Optional user filter not found. User exclusion list is file=" + userFilterPath);
        return;
    }

    try {
        tagger.setMatchFilter(new MatchFilter(filterResource));
    } catch (IOException ioFailure) {
        // Surface I/O problems as a configuration failure, keeping the cause.
        throw new ConfigException("Setup error with geonames utility or other configuration", ioFailure);
    }
}
Use of org.opensextant.extraction.MatchFilter in project Xponents by OpenSextant.
Class TestGazMatcher, method main.
/**
 * Runs a basic gazetteer-matching test. Requirements include setting
 * opensextant.solr to the solr core home (Xponents/solr, by default).
 *
 * <p>USAGE:
 *
 * <pre>
 *   TestGazMatcher file
 * </pre>
 *
 * <p>Tags three built-in sample texts and prints each match, then tags the
 * content of the file named by the first argument and summarizes the findings.
 * A classpath resource {@code /test-filter.txt} holding non-place names is
 * required; the test stops early with a message when it is missing.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the
 *             document to tag. When absent, only the built-in samples run.
 * @throws Exception if the matcher or the match filter cannot be created
 */
public static void main(String[] args) throws Exception {
    GazetteerMatcher sm = new GazetteerMatcher(true);
    // Everything after construction runs under try/finally so the matcher is
    // shut down on every path, including filter-setup failures.
    try {
        URL filterFile = TestGazMatcher.class.getResource("/test-filter.txt");
        if (filterFile == null) {
            System.err.println("This test requires a 'test-filter.txt' file with non-place names in it." + "\nThese filters should match up with your test documents");
            // The filter is required: do not hand a null URL to MatchFilter.
            return;
        }
        MatchFilter filt = new MatchFilter(filterFile);
        sm.setMatchFilter(filt);

        try {
            String docContent = "We drove to Sin City. The we drove to -$IN ĆITŸ .";
            System.out.println(docContent);
            List<PlaceCandidate> matches = sm.tagText(docContent, "main-test");
            for (PlaceCandidate pc : matches) {
                printGeoTags(pc);
            }

            docContent = "Is there some city in 刘家埝 written in Chinese?";
            matches = sm.tagCJKText(docContent, "main-test");
            for (PlaceCandidate pc : matches) {
                printGeoTags(pc);
            }

            docContent = "Where is seoul?";
            matches = sm.tagText(docContent, "main-test");
            for (PlaceCandidate pc : matches) {
                printGeoTags(pc);
            }

            // The input file is taken from the first argument; report a usage
            // hint instead of failing on an out-of-bounds array access.
            if (args == null || args.length < 1) {
                System.err.println("No input file given. Usage: TestGazMatcher file");
            } else {
                String buf = FileUtility.readFile(args[0]);
                matches = sm.tagText(buf, "main-test", true);
                summarizeFindings(copyFrom(matches));
            }
        } catch (Exception err) {
            // Test harness: report the tagging failure and fall through to shutdown.
            err.printStackTrace();
        }
    } finally {
        sm.shutdown();
    }
}
Aggregations