use of org.opensextant.extractors.geo.PlaceCandidate in project Xponents by OpenSextant.
the class GeoTaggerMapper method match2JSON.
/**
 * Render a text match as a JSON object.
 *
 * The object is first created by {@code prepareOutput(tm)}; geocoding details are
 * then layered on top depending on the concrete match type:
 * <ul>
 * <li>{@link PlaceCandidate}: "type" is "country" or "place". When a first choice
 * exists, its place data and the candidate's "confidence" are added; when a second
 * choice also exists it is nested under "alt-place" together with its "score".</li>
 * <li>{@link GeocoordMatch}: place data for the coordinate itself, "type" of
 * "coordinate", the parsing "method", the "confidence", and, when present, the
 * related place nested under "related-place".</li>
 * </ul>
 * Any other kind of match is returned exactly as {@code prepareOutput} produced it.
 *
 * @param tm the match to serialize
 * @return the JSON rendering of the match
 */
public static final JSONObject match2JSON(TextMatch tm) {
    JSONObject json = prepareOutput(tm);

    if (tm instanceof PlaceCandidate) {
        PlaceCandidate place = (PlaceCandidate) tm;
        json.put("type", place.isCountry ? "country" : "place");

        // Without a chosen place there is nothing more to report for this mention.
        if (place.getFirstChoice() == null) {
            return json;
        }
        addPlaceData(place.getFirstChoice(), json);
        json.put("confidence", place.getConfidence());

        // Only the best choice and (optionally) the runner-up are reported.
        if (place.getSecondChoice() == null) {
            return json;
        }
        JSONObject runnerUp = new JSONObject();
        addPlaceData(place.getSecondChoice(), runnerUp);
        runnerUp.put("score", place.getSecondChoice().getScore());
        json.put("alt-place", runnerUp);
        return json;
    }

    if (tm instanceof GeocoordMatch) {
        GeocoordMatch coord = (GeocoordMatch) tm;
        addPlaceData(coord, json);
        json.put("type", "coordinate");
        json.put("method", coord.getMethod());
        json.put("confidence", coord.getConfidence());

        if (coord.getRelatedPlace() == null) {
            return json;
        }
        JSONObject related = new JSONObject();
        addPlaceData(coord.getRelatedPlace(), related);
        json.put("related-place", related);
    }

    return json;
}
use of org.opensextant.extractors.geo.PlaceCandidate in project Xponents by OpenSextant.
the class TestGazMatcher method main.
/**
 * Do a basic smoke test of the gazetteer matcher. Requirements include setting
 * opensextant.solr to the solr core home (Xponents/solr, by default) and having a
 * 'test-filter.txt' resource on the classpath. USAGE:
 *
 * TestGazMatcher file
 *
 * Tags a few built-in sample sentences and prints each match, then tags the
 * content of the given file and summarizes the findings. If no file argument is
 * supplied, only the built-in samples are run and a usage hint is printed.
 *
 * @param args args[0] is the path of a text file to tag
 * @throws Exception if the matcher or the match filter cannot be set up
 */
public static void main(String[] args) throws Exception {
    GazetteerMatcher sm = new GazetteerMatcher(true);
    // Everything after the matcher exists runs under try/finally so that the
    // matcher is always shut down, including when filter setup fails.
    try {
        URL filterFile = TestGazMatcher.class.getResource("/test-filter.txt");
        if (filterFile == null) {
            System.err.println("This test requires a 'test-filter.txt' file with non-place names in it." + "\nThese filters should match up with your test documents");
            // Do not hand a null URL to the filter; the test cannot run without it.
            return;
        }
        MatchFilter filt = new MatchFilter(filterFile);
        sm.setMatchFilter(filt);

        try {
            String docContent = "We drove to Sin City. The we drove to -$IN ĆITŸ .";
            System.out.println(docContent);
            List<PlaceCandidate> matches = sm.tagText(docContent, "main-test");
            for (PlaceCandidate pc : matches) {
                printGeoTags(pc);
            }

            docContent = "Is there some city in 刘家埝 written in Chinese?";
            matches = sm.tagCJKText(docContent, "main-test");
            for (PlaceCandidate pc : matches) {
                printGeoTags(pc);
            }

            docContent = "Where is seoul?";
            matches = sm.tagText(docContent, "main-test");
            for (PlaceCandidate pc : matches) {
                printGeoTags(pc);
            }

            // The file argument is optional in practice: report its absence
            // clearly instead of failing on args[0].
            if (args == null || args.length < 1) {
                System.err.println("USAGE: TestGazMatcher file");
                return;
            }
            String buf = FileUtility.readFile(args[0]);
            matches = sm.tagText(buf, "main-test", true);
            summarizeFindings(copyFrom(matches));
        } catch (Exception err) {
            // Best-effort test driver: report tagging problems and fall through to shutdown.
            err.printStackTrace();
        }
    } finally {
        sm.shutdown();
    }
}
use of org.opensextant.extractors.geo.PlaceCandidate in project Xponents by OpenSextant.
the class TestPersonFilter method test.
/**
 * Runs the person-name filter against the mention "John Doe" with four different
 * surrounding-token contexts and prints, for each, whether the candidate ended up
 * filtered out. The filter lists are loaded from the classpath; the test fails if
 * the filter cannot be configured.
 */
@Test
public void test() {
    // Set classpath to point to ./gazetteer/conf
    URL nameList = PlaceGeocoder.class.getResource("/filters/person-name-filter.txt");
    URL titleList = PlaceGeocoder.class.getResource("/filters/person-title-filter.txt");
    URL suffixList = PlaceGeocoder.class.getResource("/filters/person-suffix-filter.txt");

    try {
        PersonNameFilter personFilter = new PersonNameFilter(nameList, titleList, suffixList);
        PlaceCandidate mention = new PlaceCandidate();
        mention.setText("John Doe");

        // No context on either side.
        evaluateAndPrint(personFilter, mention, null, null);
        // Whitespace-only context on both sides.
        evaluateAndPrint(personFilter, mention, " ".split(" "), " ".split(" "));
        // A title precedes the name; nothing follows.
        evaluateAndPrint(personFilter, mention, "this is Mr. ".split(" "), null);
        // A title precedes the name and ordinary text follows.
        evaluateAndPrint(personFilter, mention, "this is Mr. ".split(" "),
                " and his wife lives in the city...".split(" "));
    } catch (ConfigException e) {
        e.printStackTrace();
        fail("Configuration problem -- set CLASSPATH to include ./conf");
    }
}

/**
 * Sets the tokens before and after the mention, runs the filter on it, and prints
 * the mention text alongside its filtered-out flag.
 */
private void evaluateAndPrint(PersonNameFilter personFilter, PlaceCandidate mention,
        String[] before, String[] after) throws ConfigException {
    mention.setPrematchTokens(before);
    mention.setPostmatchTokens(after);
    personFilter.evaluate(mention, null);
    print(mention.getText() + " pass? " + mention.isFilteredOut());
}
Aggregations