use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.
the class Transforms method parseAnnotation.
/**
 * Builds a Xponents {@link TextMatch} from the JSON form of a single annotation.
 * Only the fields read below are restored; a match rebuilt from JSON/REST output
 * carries far less detail than one produced by calling the Java extractors directly.
 *
 * @param data the annotation; expected to be a {@code JSONObject}
 * @return the rebuilt match with its type, start and end set, or {@code null}
 *         when {@code data} is not a {@code JSONObject}
 * @throws JSONException when the "type" value is not one of the handled kinds,
 *         or when raised by the underlying JSON accessors
 */
public static TextMatch parseAnnotation(Object data) throws JSONException {
    if (!(data instanceof JSONObject)) {
        return null;
    }
    JSONObject a = (JSONObject) data;
    String typ = a.getString("type");
    String text = a.getString("matchtext");

    TextMatch m;
    switch (typ) {
        case "place": {
            PlaceCandidate candidate = new PlaceCandidate();
            Place location = new Place();
            candidate.setText(text);
            Transforms.parseGeocoding(location, a);
            candidate.setConfidence(a.optInt("confidence", -1));
            candidate.choose(location);
            m = candidate;
            break;
        }
        case "coordinate": {
            GeocoordMatch coordinate = new GeocoordMatch();
            Place location = new Place();
            coordinate.setText(text);
            // The geocoding is parsed into a scratch Place and then copied over,
            // rather than being parsed into the coordinate match directly.
            Transforms.parseGeocoding(location, a);
            coordinate.setLatLon(location);
            coordinate.setMethod(location.getMethod());
            /*
             * TODO: GeocoordMatch needs to support setters for Geocoding here.
             * Reverse-geocoding info (cc, adm1) is not carried over.
             */
            m = coordinate;
            break;
        }
        case "country": {
            PlaceCandidate candidate = new PlaceCandidate();
            Place country = new Place();
            candidate.setText(text);
            country.setName(text);
            candidate.setConfidence(a.optInt("confidence", -1));
            country.setCountryCode(a.getString("cc"));
            candidate.isCountry = true;
            candidate.choose(country);
            m = candidate;
            break;
        }
        case "person":
        case "org":
        case "taxon": {
            // All taxon-like kinds are parsed the same way; the kind itself is the label.
            TaxonMatch taxonMatch = new TaxonMatch();
            Transforms.parseTaxon(taxonMatch, typ, a);
            m = taxonMatch;
            break;
        }
        default:
            throw new JSONException("Unknown Annotation " + typ);
    }

    // Attributes common to every kind of match.
    m.setType(typ);
    m.start = a.getInt("offset");
    m.end = m.start + a.getInt("length");
    return m;
}
use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.
the class XponentsGeotagger method format.
/**
 * Renders the extracted matches as a JSON document with two top-level keys:
 * "response" (status and the number of tags found) and "annotations" (one JSON
 * node per reported match).
 *
 * <ul>
 * <li>Taxon matches are reported only when {@code jobParams.output_taxons} is set,
 * typed as "org", "person" or "taxon" from the prefix of the first taxon name.</li>
 * <li>Matches that are neither place candidates nor coordinates are ignored.</li>
 * <li>Filtered-out places and coordinates are skipped, with a debug trace.</li>
 * <li>Place candidates with no chosen location are skipped, with a debug trace.</li>
 * </ul>
 *
 * @param matches   matches to report
 * @param jobParams request settings; only {@code output_taxons} is read here
 * @return a UTF-8 JSON representation of the result
 * @throws JSONException if building the JSON content fails
 */
private Representation format(List<TextMatch> matches, RequestParameters jobParams) throws JSONException {
    int tagCount = 0;
    JSONObject resultContent = new JSONObject();
    JSONObject resultMeta = new JSONObject();
    resultMeta.put("status", "ok");
    resultMeta.put("numfound", 0);
    JSONArray resultArray = new JSONArray();

    /*
     * Super loop: iterate through all found entities. Record taxons as
     * person or org; record geo tags as country, place, or coordinate
     * (geocoded place or parsed coordinate such as MGRS, DMS, etc.)
     */
    for (TextMatch name : matches) {
        /*
         * ==========================
         * ANNOTATIONS: non-geographic entities that are filtered out, but worth tracking
         * ==========================
         */
        if (name instanceof TaxonMatch) {
            if (jobParams.output_taxons) {
                TaxonMatch match = (TaxonMatch) name;
                ++tagCount;
                // Only the first taxon is reported: the loop exits after one node.
                for (Taxon n : match.getTaxons()) {
                    JSONObject node = populateMatch(name);
                    String t = "taxon";
                    String taxon_name = n.name.toLowerCase();
                    if (taxon_name.startsWith("org.")) {
                        t = "org";
                    } else if (taxon_name.startsWith("person.")) {
                        t = "person";
                    }
                    node.put("type", t);
                    // Name of taxon
                    node.put("taxon", n.name);
                    // Name of catalog or source
                    node.put("catalog", n.catalog);
                    resultArray.put(node);
                    break;
                }
            }
            continue;
        }

        // Ignore non-place tags.
        if (!(name instanceof PlaceCandidate || name instanceof GeocoordMatch)) {
            continue;
        }

        // Filtered-out places and coordinates are never reported. The trace is
        // emitted here because a later check would be unreachable after this guard.
        if (name.isFilteredOut()) {
            debug("Filtered out " + name.getText());
            continue;
        }

        /*
         * ==========================
         * ANNOTATIONS: coordinates
         * ==========================
         */
        if (name instanceof GeocoordMatch) {
            ++tagCount;
            GeocoordMatch geo = (GeocoordMatch) name;
            JSONObject node = populateMatch(name);
            node.put("type", "coordinate");
            Transforms.createGeocoding(geo, node);
            resultArray.put(node);
            continue;
        }

        /*
         * ==========================
         * ANNOTATIONS: countries, places, etc.
         * ==========================
         */
        PlaceCandidate place = (PlaceCandidate) name;
        Place resolvedPlace = place.getChosen();
        if (resolvedPlace == null) {
            // Nothing to geocode against; skip rather than fail the whole response.
            debug("No chosen location for " + name.getText());
            continue;
        }

        ++tagCount;
        JSONObject node = populateMatch(name);
        if (place.isCountry) {
            node.put("name", resolvedPlace.getPlaceName());
            node.put("type", "country");
            node.put("cc", resolvedPlace.getCountryCode());
            node.put("confidence", place.getConfidence());
        } else {
            Transforms.createGeocoding(resolvedPlace, node);
            node.put("name", resolvedPlace.getPlaceName());
            node.put("type", "place");
            node.put("confidence", place.getConfidence());
            // Low-confidence places are still reported, but flagged for the client.
            if (place.getConfidence() <= 10) {
                node.put("filtered-out", true);
            }
        }
        resultArray.put(node);
    }

    resultMeta.put("numfound", tagCount);
    resultContent.put("response", resultMeta);
    resultContent.put("annotations", resultArray);

    Representation result = new JsonRepresentation(resultContent.toString(2));
    result.setCharacterSet(CharacterSet.UTF_8);
    return result;
}
use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.
the class TestXCoordReporter method save_result.
/**
 * Writes coordinate test/eval rows for one test result.
 *
 * <p>Row layout:
 * <pre>
 * Result ID, CCE family, pattern ID, status, message   // reason for failure
 * Result ID, CCE family, pattern ID, status, Match ID, matchtext, lat, lon, etc.
 *                                                      // success implied by match
 * </pre>
 *
 * <p>When there are matches, one row is written per match. Sub-matches and
 * duplicates are skipped unless {@code full_report} is set; in a full report a
 * sub-match is marked "IGNORE", every other match "PASS". When there are no
 * matches, a single row is written that is "PASS" only if a failure was expected:
 * taken from the test case when one is given, otherwise from whether the result
 * trace contains "FAIL".
 *
 * TODO: use TestCase here or rely on truth evaluation in Python GeocoderEval?
 *
 * @param t       the test case; may be null, in which case the expectation is
 *                read from the result trace
 * @param results matches and trace for the test
 * @throws IOException declared for the report writer
 */
public void save_result(GeocoordTestCase t, TextMatchResult results) throws IOException {
    Map<String, Object> row = null;
    if (!results.matches.isEmpty()) {
        for (TextMatch tm : results.matches) {
            GeocoordMatch m = (GeocoordMatch) tm;
            if (!full_report && (m.is_submatch || m.is_duplicate)) {
                // Ignore submatches and duplicates
                continue;
            }
            row = createTestCase(t);
            row.put(header[6], results.result_id);
            row.put(header[7], (full_report && m.is_submatch) ? "IGNORE" : "PASS");
            String msg = results.message;
            if (m.is_submatch) {
                msg += "; Is Submatch";
            }
            row.put(header[8], msg);
            row.put(header[9], XConstants.get_CCE_family(m.cce_family_id));
            row.put(header[10], m.pattern_id);
            row.put(header[11], m.getText());
            row.put(header[12], "" + m.formatLatitude());
            row.put(header[13], "" + m.formatLongitude());
            String mgrs = "";
            try {
                mgrs = m.toMGRS();
            } catch (Exception ignored) {
                // Best effort: if MGRS formatting fails, the column is left
                // blank and the row is still reported.
            }
            row.put(header[14], mgrs);
            row.put(header[15], m.formatPrecision());
            row.put(header[16], Long.valueOf(m.start));
            report.write(row, header, xcoordResultsSpec);
        }
    } else {
        row = createTestCase(t);
        row.put(header[6], results.result_id);
        boolean expected_failure = false;
        if (t != null) {
            expected_failure = !t.true_positive;
        } else {
            // No test case: look for a test payload in the result trace instead.
            String test_status = results.get_trace().toUpperCase();
            expected_failure = test_status.contains("FAIL");
        }
        // True Negative -- you ignored one correctly
        row.put(header[7], expected_failure ? "PASS" : "FAIL");
        row.put(header[8], results.get_trace());
        report.write(row, header, xcoordResultsSpec);
    }
}
use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.
the class TestGazMatcher method summarizeFindings.
/**
 * Prints a summary of the given matches: per-match geo tags and rule details as
 * it goes, then the distinct place names, country names and coordinate texts
 * (each sorted) together with their counts.
 *
 * @param matches matches to summarize
 */
public static void summarizeFindings(List<TextMatch> matches) {
    Set<String> distinctPlaces = new TreeSet<>();
    Set<String> distinctCountries = new TreeSet<>();
    Set<String> distinctCoords = new TreeSet<>();

    System.out.println("MENTIONS ALL == " + matches.size());

    for (TextMatch tm : matches) {
        printGeoTags(tm);
        boolean isPlace = tm instanceof PlaceCandidate;

        // Filtered-out matches are only mentioned, never counted.
        if (tm.isFilteredOut()) {
            if (isPlace) {
                print("Filtered Out. Rules = " + ((PlaceCandidate) tm).getRules());
            } else {
                System.out.println("\t(filtered out: " + tm.getText() + ")");
            }
            continue;
        }

        if (isPlace) {
            PlaceCandidate candidate = (PlaceCandidate) tm;
            if (!candidate.getRules().isEmpty()) {
                print("Rules = " + candidate.getRules());
            }
            if (candidate.isCountry) {
                distinctCountries.add(candidate.getText());
            } else {
                // Geocoding details are shown only when a location was chosen;
                // the name counts as a place either way.
                if (candidate.getChosen() != null) {
                    print(String.format("\tgeocoded @ %s with conf=%d", candidate.getChosen(),
                            candidate.getConfidence()));
                    ScoredPlace runnerUp = candidate.getSecondChoice();
                    if (runnerUp != null) {
                        print(String.format("\tgeocoded @ %s second place", runnerUp));
                    }
                }
                distinctPlaces.add(candidate.getText());
            }
        }

        if (tm instanceof GeocoordMatch) {
            GeocoordMatch coord = (GeocoordMatch) tm;
            distinctCoords.add(coord.getText());
            if (coord.getRelatedPlace() != null) {
                System.out.println("Coordinate at place named " + coord.getRelatedPlace());
            }
        }
    }

    System.out.println("MENTIONS DISTINCT PLACES == " + distinctPlaces.size());
    System.out.println(distinctPlaces);
    System.out.println("MENTIONS COUNTRIES == " + distinctCountries.size());
    System.out.println(distinctCountries);
    System.out.println("MENTIONS COORDINATES == " + distinctCoords.size());
    System.out.println(distinctCoords);
}
use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.
the class PlaceGeocoder method evaluateCoordinate.
/**
 * Looks up a gazetteer place for the given coordinate. The gazetteer is queried
 * first with {@code COORDINATE_PROXIMITY_CITY_THRESHOLD} and feature class "P";
 * if that finds nothing, it is queried again with
 * {@code COORDINATE_PROXIMITY_ADM1_THRESHOLD} and feature class "A". When a place
 * is found and the coordinate is a {@code GeocoordMatch}, the place is also
 * recorded on the match as its related place.
 *
 * @param g
 *            geocoordinate found in text.
 * @return the place found by the first successful lookup, or null if neither
 *         lookup finds one.
 * @throws SolrServerException
 *             a query against the Solr index may throw a Solr error.
 */
public Place evaluateCoordinate(Geocoding g) throws SolrServerException {
    Place nearby = getGazetteer().placeAt(g, COORDINATE_PROXIMITY_CITY_THRESHOLD, "P");
    if (nearby == null) {
        // Nothing found by the first lookup; fall back to the second.
        nearby = getGazetteer().placeAt(g, COORDINATE_PROXIMITY_ADM1_THRESHOLD, "A");
    }
    if (nearby != null && g instanceof GeocoordMatch) {
        ((GeocoordMatch) g).setRelatedPlace(nearby);
    }
    return nearby;
}
Aggregations