Search in sources :

Example 6 with GeocoordMatch

use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.

the class GeoTaggerMapper method match2JSON.

/**
 * Serialize a single text match as a JSON object.
 *
 * The object is seeded by {@code prepareOutput(tm)}. Geocoding details are then
 * layered on for the two match kinds handled here:
 * <ul>
 * <li>{@code PlaceCandidate}: "type" is "country" or "place"; when a first choice
 *     exists its place data and the candidate's "confidence" are added, and a
 *     second choice (if any) is reported under "alt-place" with its "score".</li>
 * <li>{@code GeocoordMatch}: place data, "type" = "coordinate", "method",
 *     "confidence", and an optional "related-place".</li>
 * </ul>
 * Any other kind of match is returned exactly as {@code prepareOutput} built it.
 *
 * @param tm the match to serialize
 * @return the JSON object describing {@code tm}
 */
public static final JSONObject match2JSON(TextMatch tm) {
    final JSONObject json = prepareOutput(tm);

    if (tm instanceof PlaceCandidate) {
        final PlaceCandidate place = (PlaceCandidate) tm;
        json.put("type", place.isCountry ? "country" : "place");

        // Geotagging can be noisy; only the top-ranked choice (plus one runner-up) is reported.
        // NOTE(review): no confidence threshold is applied in this method itself.
        if (place.getFirstChoice() == null) {
            return json;
        }
        addPlaceData(place.getFirstChoice(), json);
        json.put("confidence", place.getConfidence());

        if (place.getSecondChoice() != null) {
            final JSONObject runnerUp = new JSONObject();
            addPlaceData(place.getSecondChoice(), runnerUp);
            runnerUp.put("score", place.getSecondChoice().getScore());
            json.put("alt-place", runnerUp);
        }
        return json;
    }

    if (tm instanceof GeocoordMatch) {
        // Coordinate matches can be noisy as well (e.g., +/- 10KM precision).
        // NOTE(review): no precision filter is applied in this method itself.
        final GeocoordMatch coord = (GeocoordMatch) tm;
        addPlaceData(coord, json);
        json.put("type", "coordinate");
        json.put("method", coord.getMethod());
        json.put("confidence", coord.getConfidence());

        if (coord.getRelatedPlace() != null) {
            final JSONObject related = new JSONObject();
            addPlaceData(coord.getRelatedPlace(), related);
            json.put("related-place", related);
        }
    }
    return json;
}
Also used : GeocoordMatch(org.opensextant.extractors.xcoord.GeocoordMatch) JSONObject(net.sf.json.JSONObject) PlaceCandidate(org.opensextant.extractors.geo.PlaceCandidate)

Example 7 with GeocoordMatch

use of org.opensextant.extractors.xcoord.GeocoordMatch in project Xponents by OpenSextant.

the class TestXCoord method focusedTests.

/**
 * Use for limited development testing.
 *
 * Enables a hand-picked subset of coordinate pattern families on {@code xcoord},
 * wraps each sample string from the enabled families in filler text
 * ("text before ... and after"), runs extraction on it, and logs how many matches
 * were found along with each match and, for coordinate matches, its formatted
 * latitude/longitude. Nothing is asserted; results are inspected in the log.
 */
protected void focusedTests() {
    log.info("=== ADHOC TESTS ===");
    log.info("Trying some specific DD tests now:\n=========================");
    // NOTE(review): these initial settings are repeated below via the local flags with
    // the same values; they look redundant but are kept in case the calls are not idempotent.
    xcoord.match_DD(true);
    xcoord.match_DMS(true);
    xcoord.match_DM(true);
    xcoord.match_MGRS(false);
    xcoord.match_UTM(false);

    // Toggle these to choose which families of test strings are exercised.
    boolean dd = true;
    boolean dms = true;
    boolean dm = true;
    boolean mgrs = false;
    boolean utm = false;

    // NOTE(review): in the arrays below, the trailing comments appear to have been shifted
    // and duplicated relative to their entries (likely by a formatter) -- verify against the
    // upstream source before relying on any individual annotation.
    xcoord.match_MGRS(mgrs);
    String[] mgrstest = { "1 FEB 2013", "12 GMT 18", "12 ctf 4000", "04\nSMB800999", "12\nDTF\r7070", "12\rDTF\r7070", "12\n\rDTF\r7070", "7MAR13 1600", "17MAR13 1600", "17MAR13 2014", "17MAY13 2014", "17JUN13 2014", "17JUL13 2014", "17SEP13 2014", "17OCT13 2014", "17NOV13 2014", "17DEC13 2014", "17APR13 2014", "17AUG13 2014", "17JAN13 2014", "7JAN13 2001", "17 JAN 13 2014", "7 JAN 13 2001", // Fail -- too much whitespace.
    "04RAA80099\n\t1", // edge case, bare minimum.
    "12FTF82711", // edge case, bare minimum.
    "15 EST 2008", // edge case, bare minimum.
    "14 MRE\n\n 1445", // edge case, bare minimum.
    "4 jul 2008", // edge case, bare minimum.
    "10 Jan 1994", // edge case, bare minimum.
    "10 Jan 13", // no, this is the real bare minimum.
    "10 Jan 94", // 0-padded Northing/Easting?  7 4 or 0007 0004
    "38SMB 461136560", // 0-padded Northing/Easting?  7 4 or 0007 0004
    "38SMB 461103656", // 0-padded Northing/Easting?  7 4 or 0007 0004
    "38SMB 46110 3656", // 0-padded Northing/Easting?  7 4 or 0007 0004
    "38SMB 4611 03656", // even, but whitespace
    "38SMB 46110365 60", // even, but whitespace
    "38SMB 46110365\n60", // odd, and whitespace
    "38SMB 4611035\n60", // MGRS 01, 10JAN 200502
    "38 SMB 4611 3656", // MGRS 01, 10JAN 200502
    "42 RPR 4611 3656", // MGRS 01, 10JAN 200502
    "10 Jan 2005 02", "10 Jan 1995 02" };
    xcoord.match_DD(dd);
    String[] ddtest = { "N 49°2' 0'' / E 38°22' 0''", "1.718114°  44.699603°", "N34.445566° W078.112233°", "00 N 130 WA", "xxxxxxxxxxxxx-385331-17004121.1466dc9989b3545553c65ef91c14c0f3yyyyyyyyyyyyyyyyyyy", "-385331-17004121", "CAN-385331-17004121", "15S5E", //DD04
    "TARGET [1]  LATITUDE: +32.3345  LONGITUDE: -179.3412", //DD04
    "TARGET [1]  LATITUDE= +32.3345  LONGITUDE= -179.3412", "42.3N; 102.4W", "42.3 N; 102.4 W", "23.34N 88.22E", // DD01
    "N32.3345:W179.3412", // DD03
    "+32.3345:-179.3412", // DD03
    " 32.3345:-179.3412", // DD03
    " 32.3345°;-179.3412°", // DD03  leading 0 on lat;
    "032.3345°;-179.3412°", // DD01
    "N32.3345:W179.3412", // DD03  leading 0 on lat;
    "032.3345°N;-179.3412°W", // DD01
    "N32.3345:E179.3412", // DD02
    "32.3345N/179.3412E", // DD02
    "32.33N 179.34E" };
    xcoord.match_DMS(dms);
    xcoord.match_DM(dm);
    String[] dmtest = { "N 49°2' 0'' / E 38°22' 0''", "xxxxxxxxxxxxx-385331-17004121.1466dc9989b3545553c65ef91c14c0f3yyyyyyyyyyyyyyyyyyy", "-385331-17004121", "41º58'46\"N, 87º54'20\"W ", "Latitude: 41º58'46\"N, Longitude: 87º54'20\"W ", "15S5E", //"01-02-03-04 005-06-07-08",
    " 79.22.333N, 100.22.333W", " N 01° 44' E 101° 22'", "+42 18.0 x -102 24.0", "42 DEG 18.0N 102 DEG 24.0W", "#TEST   DM      01b      01DEG 44 N 101DEG 44 E", "03bv  4218N 10224W", "03bv      42°18'N 102°24'W", "03bv      42° 18'N 102° 24'W", "N 01° 44' E 101° 22'", "1122N-00 11122W-00", "01DEG 44N 101DEG 44E", "42 9-00 N 102 6-00W", "N42 18-00 x W102 24-00", "N01° 44' 55.5\" E101° 22' 33.0\"", "N 01° 44' 55\" E 101° 22'33.0\"", "33-04-05 12:11:10", "31°24' 70°21'", // No HEMI
    "40°55'23.2\" 9°43'51.1\"", // with HEMI
    "-40°55'23.2\" +9°43'51.1\"", "42 9-00 N 102 6-00W;           ", "42 18-009 N 102 24-009W;        ", // No HEMI
    "08°29.067' 13°14.067'", "08°29.067'N 13°14.067'W", "08°29.067'N 113°14.067'W", "40°55'23.2\"N 9°43'51\"E", "42° 18' 00\" 102° 24' 00", "(42° 18' 00\" 102° 24' 00", "01° 44' 55.5\" 101° 22' 33.0\"", "77°55'33.22\"N 127°33'22.11\"W", "40:26:46.123N,79:56:55.000W", "43-04-30.2720N 073-34-58.4170W", "31 53 45.55N 54 16 38.99E", "42.18.009N x 102.24.003W", "42.18.009N 102.24.003W", "42.18.009 N x 102.24.003 W", "014455666N1012233444E", "N7922333W10022333", "01°44'55.5\"N 101°22'33.0\"E;", "N01°44'55.5\" E101°22'33.0\"", "4025131234N 12015191234W", // original
    "5113N 00425E", // original
    "27° 37' 45’’N, 82° 42' 10’’W", // single second hash sym
    "27° 37' 45’N, 82° 42' 10’W", // no lat/lon sep
    "27° 37' 45’’N 82° 42' 10’’W", // no min hash.
    "27° 37 45N, 82° 42 10W" };
    String[] utm_tests = { "12\n\t\nX\t\n245070175", "12\n\nX\n266070175", "12 X 266070175", "12X 266070 175" };
    xcoord.match_UTM(utm);

    // Assemble the inputs for the enabled families only. DMS and DM share one sample set.
    List<String> tests = new ArrayList<String>();
    if (utm) {
        tests.addAll(Arrays.asList(utm_tests));
    }
    if (dd) {
        tests.addAll(Arrays.asList(ddtest));
    }
    if (dms || dm) {
        tests.addAll(Arrays.asList(dmtest));
    }
    if (mgrs) {
        tests.addAll(Arrays.asList(mgrstest));
    }

    int count = 0;
    for (String testcase : tests) {
        ++count;
        String test_id = String.valueOf(count);
        TextMatchResult results = xcoord.extract_coordinates("text before " + testcase + " and after", test_id);

        // Decide "nothing found" once, null-safely, BEFORE touching the match list.
        // Previously isEmpty() was called ahead of the null check, so a null list threw.
        boolean nothingFound = results == null || results.matches == null || results.matches.isEmpty();
        log.info("TEST (" + count + ") " + testcase + " FOUND:" + (nothingFound ? "NOTHING" : results.matches.size()));
        if (nothingFound) {
            continue;
        }
        for (TextMatch m : results.matches) {
            log.info("\t" + m.toString());
            // Only coordinate matches can be formatted as lat/lon; skip anything else
            // instead of failing the whole run with a ClassCastException.
            if (m instanceof GeocoordMatch) {
                GeocoordMatch g = (GeocoordMatch) m;
                log.info("\t" + g.formatLatitude() + ", " + g.formatLongitude());
            }
        }
    }
    log.info("=== ADHOC TESTS DONE ===");
}
Also used : GeocoordMatch(org.opensextant.extractors.xcoord.GeocoordMatch) TextMatch(org.opensextant.extraction.TextMatch) TextMatchResult(org.opensextant.extractors.flexpat.TextMatchResult)

Aggregations

GeocoordMatch (org.opensextant.extractors.xcoord.GeocoordMatch)7 TextMatch (org.opensextant.extraction.TextMatch)5 PlaceCandidate (org.opensextant.extractors.geo.PlaceCandidate)4 Place (org.opensextant.data.Place)3 JSONObject (org.json.JSONObject)2 TaxonMatch (org.opensextant.extractors.xtax.TaxonMatch)2 TreeSet (java.util.TreeSet)1 JSONObject (net.sf.json.JSONObject)1 JSONArray (org.json.JSONArray)1 JSONException (org.json.JSONException)1 Taxon (org.opensextant.data.Taxon)1 TextMatchResult (org.opensextant.extractors.flexpat.TextMatchResult)1 ScoredPlace (org.opensextant.extractors.geo.ScoredPlace)1 JsonRepresentation (org.restlet.ext.json.JsonRepresentation)1 Representation (org.restlet.representation.Representation)1