Search in sources :

Example 11 with PlaceCandidate

use of org.opensextant.extractors.geo.PlaceCandidate in project Xponents by OpenSextant.

the class GeoTaggerMapper method match2JSON.

/**
 * Render a geocoding match as a JSON object.
 * <p>
 * Geographic detail is added only for {@link PlaceCandidate} and
 * {@link GeocoordMatch} instances; any other kind of match is returned
 * exactly as {@code prepareOutput(tm)} produced it.
 *
 * @param tm the text match to serialize
 * @return the JSON object from {@code prepareOutput}, augmented with a
 *         "type" entry plus place data and "confidence" where available
 */
public static final JSONObject match2JSON(TextMatch tm) {
    final JSONObject json = prepareOutput(tm);

    if (tm instanceof PlaceCandidate) {
        final PlaceCandidate place = (PlaceCandidate) tm;
        json.put("type", place.isCountry ? "country" : "place");

        // Without a first choice there is nothing geographic to report;
        // only the type tag is emitted.
        final Place best = place.getFirstChoice();
        if (best == null) {
            return json;
        }
        addPlaceData(best, json);
        json.put("confidence", place.getConfidence());

        // A runner-up, when present, is reported alongside its own score.
        final Place runnerUp = place.getSecondChoice();
        if (runnerUp != null) {
            final JSONObject alternate = new JSONObject();
            addPlaceData(runnerUp, alternate);
            alternate.put("score", runnerUp.getScore());
            json.put("alt-place", alternate);
        }
        return json;
    }

    if (tm instanceof GeocoordMatch) {
        final GeocoordMatch coord = (GeocoordMatch) tm;
        addPlaceData(coord, json);
        json.put("type", "coordinate");
        json.put("method", coord.getMethod());
        json.put("confidence", coord.getConfidence());

        // Attach the place associated with this coordinate, if one was set.
        final Place related = coord.getRelatedPlace();
        if (related != null) {
            final JSONObject relatedJson = new JSONObject();
            addPlaceData(related, relatedJson);
            json.put("related-place", relatedJson);
        }
    }
    return json;
}
Also used : GeocoordMatch(org.opensextant.extractors.xcoord.GeocoordMatch) JSONObject(net.sf.json.JSONObject) PlaceCandidate(org.opensextant.extractors.geo.PlaceCandidate)

Example 12 with PlaceCandidate

use of org.opensextant.extractors.geo.PlaceCandidate in project Xponents by OpenSextant.

the class TestGazMatcher method main.

/**
 * Do a basic test. Requirements include setting opensextant.solr to the solr
 * core home (Xponents/solr, by default), and a 'test-filter.txt' resource on
 * the classpath.
 * <p>
 * USAGE: {@code TestGazMatcher file}
 * <p>
 * Tags three built-in sample sentences and prints each match, then, when a
 * file argument is supplied, tags that file and summarizes the findings.
 *
 * @param args args[0], if present, is the path of a text file to tag
 * @throws Exception if the gazetteer matcher cannot be created
 */
public static void main(String[] args) throws Exception {
    GazetteerMatcher sm = new GazetteerMatcher(true);
    // Everything after construction runs inside try/finally so the matcher
    // is shut down even when the filter cannot be loaded.
    try {
        URL filterFile = TestGazMatcher.class.getResource("/test-filter.txt");
        if (filterFile == null) {
            System.err.println("This test requires a 'test-filter.txt' file with non-place names in it." + "\nThese filters should match up with your test documents");
            // Stop here rather than hand a null URL to MatchFilter.
            return;
        }
        MatchFilter filt = new MatchFilter(filterFile);
        sm.setMatchFilter(filt);

        String docContent = "We drove to Sin City. The we drove to -$IN ĆITŸ .";
        System.out.println(docContent);
        printAllGeoTags(sm.tagText(docContent, "main-test"));

        docContent = "Is there some city in 刘家埝 written in Chinese?";
        printAllGeoTags(sm.tagCJKText(docContent, "main-test"));

        docContent = "Where is seoul?";
        printAllGeoTags(sm.tagText(docContent, "main-test"));

        // The file-based portion needs an argument; report usage instead of
        // failing with an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1) {
            System.err.println("USAGE: TestGazMatcher file");
            return;
        }
        String buf = FileUtility.readFile(args[0]);
        List<PlaceCandidate> matches = sm.tagText(buf, "main-test", true);
        summarizeFindings(copyFrom(matches));
    } catch (Exception err) {
        err.printStackTrace();
    } finally {
        sm.shutdown();
    }
}

/** Prints the geotags of every candidate in the given list, in order. */
private static void printAllGeoTags(List<PlaceCandidate> matches) {
    for (PlaceCandidate pc : matches) {
        printGeoTags(pc);
    }
}
Also used : GazetteerMatcher(org.opensextant.extractors.geo.GazetteerMatcher) MatchFilter(org.opensextant.extraction.MatchFilter) URL(java.net.URL) PlaceCandidate(org.opensextant.extractors.geo.PlaceCandidate)

Example 13 with PlaceCandidate

use of org.opensextant.extractors.geo.PlaceCandidate in project Xponents by OpenSextant.

the class TestPersonFilter method test.

/**
 * Runs the person-name filter over the text "John Doe" with four different
 * surrounding-token contexts and prints the filtered-out flag after each
 * evaluation. Fails only if the filter resources cannot be configured.
 */
@Test
public void test() {
    // The filter lists are resolved from the classpath (./gazetteer/conf).
    final URL nameList = PlaceGeocoder.class.getResource("/filters/person-name-filter.txt");
    final URL titleList = PlaceGeocoder.class.getResource("/filters/person-title-filter.txt");
    final URL suffixList = PlaceGeocoder.class.getResource("/filters/person-suffix-filter.txt");
    try {
        final PersonNameFilter personFilter = new PersonNameFilter(nameList, titleList, suffixList);
        final PlaceCandidate candidate = new PlaceCandidate();
        candidate.setText("John Doe");

        // Case 1: no surrounding tokens at all.
        candidate.setPrematchTokens(null);
        candidate.setPostmatchTokens(null);
        personFilter.evaluate(candidate, null);
        print(candidate.getText() + " pass? " + candidate.isFilteredOut());

        // Case 2: surrounding context made up only of spaces.
        candidate.setPrematchTokens("             ".split(" "));
        candidate.setPostmatchTokens("             ".split(" "));
        personFilter.evaluate(candidate, null);
        print(candidate.getText() + " pass? " + candidate.isFilteredOut());

        // Case 3: a title precedes the name; nothing follows.
        candidate.setPrematchTokens("this is Mr. ".split(" "));
        candidate.setPostmatchTokens(null);
        personFilter.evaluate(candidate, null);
        print(candidate.getText() + " pass? " + candidate.isFilteredOut());

        // Case 4: a title precedes the name and ordinary text follows.
        candidate.setPrematchTokens("this is Mr. ".split(" "));
        candidate.setPostmatchTokens(" and his wife lives in the city...".split(" "));
        personFilter.evaluate(candidate, null);
        print(candidate.getText() + " pass? " + candidate.isFilteredOut());
    } catch (ConfigException configErr) {
        configErr.printStackTrace();
        fail("Configuration problem -- set CLASSPATH to include ./conf");
    }
}
Also used : ConfigException(org.opensextant.ConfigException) PersonNameFilter(org.opensextant.extractors.geo.rules.PersonNameFilter) URL(java.net.URL) PlaceCandidate(org.opensextant.extractors.geo.PlaceCandidate) Test(org.junit.Test)

Aggregations

PlaceCandidate (org.opensextant.extractors.geo.PlaceCandidate)13 Place (org.opensextant.data.Place)8 GeocoordMatch (org.opensextant.extractors.xcoord.GeocoordMatch)4 TextMatch (org.opensextant.extraction.TextMatch)3 TaxonMatch (org.opensextant.extractors.xtax.TaxonMatch)3 URL (java.net.URL)2 JSONObject (org.json.JSONObject)2 HashSet (java.util.HashSet)1 TreeSet (java.util.TreeSet)1 JSONObject (net.sf.json.JSONObject)1 JSONArray (org.json.JSONArray)1 JSONException (org.json.JSONException)1 Test (org.junit.Test)1 ConfigException (org.opensextant.ConfigException)1 Country (org.opensextant.data.Country)1 Taxon (org.opensextant.data.Taxon)1 MatchFilter (org.opensextant.extraction.MatchFilter)1 CountryCount (org.opensextant.extractors.geo.CountryCount)1 GazetteerMatcher (org.opensextant.extractors.geo.GazetteerMatcher)1 PlaceCount (org.opensextant.extractors.geo.PlaceCount)1