Search in sources :

Example 1 with PlaceEvidence

use of org.opensextant.extractors.geo.PlaceEvidence in project Xponents by OpenSextant.

the class MajorPlaceRule method evaluate.

/**
 * Attaches at most one piece of evidence to the candidate for the given location,
 * chosen by the kind of place:
 * <ul>
 * <li>national capital: CAPITAL evidence, with a base weight two above the rule weight;</li>
 * <li>first-order admin boundary: ADMIN evidence at the rule weight;</li>
 * <li>populated place, when population stats are loaded: POP evidence, but only if the
 * location's geohash cell has a recorded population above POP_MIN.</li>
 * </ul>
 * Any evidence produced is flagged as evaluated, added to the candidate, and one tenth
 * of its weight is added to the candidate's score for this location.
 *
 * @param name the place candidate receiving evidence and score
 * @param geo  the candidate location being assessed; its geohash and population may be set here
 */
@Override
public void evaluate(final PlaceCandidate name, final Place geo) {
    PlaceEvidence evidence = null;

    if (geo.isNationalCapital()) {
        // IFF no countries are mentioned, Capitals are good proxies for country.
        inferCountry(geo);
        final int capitalBase = weight + 2;
        evidence = new PlaceEvidence(geo, CAPITAL, weight(capitalBase, geo));
    } else if (geo.isAdmin1()) {
        evidence = new PlaceEvidence(geo, ADMIN, weight(weight, geo));
        inferBoundary(geo);
    } else if (popStats != null && geo.isPopulated()) {
        final String hash = geohash(geo);
        geo.setGeohash(hash);

        // Population stats are keyed by a geohash prefix of fixed length.
        final String cell = hash.substring(0, GEOHASH_RESOLUTION);
        if (popStats.containsKey(cell)) {
            final int residents = popStats.get(cell);
            if (residents > POP_MIN) {
                geo.setPopulation(residents);

                // Natural log gives a slow-growing curve for population weights, so that
                // population stats cannot overtake all other rules entirely.
                //
                //   ln(22,000)  ~ 10.0
                //   ln(60,000)  ~ 11.0
                //   ln(165,000) ~ 12.0
                //   ln(444,000) ~ 13.0
                //
                // The whole number of log units above 10 is then integer-divided by 3,
                // flattening the scale further: the weight is 0 until ln(pop) reaches 13,
                // 1 until it reaches 16, 2 until it reaches 19, and so on.
                final int logUnitsAboveTen = (int) (Math.log(geo.getPopulation()) - 10);
                final int popWeight = logUnitsAboveTen / 3;
                evidence = new PlaceEvidence(geo, POP, weight(popWeight, geo));
            }
        }
    }

    if (evidence == null) {
        // None of the three categories applied; nothing to record.
        return;
    }

    evidence.setEvaluated(true);
    name.addEvidence(evidence);
    name.incrementPlaceScore(geo, evidence.getWeight() * 0.1);
}
Also used : PlaceEvidence(org.opensextant.extractors.geo.PlaceEvidence)

Example 2 with PlaceEvidence

use of org.opensextant.extractors.geo.PlaceEvidence in project Xponents by OpenSextant.

the class NameCodeRule method evaluate.

/**
 * Looks for "NAME, CODE" / "NAME CODE" patterns among adjacent place candidates, where
 * the second candidate names an administrative boundary (or country) that plausibly
 * contains the first. For every such pairing, evidence carrying the boundary's country
 * code and ADM1 is attached to the preceding name, and the score of each of the name's
 * matching locations is raised by the evidence weight.
 * <p>
 * Requirement (per the original author): the list of place candidates is a linked list.
 * This method itself only relies on {@code size()} and {@code get(int)}, and only
 * considers candidates at consecutive indexes.
 *
 * @param names place candidates to examine pairwise
 */
@Override
public void evaluate(final List<PlaceCandidate> names) {
    for (int x = 0; x < names.size() - 1; ++x) {
        PlaceCandidate name = names.get(x);
        PlaceCandidate code = names.get(x + 1);
        if (name.isFilteredOut() || code.isFilteredOut()) {
            continue;
        }
        /*
         * COUNTRY, STATE is not supported under this rule.
         * E.g., Uruguay, Argentina ... This looks like a list of countries
         * However Uruguay is a district in Argentina; Just as Georgia is a state in US
         * and also a country name.
         */
        if (name.isCountry) {
            continue;
        }
        /*
         * Test if SOMENAME, CODE is the case. a1.....a2.b1.., where b1 > a2
         * > a1, but distance is minimal from end of name to start of code.
         */
        if ((code.start - name.end) > MAX_CHAR_DIST) {
            continue;
        }
        /*
         * Not supporting lowercase codes/abbreviations.  'la', 'is', 'un', etc.
         */
        if (code.isLower() && code.getText().length() < 4) {
            continue;
        }
        // Proximity is one factor, but conventional format ("NAME, CODE") should weigh more.
        // The token array is checked for emptiness as well as null before reading its
        // first element; an empty array simply means no comma follows the name.
        boolean comma = name.getPostmatchTokens() != null
                && name.getPostmatchTokens().length > 0
                && ",".equals(name.getPostmatchTokens()[0]);
        /*
         * by this point a place name tag should be marked as a name or
         * code/abbrev. Match the abbreviation with a geographic location
         * that is a state, county, district, etc.
         */
        Place country = code.isCountry ? code.getChosen() : null;
        log.debug("{} name, code: {} in {}?", NAME, name.getText(), code.getText());
        for (Place geo : code.getPlaces()) {
            // Provinces, states, districts, etc. Only -- and only those with a known country.
            if (!geo.isAdministrative() || geo.getCountryCode() == null) {
                continue;
            }
            // Make sure you can match a province name or code with the gazetteer entries found:
            //   Boston, Ma.  ==== for 'Ma', resolve to an abbreviation for Massachusetts
            //                     Ignore places called 'Ma'
            //
            // Place ('Ma') == will have gazetteer metadata indicating if this is a valid abbreviated code for a place.
            // PlaceCandidate('Ma.') will have textual metadata from given text indicating if it is a code, MA, or abbrev. 'Ma.'
            //
            // These two situations must match here.   We ignore geo locations that do not fit this profile.
            boolean lexicalMatch = ((code.isAbbreviation && geo.isAbbreviation()) || (!code.isAbbreviation && !geo.isAbbreviation()));
            if (!lexicalMatch) {
                continue;
            }
            String adm1 = geo.getHierarchicalPath();
            if (adm1 == null && !code.isCountry) {
                log.debug("ADM1 hierarchical path should not be null");
                continue;
            }
            // Quick determination if these two places have a containment or geopolitical connection.
            // NOTE(review): adm1 may still be null here when the code is a country; this relies on
            // presentInHierarchy tolerating a null path -- confirm.
            boolean contains = name.presentInHierarchy(adm1)
                    || (country != null && name.presentInCountry(country.getCountryCode()));
            if (!contains) {
                continue;
            }
            /*   CITY, STATE
             *   CITY, COUNTRY
             */
            // Associate the CODE to the NAME that precedes it.
            PlaceEvidence ev = new PlaceEvidence();
            ev.setCountryCode(geo.getCountryCode());
            ev.setAdmin1(geo.getAdmin1());
            // Shunt. Evaluate this rule here.
            ev.setEvaluated(true);
            // A comma between name and code earns two extra points; an abbreviation or
            // acronym matching an abbreviated gazetteer entry earns one more.
            int wt = weight + (comma ? 2 : 0);
            if (geo.isAbbreviation() && (code.isAbbreviation || code.isAcronym)) {
                ev.setRule(NAME_ADMCODE_RULE);
                ev.setWeight(wt + 1);
            } else {
                ev.setRule(NAME_ADMNAME_RULE);
                ev.setWeight(wt);
            }
            name.addEvidence(ev);
            if (boundaryObserver != null) {
                boundaryObserver.boundaryLevel1InScope(geo);
            }
            // Boost each location of the name that shares the boundary's hierarchical path,
            // or failing that, passes the sameCountry check against the code's country.
            for (Place nameGeo : name.getPlaces()) {
                if (!(nameGeo.isPopulated() || nameGeo.isAdministrative() || nameGeo.isSpot())) {
                    continue;
                }
                boolean samePath = adm1 != null && adm1.equals(nameGeo.getHierarchicalPath());
                if (samePath || sameCountry(nameGeo, country)) {
                    name.incrementPlaceScore(nameGeo, ev.getWeight());
                }
            }
        }
    }
}
Also used : Place(org.opensextant.data.Place) PlaceEvidence(org.opensextant.extractors.geo.PlaceEvidence) PlaceCandidate(org.opensextant.extractors.geo.PlaceCandidate)

Example 3 with PlaceEvidence

use of org.opensextant.extractors.geo.PlaceEvidence in project Xponents by OpenSextant.

the class LocationChooserRule method evaluate.

/**
 * Scores a yet unchosen location for a candidate.
 * Document-wide context is considered first: the location gains weight if its
 * hierarchical path is a known boundary in scope, or otherwise if its country is in
 * scope, scaled by how prominent that country is. Then each not-yet-evaluated piece of
 * evidence on the candidate is compared with the location, thereby amplifying the
 * differences in the candidates.
 * <p>
 * Does nothing when neither boundary nor country context is available.
 *
 * @param name the place candidate whose score is adjusted
 * @param geo  one possible location for that candidate
 */
@Override
public void evaluate(PlaceCandidate name, Place geo) {
    if (boundaryContext.isEmpty() && countryContext.isEmpty()) {
        return;
    }
    // Default scalar of 1.0 applies when the location's country has no count in context.
    double countryScalar = 1.0;
    CountryCount ccnt = countryContext.get(geo.getCountryCode());
    if (ccnt != null) {
        countryScalar = GLOBAL_POINTS * ccnt.getRatio();
    }
    // This is inferred stuff from the document at large.
    if (geo.getHierarchicalPath() != null && boundaryContext.containsKey(geo.getHierarchicalPath())) {
        name.incrementPlaceScore(geo, countryScalar * ADMIN_CONTAINS_PLACE_WT);
    } else if (countryContext.containsKey(geo.getCountryCode())) {
        name.incrementPlaceScore(geo, countryScalar * COUNTRY_CONTAINS_PLACE_WT);
    }
    // Evidence attached directly to this candidate.
    // NOTE(review): evidence is flagged as evaluated at the end of each iteration, so a
    // later call with the same candidate and a different location skips it -- confirm
    // that is intended.
    for (PlaceEvidence ev : name.getEvidence()) {
        if (ev.wasEvaluated()) {
            continue;
        }
        ev.defaultHierarchicalPath();
        // Evaluate evidence
        if ((ev.getAdmin1() != null && geo.getAdmin1() != null)) {
            // The location's path is treated as nullable above, so guard it here too;
            // a location without a path cannot match the evidence's path.
            String geoPath = geo.getHierarchicalPath();
            if (geoPath != null && geoPath.equals(ev.getHierarchicalPath())) {
                name.incrementPlaceScore(geo, ADMIN_CONTAINS_PLACE_WT);
            }
        } else {
            // A location with no country code cannot match the evidence's country.
            if (geo.getCountryCode() != null && geo.getCountryCode().equals(ev.getCountryCode())) {
                name.incrementPlaceScore(geo, COUNTRY_CONTAINS_PLACE_WT);
            }
        }
        ev.setEvaluated(true);
        log.debug("\tEvidence: {} {}", ev, ev.getAdmin1());
    }
}
Also used : CountryCount(org.opensextant.extractors.geo.CountryCount) PlaceEvidence(org.opensextant.extractors.geo.PlaceEvidence)

Aggregations

PlaceEvidence (org.opensextant.extractors.geo.PlaceEvidence)3 Place (org.opensextant.data.Place)1 CountryCount (org.opensextant.extractors.geo.CountryCount)1 PlaceCandidate (org.opensextant.extractors.geo.PlaceCandidate)1