Examples with TextEntity - org.opensextant.extraction.TextEntity

Example 1 with TextEntity

use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.

the class TestExtraction method test.

/**
 * Builds two spans, [10, 15] and [11, 17], and prints how the second
 * compares with the first: the results of isWithin, isSameMatch and
 * isOverlap, one per line, in that order.
 */
public void test() {
    TextEntity first = new TextEntity();
    first.start = 10;
    first.end = 15;

    TextEntity second = new TextEntity();
    second.start = 11;
    second.end = 17;

    // Each comparison is asked of the second span, with the first as argument.
    System.out.println(second.isWithin(first));
    System.out.println(second.isSameMatch(first));
    System.out.println(second.isOverlap(first));
}

Also used : TextEntity(org.opensextant.extraction.TextEntity)

Example 2 with TextEntity

use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.

the class RegexPatternManager method group_matches.

/**
     * Matched fields as TextEntities.
     *
     * One TextEntity is created per capturing group of the matcher, carrying
     * the group's text together with both its start and end offsets as
     * reported by the matcher. Entries are keyed by the value found in
     * {@code p.regex_groups} at the group's 0-based position.
     *
     * Per the java.util.regex.Matcher contract, a group that did not
     * participate in the match yields null text and -1 for both offsets;
     * such groups are still added to the map.
     *
     * @param p the pattern whose regex_groups supply the key for each group
     * @param matched a matcher positioned on a successful match
     * @return a map of group key to the TextEntity describing that group
     */
public Map<String, TextEntity> group_matches(RegexPattern p, java.util.regex.Matcher matched) {
    Map<String, TextEntity> pairs = new HashMap<String, TextEntity>();
    int cnt = matched.groupCount();
    for (int x = 0; x < cnt; ++x) {
        // Matcher capturing groups are 1-based (group 0 is the entire match),
        // while regex_groups is looked up with the 0-based loop index.
        int group = x + 1;
        String nm = p.regex_groups.get(x);
        TextEntity e = new TextEntity();
        e.setText(matched.group(group));
        e.start = matched.start(group);
        // Record the end offset too, so the entity describes a complete span.
        e.end = matched.end(group);
        pairs.put(nm, e);
    }
    return pairs;
}

Also used : HashMap(java.util.HashMap) TextEntity(org.opensextant.extraction.TextEntity)

Example 3 with TextEntity

use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.

the class GeocoordMatch method setSeparator.

/**
     * Records the separator for this match. The keys in {@code separators}
     * are tried in iteration order; the first key that has a non-null entry
     * in {@code fields} supplies the separator offset (its start) and the
     * separator text. If no key has an entry, nothing is changed.
     *
     * @param fields
     *            regex fields to search
     */
protected void setSeparator(Map<String, TextEntity> fields) {
    for (String key : separators) {
        TextEntity found = fields.get(key);
        if (found == null) {
            // This key was not among the matched fields; try the next one.
            continue;
        }
        offsetSeparator = found.start;
        separator = found.getText();
        // Only the first separator found is used.
        break;
    }
}

Also used : TextEntity(org.opensextant.extraction.TextEntity)

Example 4 with TextEntity

use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.

the class GeocoordNormalization method normalize_coordinate.

/**
     * Normalizes the match object in place, setting coord_text and other data
     * by parsing the matched fields according to the pattern family recorded
     * in {@code m.cce_family_id}. A family that is none of DD, DM, DMS, MGRS
     * or UTM leaves the match untouched.
     *
     * @param m match
     * @param groups fields, keyed by field name, each holding text and offsets
     * @throws NormalizationException if MGRS parsing fails with anything other
     *             than an IllegalArgumentException, or if UTM parsing fails
     *             with any exception
     */
public static void normalize_coordinate(GeocoordMatch m, Map<String, TextEntity> groups) throws NormalizationException {
    // The regex does not use named groups, so two views of the fields are kept:
    // "groups" maps a field name to its text/offset pair, and "fieldValues"
    // maps the same name to the text alone.
    Map<String, String> fieldValues = new HashMap<String, String>();
    for (Map.Entry<String, TextEntity> field : groups.entrySet()) {
        fieldValues.put(field.getKey(), field.getValue().getText());
    }

    if (m.cce_family_id == XConstants.DD_PATTERN) {
        // Decimal degrees: build both ordinates, then derive a
        // coordinate-only text such as "+42.4440 -102.3333" from them.
        DMSOrdinate latitude = new DMSOrdinate(groups, fieldValues, DMLAT, m.getText());
        DMSOrdinate longitude = new DMSOrdinate(groups, fieldValues, DMLON, m.getText());
        m.lat_text = latitude.text;
        m.lon_text = longitude.text;
        m.setSeparator(groups);
        m.setCoordinate(latitude, longitude);

        // DD filters are enabled by a bit in XCoord.RUNTIME_FLAGS.
        // To disable: XCoord.RUNTIME_FLAGS XOR XConstants.DD_FILTERS_ON
        boolean filtersEnabled = (XCoord.RUNTIME_FLAGS & XConstants.DD_FILTERS_ON) > 0;
        if (!filtersEnabled) {
            // Filters off: the match is filtered out exactly when the
            // latitude/longitude pair fails validation.
            m.setFilteredOut(!GeodeticUtility.validateCoordinate(m.getLatitude(), m.getLongitude()));
        } else if (!longitude.hemisphere.isAlpha() && !latitude.hemisphere.isAlpha() && !latitude.hasSymbols()) {
            // Filters on: neither ordinate has an alphabetic hemisphere and
            // the latitude has no symbols, so this is likely not a DD
            // coordinate -- filter out.
            m.setFilteredOut(true);
        }
        m.coord_text = m.lat_text + " " + m.lon_text;
        return;
    }

    if (m.cce_family_id == XConstants.DM_PATTERN || m.cce_family_id == XConstants.DMS_PATTERN) {
        // The DM and DMS families are normalized the same way.
        DMSOrdinate latitude = new DMSOrdinate(groups, fieldValues, DMLAT, m.getText());
        DMSOrdinate longitude = new DMSOrdinate(groups, fieldValues, DMLON, m.getText());
        m.lat_text = latitude.text;
        m.lon_text = longitude.text;
        m.setSeparator(groups);
        m.setCoordinate(latitude, longitude);
        // Only evaluate dashes when the match has not already been filtered out.
        if (!m.isFilteredOut()) {
            m.setFilteredOut(m.evaluateInvalidDashes());
        }
        m.coord_text = m.lat_text + " " + m.lon_text;
        return;
    }

    if (m.cce_family_id == XConstants.MGRS_PATTERN) {
        // Capture the whitespace-free text first, to aid reporting in error
        // situations; it is replaced below once a candidate is parsed.
        m.coord_text = TextUtils.delete_whitespace(m.getText());
        // TODO: make use of multiple answers.
        try {
            MGRS[] candidates = MGRSParser.parseMGRS(m.getText(), m.coord_text, fieldValues);
            if (candidates != null) {
                MGRS primary = candidates[0];
                m.coord_text = primary.toString();
                Geodetic2DPoint primaryPoint = primary.toGeodetic2DPoint();
                m.setLatitude(primaryPoint.getLatitudeAsDegrees());
                m.setLongitude(primaryPoint.getLongitudeAsDegrees());
                m.setBalanced(true);

                if (candidates.length == 2) {
                    // A second candidate is attached to the match as an
                    // alternate interpretation.
                    MGRS alternate = candidates[1];
                    GeocoordMatch other = new GeocoordMatch();
                    other.copy(m);
                    other.coord_text = alternate.toString();
                    Geodetic2DPoint alternatePoint = alternate.toGeodetic2DPoint();
                    other.setLatitude(alternatePoint.getLatitudeAsDegrees());
                    other.setLongitude(alternatePoint.getLongitudeAsDegrees());
                    // Really balanced?
                    other.setBalanced(true);
                    m.addOtherInterpretation(other);
                }
            }
        } catch (java.lang.IllegalArgumentException invalidMgrs) {
            // No normalization was possible as this match represents an
            // invalid MGRS value; filter it out rather than fail.
            m.setFilteredOut(true);
        } catch (Exception err) {
            throw new NormalizationException("Failed to parse MGRS", err);
        }
        return;
    }

    if (m.cce_family_id == XConstants.UTM_PATTERN) {
        m.coord_text = TextUtils.delete_whitespace(m.getText());
        try {
            UTM parsed = UTMParser.parseUTM(m.coord_text, fieldValues);
            if (parsed != null) {
                Geodetic2DPoint point = parsed.getGeodetic();
                m.setLatitude(point.getLatitudeAsDegrees());
                m.setLongitude(point.getLongitudeAsDegrees());
                m.coord_text = parsed.toString();
            }
        } catch (java.lang.IllegalArgumentException parseErr) {
            // No normalization done; unlike MGRS, this is reported to the caller.
            throw new NormalizationException("Failed to parse UTM pattern with text=" + m.getText() + " COORD?:" + m.coord_text, parseErr);
        } catch (Exception err) {
            throw new NormalizationException("Failed to parse UTM pattern", err);
        }
    }
}

Also used : HashMap(java.util.HashMap) TextEntity(org.opensextant.extraction.TextEntity) NormalizationException(org.opensextant.extraction.NormalizationException) UTM(org.opensextant.geodesy.UTM) Geodetic2DPoint(org.opensextant.geodesy.Geodetic2DPoint) MGRS(org.opensextant.geodesy.MGRS)

Aggregations

TextEntity (org.opensextant.extraction.TextEntity)4 HashMap (java.util.HashMap)2 NormalizationException (org.opensextant.extraction.NormalizationException)1 Geodetic2DPoint (org.opensextant.geodesy.Geodetic2DPoint)1 MGRS (org.opensextant.geodesy.MGRS)1 UTM (org.opensextant.geodesy.UTM)1