use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.
the class TestExtraction method test.
/**
 * Builds two text spans, [10, 15] and [11, 17], and prints the result of
 * comparing the second span against the first with isWithin, isSameMatch
 * and isOverlap, one result per line.
 */
public void test() {
    TextEntity first = new TextEntity();
    first.start = 10;
    first.end = 15;

    TextEntity second = new TextEntity();
    second.start = 11;
    second.end = 17;

    // Same three comparisons, in the same order: within, same-match, overlap.
    boolean within = second.isWithin(first);
    System.out.println(within);
    boolean sameMatch = second.isSameMatch(first);
    System.out.println(sameMatch);
    boolean overlap = second.isOverlap(first);
    System.out.println(overlap);
}
use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.
the class RegexPatternManager method group_matches.
/**
 * Collects the capture groups of a match as named TextEntities.
 *
 * Capture group {@code i} (1-based) is stored under the name found at index
 * {@code i - 1} of {@code p.regex_groups}. Each entity carries the group's
 * text together with both its start and end offsets as reported by the
 * matcher.
 *
 * @param p the pattern whose {@code regex_groups} list names each capture group
 * @param matched a matcher holding a successful match
 * @return map of group name to the entity (text, start, end) for that group
 */
public Map<String, TextEntity> group_matches(RegexPattern p, java.util.regex.Matcher matched) {
    Map<String, TextEntity> pairs = new HashMap<String, TextEntity>();
    int cnt = matched.groupCount();
    for (int x = 0; x < cnt; ++x) {
        // Matcher groups are 1-based (group 0 is the whole match);
        // the name list is 0-based.
        int groupIndex = x + 1;
        String nm = p.regex_groups.get(x);

        TextEntity e = new TextEntity();
        e.setText(matched.group(groupIndex));
        e.start = matched.start(groupIndex);
        // Record the end offset as well, so the span is complete; previously
        // only start was set and end was left at its default value.
        e.end = matched.end(groupIndex);
        pairs.put(nm, e);
    }
    return pairs;
}
use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.
the class GeocoordMatch method setSeparator.
/**
 * Records the separator of this match from the matched regex fields.
 *
 * The names in {@code separators} are tried in order; the first one that is
 * present in {@code fields} supplies both the separator offset and the
 * separator text. If none is present, nothing is changed.
 *
 * @param fields
 *            regex fields to search
 */
protected void setSeparator(Map<String, TextEntity> fields) {
    for (String key : separators) {
        TextEntity found = fields.get(key);
        if (found == null) {
            // This separator field was not part of the match; try the next.
            continue;
        }
        offsetSeparator = found.start;
        separator = found.getText();
        return;
    }
}
use of org.opensextant.extraction.TextEntity in project Xponents by OpenSextant.
the class GeocoordNormalization method normalize_coordinate.
/**
 * Normalizes a coordinate match in place, setting coord_text and other data
 * from the matched fields according to which pattern family was matched.
 *
 * <ul>
 * <li>DD, DM, DMS: latitude/longitude ordinates are built from the fields;
 * lat_text, lon_text, the separator, the coordinate and coord_text are set
 * on the match, and family-specific filters may mark it as filtered out.</li>
 * <li>MGRS: the text is parsed into one or two candidates; the first sets
 * this match, a second is attached as an alternate interpretation. An
 * IllegalArgumentException from parsing marks the match as filtered out.</li>
 * <li>UTM: the text is parsed and latitude/longitude/coord_text are set.</li>
 * </ul>
 * A match of any other family is left untouched.
 *
 * @param m match to normalize; updated in place
 * @param groups matched regex fields, keyed by field name
 * @throws NormalizationException if UTM parsing fails for any reason, or if
 *         MGRS parsing fails with anything other than an
 *         IllegalArgumentException
 */
public static void normalize_coordinate(GeocoordMatch m, Map<String, TextEntity> groups) throws NormalizationException {
    // Java 6 regex has no named groups, so callers hand us name -> text/offset
    // pairs (groups); here we also derive the plain name -> text view
    // (fieldValues) that the parsers need.
    Map<String, String> fieldValues = new HashMap<String, String>();
    for (Map.Entry<String, TextEntity> field : groups.entrySet()) {
        fieldValues.put(field.getKey(), field.getValue().getText());
    }

    if (m.cce_family_id == XConstants.DD_PATTERN) {
        // Fields used: decDegLat, decDegLon, degSym, hemiLat, hemiLon
        DMSOrdinate ddlat = new DMSOrdinate(groups, fieldValues, DMLAT, m.getText());
        DMSOrdinate ddlon = new DMSOrdinate(groups, fieldValues, DMLON, m.getText());

        // Yield a coordinate-only version of text; "+42.4440 -102.3333"
        // preserving the innate precision given in the original text.
        m.lat_text = ddlat.text;
        m.lon_text = ddlon.text;
        m.setSeparator(groups);
        m.setCoordinate(ddlat, ddlon);

        // DD filters enabled?
        // To disable: XCoord.RUNTIME_FLAGS XOR XConstants.DD_FILTERS_ON
        // Tested with != 0 so the check holds whichever bit the flag occupies.
        if ((XCoord.RUNTIME_FLAGS & XConstants.DD_FILTERS_ON) != 0) {
            // With FILTERS ON: if neither lat nor lon has an ALPHA hemisphere
            // (i.e., ENSW) and the latitude has no coordinate symbology,
            // then this is likely not a DD coordinate -- filter out.
            boolean noAlphaHemisphere = !ddlon.hemisphere.isAlpha() && !ddlat.hemisphere.isAlpha();
            if (noAlphaHemisphere && !ddlat.hasSymbols()) {
                m.setFilteredOut(true);
            }
        } else {
            // DD filters OFF: the filtered-out state is decided solely by
            // whether the lat/lon pair validates.
            m.setFilteredOut(!GeodeticUtility.validateCoordinate(m.getLatitude(), m.getLongitude()));
        }
        m.coord_text = m.lat_text + " " + m.lon_text;
    } else if (m.cce_family_id == XConstants.DM_PATTERN || m.cce_family_id == XConstants.DMS_PATTERN) {
        // DM and DMS are normalized identically: build both ordinates,
        // record their text and the separator, then set the coordinate.
        DMSOrdinate dmlat = new DMSOrdinate(groups, fieldValues, DMLAT, m.getText());
        DMSOrdinate dmlon = new DMSOrdinate(groups, fieldValues, DMLON, m.getText());
        m.lat_text = dmlat.text;
        m.lon_text = dmlon.text;
        m.setSeparator(groups);
        m.setCoordinate(dmlat, dmlon);

        // Only run the dash check if the match has not already been filtered.
        if (!m.isFilteredOut()) {
            m.setFilteredOut(m.evaluateInvalidDashes());
        }
        m.coord_text = m.lat_text + " " + m.lon_text;
    } else if (m.cce_family_id == XConstants.MGRS_PATTERN) {
        // Capture the normalized coord text just to aid in reporting in
        // error situations.
        m.coord_text = TextUtils.delete_whitespace(m.getText());

        // TODO: make use of multiple answers.
        try {
            MGRS[] mgrs_candidates = MGRSParser.parseMGRS(m.getText(), m.coord_text, fieldValues);
            if (mgrs_candidates != null) {
                MGRS mgrs = mgrs_candidates[0];
                m.coord_text = mgrs.toString();
                Geodetic2DPoint pt = mgrs.toGeodetic2DPoint();
                m.setLatitude(pt.getLatitudeAsDegrees());
                m.setLongitude(pt.getLongitudeAsDegrees());
                m.setBalanced(true);

                if (mgrs_candidates.length == 2) {
                    // A second candidate is kept as an alternate
                    // interpretation attached to the primary match.
                    mgrs = mgrs_candidates[1];
                    GeocoordMatch m2 = new GeocoordMatch();
                    m2.copy(m);
                    m2.coord_text = mgrs.toString();
                    pt = mgrs.toGeodetic2DPoint();
                    m2.setLatitude(pt.getLatitudeAsDegrees());
                    m2.setLongitude(pt.getLongitudeAsDegrees());
                    // Really balanced?
                    m2.setBalanced(true);
                    m.addOtherInterpretation(m2);
                }
            }
        } catch (java.lang.IllegalArgumentException parseErr) {
            // No normalization was possible as this match represents an
            // invalid MGRS value; deliberately not rethrown -- the match is
            // simply filtered out.
            m.setFilteredOut(true);
        } catch (Exception err) {
            throw new NormalizationException("Failed to parse MGRS", err);
        }
    } else if (m.cce_family_id == XConstants.UTM_PATTERN) {
        m.coord_text = TextUtils.delete_whitespace(m.getText());
        try {
            UTM utm = UTMParser.parseUTM(m.coord_text, fieldValues);
            if (utm != null) {
                Geodetic2DPoint pt = utm.getGeodetic();
                m.setLatitude(pt.getLatitudeAsDegrees());
                m.setLongitude(pt.getLongitudeAsDegrees());
                m.coord_text = utm.toString();
            }
        } catch (java.lang.IllegalArgumentException parseErr) {
            // No normalization done; unlike MGRS, an invalid UTM value is
            // reported to the caller rather than filtered out.
            throw new NormalizationException("Failed to parse UTM pattern with text=" + m.getText() + " COORD?:" + m.coord_text, parseErr);
        } catch (Exception err) {
            throw new NormalizationException("Failed to parse UTM pattern", err);
        }
    }
}
Aggregations