use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.
the class XCoord method extract_coordinates.
/**
 * Limit the extraction to a particular family of coordinates. Diagnostic
 * messages are recorded on the returned TextMatchResult only when debug = ON.
 *
 * <p>
 * Each enabled pattern of the requested family is run over the text. A match
 * is dropped when the context filter rejects it (only if CONTEXT_FILTERS_ON
 * is set), when it cannot be normalized, or when the normalization filter
 * rejects it. Surviving matches, followed by any alternate interpretations
 * they carry, are added to the result, which is finally passed through
 * PatternManager.reduce_matches.
 *
 * @param text
 *            text to match
 * @param text_id
 *            id for text; stored as result_id on the result
 * @param family
 *            pattern family or XConstants.ALL_PATTERNS
 * @return TextMatchResult with the matches found. If input text is null,
 *         the result is null
 */
public TextMatchResult extract_coordinates(String text, String text_id, int family) {
    if (text == null) {
        return null;
    }
    int bufsize = text.length();
    TextMatchResult results = new TextMatchResult();
    results.result_id = text_id;
    results.matches = new ArrayList<TextMatch>();
    int patternsComplete = 0;
    int found = 0;
    // Only the count is needed for progress reporting; read it once.
    int patternCount = patterns.get_patterns().size();
    for (RegexPattern repat : patterns.get_patterns()) {
        log.debug("pattern={}", repat.id);
        if (!repat.enabled) {
            log.debug("CFG pattern={} not enabled", repat.id);
            continue;
        }
        GeocoordPattern pat = (GeocoordPattern) repat;
        // To limit multiple use enable_XXXX()
        if (family != XConstants.ALL_PATTERNS && pat.cce_family_id != family) {
            log.debug("CFG pattern={} not requested", pat.id);
            continue;
        }
        Matcher match = pat.regex.matcher(text);
        results.evaluated = true;
        while (match.find()) {
            // Counts every raw regex hit, including those filtered out below;
            // the running count is later handed to set_match_id.
            ++found;
            GeocoordMatch coord = new GeocoordMatch();
            // MATCH METHOD aka Pattern ID aka CCE instance
            coord.pattern_id = pat.id;
            coord.cce_family_id = pat.cce_family_id;
            coord.cce_variant = pat.cce_variant;
            coord.start = match.start();
            coord.end = match.end();
            coord.setText(match.group());
            if ((RUNTIME_FLAGS & XConstants.CONTEXT_FILTERS_ON) > 0) {
                if (this.filterOutContext(text, coord.start)) {
                    log.debug("Filtered out noisy match, {} found by {}", coord.getText(), pat.id);
                    continue;
                }
            }
            // Normalize. A match that fails to normalize is dropped; the
            // failure is only reported when debug is on.
            try {
                GeocoordNormalization.normalize_coordinate(coord, patterns.group_matches(pat, match));
            } catch (NormalizationException normErr) {
                if (debug) {
                    results.message = "Parse error with '" + coord.getText() + "'";
                    log.error(results.message, normErr);
                }
                // Otherwise quietly ignore.
                continue;
            }
            if (GeocoordNormalization.filter_out(coord)) {
                if (debug) {
                    results.message = "Filtered out coordinate pattern=" + pat.id + " value='" + coord.getText() + "'";
                    log.info("Normalization Filter fired, MSG={}", results.message);
                }
                continue;
            }
            // Establish precision
            GeocoordNormalization.set_precision(coord);
            /*
             * Caller may want to disable getContext operation here for
             * short texts.... or for any use case. This is more helpful for
             * longer texts with many annotations.
             */
            if ((XCoord.RUNTIME_FLAGS & XConstants.FLAG_EXTRACT_CONTEXT) > 0) {
                // returns indices for two windows before and after match
                int[] slices = TextUtils.get_text_window(coord.start, coord.getLength(), bufsize, match_width);
                // This sets the context window before/after:
                // left is slices[0]..slices[1], right is slices[2]..slices[3].
                coord.setContext(
                        TextUtils.delete_eol(text.substring(slices[0], slices[1])),
                        TextUtils.delete_eol(text.substring(slices[2], slices[3])));
            }
            set_match_id(coord, found);
            results.matches.add(coord);
            if (coord.hasOtherIterpretations()) {
                for (GeocoordMatch m2 : coord.getOtherInterpretations()) {
                    // Other interpretations may have different coord text;
                    // only the metadata is copied from the primary match.
                    m2.copyMetadata(coord);
                    results.matches.add(m2);
                }
            }
        }
        patternsComplete++;
        // Fix: the original "done / (double) size + 1" evaluated as
        // (done / size) + 1 because the cast and division bind tighter than
        // the addition. Dividing by (size + 1) keeps the value below 1.0
        // inside this loop.
        // NOTE(review): assumes updateProgress takes a fraction -- confirm.
        updateProgress(patternsComplete / (double) (patternCount + 1));
    }
    // "pass" is the wrong idea. If no data was found
    // because there was no data, then it still passes.
    //
    // Note: pass is computed before reduce_matches is applied.
    results.pass = !results.matches.isEmpty();
    PatternManager.reduce_matches(results.matches);
    return results;
}
use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.
the class TestPoLiReporter method testUserFile.
/**
 * Run patterns over a single file using a pre-configured PoLi. Use -c
 * config -u file test
 *
 * <p>
 * First every test case held by the pattern manager is run and its matches
 * are written to results/test_&lt;basename&gt;.csv; then the content of the
 * file itself is run with all patterns enabled and its matches are appended
 * to the same report. The report is closed even if reading the file or an
 * extraction fails.
 *
 * @param f
 *            path of the file to test; its base name is used in the report
 *            file name
 * @throws IOException
 *             if the file cannot be read or the report cannot be
 *             opened/closed
 * @throws NormalizationException
 *             declared by the original signature; NOTE(review): the call
 *             that raises it is not visible here -- confirm
 */
public void testUserFile(String f) throws IOException, NormalizationException {
    // poli.configure(new File(f));
    String fname = FilenameUtils.getBaseName(f);
    createResultsFile("results/test_" + fname + ".csv");
    // Everything after the report is opened runs under try/finally so the
    // report is not left open when an exception escapes.
    try {
        log.info("TESTING FILE: " + f);
        for (PatternTestCase test : poli.getPatternManager().testcases) {
            log.info("TEST " + test.id);
            TextMatchResult results = poli.extract_patterns(test.text, test.id, test.family);
            if (results.evaluated && !results.matches.isEmpty()) {
                try {
                    for (TextMatch m : results.matches) {
                        Map<String, Object> row = createResultRow(test, m);
                        report.write(row, header, poliResultsSpec);
                    }
                } catch (IOException ioerr) {
                    // A write failure abandons the remaining rows of this
                    // test case only; later test cases still run.
                    log.error("Failed to write result for " + test.id, ioerr);
                }
            } else {
                log.info("TEST " + test.id + " STATUS: FAILED");
            }
        }
        // NOTE(review): the file is read without an explicit charset --
        // confirm the default used by FileUtils is what is intended.
        String inputText = FileUtils.readFileToString(new File(f));
        poli.enableAll();
        String fileID = "FILE:" + fname;
        // Synthetic test case so file-level matches can share createResultRow.
        PatternTestCase fileTestCase = new PatternTestCase(fileID, "all", "(file text)");
        TextMatchResult results = poli.extract_patterns(inputText, fileID, null);
        if (results.evaluated && !results.matches.isEmpty()) {
            try {
                for (TextMatch m : results.matches) {
                    Map<String, Object> row = createResultRow(fileTestCase, m);
                    report.write(row, header, poliResultsSpec);
                }
            } catch (IOException ioerr) {
                log.error("Failed to write result for " + fileID, ioerr);
            }
        } else {
            log.info("FILE TEST " + fileID + " STATUS: FAILED");
        }
    } finally {
        closeReport();
    }
}
use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.
the class TestXCoord method systemTests.
/**
 * Using the TestUtility, all patterns are tested and reported to the
 * results folder.
 *
 * <p>
 * Does nothing beyond logging when the pattern manager's testing flag is
 * off. Otherwise the UTM, MGRS, DD, DMS and DM matchers are switched on,
 * each test case of the pattern manager is run through extract_coordinates
 * and its result saved to ./results/xcoord_System.csv. The reporter is
 * closed whether or not the run completes.
 */
public void systemTests() {
    RegexPatternManager mgr = xcoord.getPatternManager();
    log.info("\n\n=== SYSTEM TESTS ===\n\n");
    if (!mgr.testing) {
        log.info("TESTING OFF -- TURN ON DEBUG in LOG4J");
        return;
    }
    xcoord.match_UTM(true);
    xcoord.match_MGRS(true);
    xcoord.match_DD(true);
    xcoord.match_DMS(true);
    xcoord.match_DM(true);
    try {
        TestXCoordReporter tester = new TestXCoordReporter("./results/xcoord_System.csv");
        try {
            for (PatternTestCase tst : mgr.testcases) {
                TextMatchResult results = xcoord.extract_coordinates(tst.text, tst.id, tst.family_id);
                // extract_coordinates returns null for null text; skip that
                // case instead of letting a NullPointerException abort the
                // remaining test cases.
                if (results == null) {
                    log.info("=========SYSTEM TEST " + tst.id + " SKIPPED: NULL TEXT");
                    continue;
                }
                results.add_trace("Test Payload: " + tst.text);
                // Not evaluated means no pattern was actually run on the text.
                if (!results.evaluated) {
                    continue;
                }
                log.info("=========SYSTEM TEST " + tst.id + " FOUND:" + (results.matches.isEmpty() ? "NOTHING" : results.matches.size()));
                tester.save_result(null, results);
            }
        } finally {
            // Close the report on success and on failure alike.
            tester.close_report();
        }
    } catch (Exception err) {
        log.error("Not finishing tests", err);
        return;
    }
    log.info("=== SYSTEM TESTS DONE ===");
}
use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.
the class TestXCoord method fileTests.
/**
 * Run coordinate extraction, with all patterns enabled, over the whole
 * content of one text file and save the result to
 * ./results/xcoord_&lt;basename&gt;.csv.
 *
 * <p>
 * If the file cannot be read or the report cannot be opened, the error is
 * logged and the method returns. Errors during extraction or saving are
 * logged, after which the report is still closed.
 *
 * @param file
 *            path of the text file; it is trimmed before use
 */
public void fileTests(String file) {
    log.info("\n\n=== TEXT FILE TESTS ===\n\n");
    final String content;
    final TestXCoordReporter reporter;
    try {
        final String path = file.trim();
        content = FileUtility.readFile(path);
        reporter = new TestXCoordReporter("./results/xcoord_" + FilenameUtils.getBaseName(path) + ".csv");
    } catch (IOException openErr) {
        log.error("Failed to open test file", openErr);
        return;
    }

    xcoord.enableAll();
    try {
        // The job id is derived from the text itself.
        final String jobid = TextUtils.text_id(content);
        log.info("Extract coordinates; All patterns enabled");
        reporter.save_result(null, xcoord.extract_coordinates(content, jobid));
    } catch (Exception reportErr) {
        log.error("Failed to write report", reportErr);
    }
    reporter.close_report();
    log.info("=== TEXT FILE TESTS DONE ===");
}
use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.
the class TestXCoord method fileTestByLines.
/**
 * This will accommodate any test file that has at least the following style:
 *
 * FAMILY-XXX COORDINATE TEXT "FAIL"
 *
 * Where the first FAMILY token is extracted by find_family and resolved to a
 * pattern family with XConstants.get_CCE_family. Lines that are empty, that
 * start with "#", or whose family is unknown are skipped. Each remaining
 * line is run through extract_coordinates and the result is saved to
 * ./results/xcoord_&lt;basename&gt;-lines.csv.
 *
 * <p>
 * Any exception is logged and ends the run; the line reader and the report
 * are closed in either case.
 *
 * @param coordfile
 *            path of the line-oriented test file; it is trimmed before use
 */
public void fileTestByLines(String coordfile) {
    xcoord.match_UTM(true);
    xcoord.match_MGRS(true);
    xcoord.match_DD(true);
    xcoord.match_DMS(true);
    xcoord.match_DM(true);
    try {
        String _file = coordfile.trim();
        String fname = FilenameUtils.getBaseName(_file);
        TestXCoordReporter tester = new TestXCoordReporter("./results/xcoord_" + fname + "-lines.csv");
        try {
            // Open the same trimmed path that names the report; the original
            // passed the untrimmed argument here.
            java.io.LineNumberReader in = getLineReader(_file);
            try {
                String line = null;
                while ((line = in.readLine()) != null) {
                    String text = line.trim();
                    if (text.startsWith("#")) {
                        continue;
                    }
                    if (text.isEmpty()) {
                        continue;
                    }
                    String fam = find_family(line);
                    int famx = XConstants.get_CCE_family(fam);
                    if (famx == XConstants.UNK_PATTERN) {
                        log.error("Unknown test pattern TEXT=" + text);
                        continue;
                    }
                    // The test id is the line number, e.g. "#12".
                    GeocoordTestCase tst = new GeocoordTestCase("#" + in.getLineNumber(), fam, text);
                    // The family (tst.family_id) is deliberately not passed
                    // here; this overload takes only text and id.
                    TextMatchResult results = xcoord.extract_coordinates(tst.text, tst.id);
                    results.add_trace("Test Payload: " + tst.text);
                    if (!results.evaluated) {
                        continue;
                    }
                    log.info("=========FILE TEST " + tst.id + " FOUND:" + (results.matches.isEmpty() ? "NOTHING" : results.matches.size()));
                    tester.save_result(tst, results);
                }
            } finally {
                // The reader was never closed in the original.
                in.close();
            }
        } finally {
            // Close the report on failure as well as on success.
            tester.close_report();
        }
        log.info("=== FILE TESTS DONE ===");
    } catch (Exception err) {
        log.error("TEST BY LINES", err);
    }
}
Aggregations