use of org.opensextant.extraction.TextMatch in project Xponents by OpenSextant.
the class TestPoLiReporter method testUserFile.
/**
 * Runs the pattern manager's test cases, and then the full text of a user
 * file, through a pre-configured PoLi and writes each match as a row of
 * "results/test_" + base name of the file + ".csv".
 * Use -c config -u file test
 *
 * Test cases that are not evaluated or yield no matches are only logged as
 * FAILED here; no row is written for them.
 *
 * @param f path of the user file whose text is run through all patterns
 * @throws IOException propagated from creating the results file, reading
 *         the user file, or closing the report
 * @throws NormalizationException declared for callers; NOTE(review): no
 *         visible statement in this method obviously throws it
 */
public void testUserFile(String f) throws IOException, NormalizationException {
    String fname = FilenameUtils.getBaseName(f);
    createResultsFile("results/test_" + fname + ".csv");

    // Everything after the report is opened runs under try/finally so the
    // report is closed even when reading the file or extraction fails.
    try {
        log.info("TESTING FILE: " + f);

        // Pass 1: the test cases carried by the pattern manager.
        for (PatternTestCase test : poli.getPatternManager().testcases) {
            log.info("TEST " + test.id);
            TextMatchResult results = poli.extract_patterns(test.text, test.id, test.family);
            if (results.evaluated && !results.matches.isEmpty()) {
                try {
                    for (TextMatch m : results.matches) {
                        Map<String, Object> row = createResultRow(test, m);
                        report.write(row, header, poliResultsSpec);
                    }
                } catch (IOException ioerr) {
                    // Best effort: a failed write abandons the remaining rows
                    // of this test case but not the whole run.
                    log.error("Failed to write result for " + test.id, ioerr);
                }
            } else {
                log.info("TEST " + test.id + " STATUS: FAILED");
            }
        }

        // Pass 2: the whole file, with every pattern enabled and no family filter.
        // NOTE(review): the file is decoded with the platform default charset;
        // confirm whether an explicit encoding (e.g. UTF-8) is expected here.
        String inputText = FileUtils.readFileToString(new File(f));
        poli.enableAll();
        String fileID = "FILE:" + fname;
        PatternTestCase fileTestCase = new PatternTestCase(fileID, "all", "(file text)");
        TextMatchResult results = poli.extract_patterns(inputText, fileID, null);
        if (results.evaluated && !results.matches.isEmpty()) {
            try {
                for (TextMatch m : results.matches) {
                    Map<String, Object> row = createResultRow(fileTestCase, m);
                    report.write(row, header, poliResultsSpec);
                }
            } catch (IOException ioerr) {
                log.error("Failed to write result for " + fileID, ioerr);
            }
        } else {
            log.info("FILE TEST " + fileID + " STATUS: FAILED");
        }
    } finally {
        closeReport();
    }
}
use of org.opensextant.extraction.TextMatch in project Xponents by OpenSextant.
the class XTemporal method extract_dates.
/**
 * A direct call to extract dates; which is useful for diagnostics and
 * development/testing.
 *
 * Every enabled pattern is matched against the text. A regex hit is kept
 * only if it normalizes to a date; hits of the "YMD" family that
 * {@code isDistantPastYMD} flags are dropped as well. Kept matches carry the
 * formatted date text plus the distant-past and future flags.
 *
 * @param text    the text to scan
 * @param text_id identifier stored as the result ID
 * @return the result holding all kept matches; {@code evaluated} is set once
 *         at least one enabled pattern was applied, and {@code pass} is true
 *         when at least one match was kept
 */
public TextMatchResult extract_dates(String text, String text_id) {
    TextMatchResult results = new TextMatchResult();
    results.matches = new ArrayList<TextMatch>();
    results.result_id = text_id;

    int found = 0;
    int patternsComplete = 0;
    // Progress is reported as a fraction of (pattern count + 1). The original
    // expression "n / (double) size + 1" added the 1 to the quotient because
    // the cast and the division bind tighter than '+', so it always reported
    // a value above 1.0. Presumably the extra step leaves room for the
    // post-processing after the loop.
    final double progressSteps = patterns.get_patterns().size() + 1;

    for (RegexPattern pat : patterns.get_patterns()) {
        log.debug("pattern={}", pat.id);
        if (!pat.enabled) {
            log.debug("CFG pattern={} not enabled.", pat.id);
            continue;
        }

        Matcher match = pat.regex.matcher(text);
        results.evaluated = true;
        while (match.find()) {
            // Counts every regex hit, including ones rejected below, so match
            // IDs are based on the raw hit number.
            ++found;
            DateMatch dt = new DateMatch();
            dt.pattern_id = pat.id;
            dt.start = match.start();
            dt.end = match.end();
            dt.setText(match.group());
            try {
                DateNormalization.normalize_date(patterns.group_map(pat, match), dt);
                if (dt.datenorm == null) {
                    continue;
                }
                if ("YMD".equalsIgnoreCase(pat.family) && this.isDistantPastYMD(dt.datenorm)) {
                    continue;
                }
                dt.datenorm_text = DateNormalization.format_date(dt.datenorm);
                // Flags worth setting here.
                dt.isDistantPast = isDistantPast(dt.datenorm.getTime());
                dt.isFuture = isFuture(dt.datenorm.getTime());
                set_match_id(dt, found);
            } catch (Exception err) {
                // Not a date: any failure while normalizing rejects this hit.
                continue;
            }
            results.matches.add(dt);
        }
        patternsComplete++;
        updateProgress(patternsComplete / progressSteps);
    }

    // The overall verdict depends only on whether anything was kept; the
    // per-hit pass/fail stores the loop used to make were always overwritten here.
    results.pass = !results.matches.isEmpty();
    PatternManager.reduce_matches(results.matches);
    return results;
}
use of org.opensextant.extraction.TextMatch in project Xponents by OpenSextant.
the class TestPoLiReporter method test.
/**
 * System tests: enables all patterns, runs every test case held by the PoLi
 * pattern manager and writes the outcome to "results/test_System.csv".
 *
 * A test case with matches gets one row per match. A test case that was not
 * evaluated or produced no matches gets a single row built with a null match
 * and is logged as FAILED.
 *
 * @throws IOException propagated from creating the results file, from
 *         writing the row of a failed test case, or from closing the report
 */
public void test() throws IOException {
    poli.enableAll();
    createResultsFile("results/test_System.csv");

    // The failed-case row below is written outside any catch, so an
    // IOException can leave this loop; try/finally guarantees the report is
    // still closed in that case.
    try {
        log.info("TESTING ALL SYSTEM PATTERNS");
        for (PatternTestCase test : this.poli.getPatternManager().testcases) {
            log.info("TEST " + test.id);
            TextMatchResult results = this.poli.extract_patterns(test.text, test.id, test.family);
            if (results.evaluated && !results.matches.isEmpty()) {
                try {
                    for (TextMatch m : results.matches) {
                        Map<String, Object> row = createResultRow(test, m);
                        report.write(row, header, poliResultsSpec);
                    }
                } catch (IOException ioerr) {
                    // Best effort: a failed write abandons the remaining rows
                    // of this test case but not the whole run.
                    log.error("Failed to write result for " + test.id, ioerr);
                }
            } else {
                Map<String, Object> row = createResultRow(test, null);
                report.write(row, header, poliResultsSpec);
                log.info("TEST " + test.id + " STATUS: FAILED");
            }
        }
    } finally {
        closeReport();
    }
}
use of org.opensextant.extraction.TextMatch in project Xponents by OpenSextant.
the class TestXCoordReporter method save_result.
/**
 * Coordinate Test/Eval format
 *
 * Result ID, CCE family, pattern ID, status, message // Reason for failure
 * Result ID, CCE family, pattern ID, status, Match ID, matchtext, lat, lon
 * etc. // Success implied by match
 *
 * With matches present, one row is written per match; submatches and
 * duplicates are skipped unless {@code full_report} is set, in which case a
 * submatch is reported with status "IGNORE". With no matches, a single row
 * is written whose status is "PASS" only if the failure was expected.
 *
 * TODO: use TestCase here or rely on truth evaluation in Python
 * GeocoderEval?
 *
 * @param t       the test case behind this result; may be null, in which
 *                case the expected outcome is read from the result trace
 * @param results the extraction result to report
 * @throws IOException if writing a row to the report fails
 */
public void save_result(GeocoordTestCase t, TextMatchResult results) throws IOException {
    if (!results.matches.isEmpty()) {
        for (TextMatch tm : results.matches) {
            GeocoordMatch m = (GeocoordMatch) tm;
            if (!full_report && (m.is_submatch || m.is_duplicate)) {
                // Ignore submatches and duplicates
                continue;
            }

            Map<String, Object> row = createTestCase(t);
            row.put(header[6], results.result_id);
            row.put(header[7], (full_report && m.is_submatch) ? "IGNORE" : "PASS");

            String msg = results.message;
            if (m.is_submatch) {
                // Avoid emitting the literal text "null; Is Submatch" when the
                // result carries no message.
                msg = (msg == null) ? "Is Submatch" : msg + "; Is Submatch";
            }
            row.put(header[8], msg);
            row.put(header[9], XConstants.get_CCE_family(m.cce_family_id));
            row.put(header[10], m.pattern_id);
            row.put(header[11], m.getText());
            row.put(header[12], "" + m.formatLatitude());
            row.put(header[13], "" + m.formatLongitude());

            String mgrs = "";
            try {
                mgrs = m.toMGRS();
            } catch (Exception ignored) {
                // Deliberate best effort: when the MGRS conversion fails the
                // column is left empty and the row is still reported.
            }
            row.put(header[14], mgrs);
            row.put(header[15], m.formatPrecision());
            row.put(header[16], Long.valueOf(m.start));
            report.write(row, header, xcoordResultsSpec);
        }
    } else {
        Map<String, Object> row = createTestCase(t);
        row.put(header[6], results.result_id);

        String trace = results.get_trace();
        boolean expected_failure;
        if (t != null) {
            expected_failure = !t.true_positive;
        } else {
            // If the match message contains a test payload from the test cases.
            // Locale.ROOT keeps the upper-casing locale-independent (a Turkish
            // default locale would map 'i' to a dotted capital and miss "FAIL");
            // a missing trace is treated as "failure not expected".
            expected_failure = trace != null
                    && trace.toUpperCase(java.util.Locale.ROOT).contains("FAIL");
        }

        // True Negative -- you ignored one correctly
        row.put(header[7], expected_failure ? "PASS" : "FAIL");
        row.put(header[8], trace);
        report.write(row, header, xcoordResultsSpec);
    }
}
use of org.opensextant.extraction.TextMatch in project Xponents by OpenSextant.
the class PatternsOfLife method extract_patterns.
/**
 * Extract patterns of a certain family from a block of text.
 *
 * Every enabled pattern whose ID starts with the given family is matched
 * against the text. Each regex hit becomes a PoliMatch, either the generic
 * one or an instance of the pattern's own match class, which is then
 * normalized and given a window of surrounding text as context.
 *
 * @param text
 *            - data to process
 * @param text_id
 *            - identifier for the data
 * @param family
 *            - optional filter; to reuse the same PatManager but extract
 *            certain patterns only. Null applies no filter.
 *
 * @return PoliResult; {@code evaluated} is set once at least one pattern was
 *         applied, and {@code pass} is true when at least one match was found
 */
public TextMatchResult extract_patterns(String text, String text_id, String family) {
    TextMatchResult results = new TextMatchResult();
    results.result_id = text_id;
    results.matches = new ArrayList<TextMatch>();

    int bufsize = text.length();
    int found = 0;
    int patternsComplete = 0;
    // Progress is reported as a fraction of (pattern count + 1). The original
    // expression "n / (double) size + 1" added the 1 to the quotient because
    // the cast and the division bind tighter than '+', so it always reported
    // a value above 1.0. Presumably the extra step leaves room for the
    // post-processing after the loop.
    final double progressSteps = patterns.get_patterns().size() + 1;

    for (RegexPattern repat : patterns.get_patterns()) {
        if (!repat.enabled) {
            continue;
        }
        if (family != null && !repat.id.startsWith(family)) {
            continue;
        }

        Matcher match = repat.regex.matcher(text);
        results.evaluated = true;
        while (match.find()) {
            ++found;
            Map<String, String> fields = patterns.group_map(repat, match);

            // Scoped per hit so a failed instantiation can never leave a
            // previous hit's object behind.
            PoliMatch poliMatch = null;
            if (repat.match_class == null) {
                poliMatch = new PoliMatch(fields, match.group());
            } else {
                try {
                    poliMatch = (PoliMatch) repat.match_class.newInstance();
                    poliMatch.setText(match.group());
                    poliMatch.setGroups(fields);
                } catch (InstantiationException classErr1) {
                    poliMatch = null;
                    log.error("Could not create... ", classErr1);
                } catch (IllegalAccessException classErr2) {
                    poliMatch = null;
                    log.error("Could not create... ", classErr2);
                }
            }
            if (poliMatch == null) {
                // This would have been thrown at init.
                log.error("Could not find pattern family for " + repat.id);
                continue;
            }

            poliMatch.setType(repat.family);
            poliMatch.pattern_id = repat.id;
            poliMatch.start = match.start();
            poliMatch.end = match.end();
            poliMatch.normalize();

            // TODO: Assess filters? No match is filtered out at this point.

            // Returns indices for a window around the text match; the first
            // two entries bound the context that is stored on the match.
            int[] slices = TextUtils.get_text_window(poliMatch.start, bufsize, match_width);
            poliMatch.setContext(TextUtils.delete_eol(text.substring(slices[0], slices[1])));

            set_match_id(poliMatch, found);
            results.matches.add(poliMatch);
        }
        patternsComplete++;
        updateProgress(patternsComplete / progressSteps);
    }

    results.pass = !results.matches.isEmpty();
    PoliPatternManager.reduce_matches(results.matches);
    return results;
}
Aggregations