Search in sources :

Example 6 with TextMatchResult

use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.

the class TestXCoord method fileTruth.

/**
 * Runs the coordinate extractor over every test row of a CSV truth file and
 * saves each evaluated result to {@code ./results/xcoord_<basename>-rows.csv}.
 *
 * <p>Rows are read by header name. The columns used are {@code id},
 * {@code enumeration}, {@code test}, {@code true_lat}, {@code true_lon} and
 * {@code remark}. A row is skipped when its {@code id} is missing, blank or
 * starts with {@code #}, or when the pattern family derived from the id is
 * unknown. The literal token {@code $NL} in the test text is replaced by a
 * newline before extraction.
 *
 * <p>Any exception ends the run and is logged; it is not rethrown.
 *
 * @param coordfile CSV file of coordinate test cases
 */
public void fileTruth(File coordfile) {
    xcoord.match_UTM(true);
    xcoord.match_MGRS(true);
    xcoord.match_DD(true);
    xcoord.match_DMS(true);
    xcoord.match_DM(true);
    try {
        String fname = FilenameUtils.getBaseName(coordfile.getName());
        TestXCoordReporter tester = new TestXCoordReporter("./results/xcoord_" + fname + "-rows.csv");
        tester.full_report = false;
        try {
            CsvMapReader in = open(coordfile);
            try {
                String[] columns = in.getHeader(true);
                Map<String, String> testRow = null;
                // id, enumeration, test, true_lat, true_lon, remark
                while ((testRow = in.read(columns)) != null) {
                    String patid = testRow.get("id");
                    if (patid == null) {
                        continue;
                    }
                    patid = patid.trim();
                    if (patid.isEmpty() || patid.startsWith("#")) {
                        // Blank ids and '#' comment rows are not test cases.
                        continue;
                    }
                    String fam = find_family(patid);
                    int famx = XConstants.get_CCE_family(fam);
                    if (famx == XConstants.UNK_PATTERN) {
                        // Report the id of THIS row; the test text has not been read yet.
                        log.error("Unknown test pattern ID=" + patid);
                        continue;
                    }
                    int linenum = Integer.parseInt(testRow.get("enumeration"));
                    String text = testRow.get("test");
                    text = text.replace("$NL", "\n");
                    String rmks = testRow.get("remark");
                    // "Patid # rowid" == test instance id
                    // DMS07#12  -- 12th example of DMS07 test.
                    GeocoordTestCase tst = new GeocoordTestCase(patid + "#" + linenum, fam, text);
                    tst.match.setLatitude(testRow.get("true_lat"));
                    tst.match.setLongitude(testRow.get("true_lon"));
                    tst.setRemarks(rmks);
                    TextMatchResult results = xcoord.extract_coordinates(tst.text, tst.id);
                    results.add_trace("Test Payload: " + tst.text);
                    if (!results.evaluated) {
                        continue;
                    }
                    log.info("=========FILE TEST " + tst.id + " FOUND:" + (results.matches.isEmpty() ? "NOTHING" : results.matches.size()));
                    tester.save_result(tst, results);
                }
            } finally {
                // Release the input file even when a row fails.
                in.close();
            }
        } finally {
            // Always close the report so rows already saved are not left in an open file.
            tester.close_report();
        }
        log.info("=== FILE TESTS DONE ===");
    } catch (Exception err) {
        log.error("TEST BY LINES", err);
    }
}
Also used : GeocoordTestCase(org.opensextant.extractors.xcoord.GeocoordTestCase) CsvMapReader(org.supercsv.io.CsvMapReader) TextMatchResult(org.opensextant.extractors.flexpat.TextMatchResult) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException)

Example 7 with TextMatchResult

use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.

the class XTemporal method extract_dates.

/**
 * A direct call to extract dates, which is useful for diagnostics and
 * development/testing.
 *
 * <p>Every enabled pattern is applied to {@code text}. A regex hit is kept
 * only if it normalizes to a date; hits of the {@code YMD} family that
 * {@code isDistantPastYMD} rejects are dropped. Kept matches are flagged for
 * distant past / future and then passed through
 * {@code PatternManager.reduce_matches}.
 *
 * @param text    text to scan for dates
 * @param text_id identifier stored on the result as {@code result_id}
 * @return result whose {@code matches} list is never null; {@code pass} is
 *         true when at least one date was kept, and {@code evaluated} is
 *         true when at least one enabled pattern was applied
 */
public TextMatchResult extract_dates(String text, String text_id) {
    TextMatchResult results = new TextMatchResult();
    results.matches = new ArrayList<TextMatch>();
    results.result_id = text_id;
    int found = 0;
    int patternsComplete = 0;
    final int patternCount = patterns.get_patterns().size();
    for (RegexPattern pat : patterns.get_patterns()) {
        log.debug("pattern={}", pat.id);
        if (!pat.enabled) {
            log.debug("CFG pattern={} not enabled.", pat.id);
            continue;
        }
        Matcher match = pat.regex.matcher(text);
        results.evaluated = true;
        while (match.find()) {
            // Counts every regex hit, including ones rejected below; used for the match id.
            ++found;
            DateMatch dt = new DateMatch();
            dt.pattern_id = pat.id;
            dt.start = match.start();
            dt.end = match.end();
            dt.setText(match.group());
            try {
                DateNormalization.normalize_date(patterns.group_map(pat, match), dt);
                if (dt.datenorm == null) {
                    continue;
                }
                if ("YMD".equalsIgnoreCase(pat.family) && this.isDistantPastYMD(dt.datenorm)) {
                    continue;
                }
                dt.datenorm_text = DateNormalization.format_date(dt.datenorm);
                // Flags worth setting here.
                dt.isDistantPast = isDistantPast(dt.datenorm.getTime());
                dt.isFuture = isFuture(dt.datenorm.getTime());
                set_match_id(dt, found);
            } catch (Exception err) {
                // Not a date: the regex matched but normalization failed, so skip this hit.
                log.debug("pattern={} matched text that is not a date: {}", pat.id, match.group());
                continue;
            }
            results.matches.add(dt);
        }
        patternsComplete++;
        // Divide by (count + 1): the original "x / (double) size + 1" parsed as
        // (x / size) + 1 and reported values above 1.
        // NOTE(review): assumes updateProgress expects a fraction in [0, 1] - confirm.
        updateProgress(patternsComplete / (double) (patternCount + 1));
    }
    // The overall verdict is decided once, from what was actually kept.
    results.pass = !results.matches.isEmpty();
    PatternManager.reduce_matches(results.matches);
    return results;
}
Also used : RegexPattern(org.opensextant.extractors.flexpat.RegexPattern) Matcher(java.util.regex.Matcher) TextMatch(org.opensextant.extraction.TextMatch) TextMatchResult(org.opensextant.extractors.flexpat.TextMatchResult) IOException(java.io.IOException)

Example 8 with TextMatchResult

use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.

the class DateNormalizationTest method ensureTimeZone.

/**
 * Checks that a bare "Oct 07" is not extracted as a date, and that
 * "Oct 2007" yields exactly one match normalized to 1191196800000 ms
 * plus twelve hours.
 *
 * <p>Note that this may report false negatives if the JVM's default time
 * zone is UTC.
 */
@Test
public void ensureTimeZone() {
    // Not parseable by default.  pattern is too noisy.
    final TextMatchResult result1 = timeFinder.extract_dates("Oct 07", "dummy");
    System.err.println("1 " + result1.matches.toString());
    assertEquals(0, result1.matches.size());
    final TextMatchResult result2 = timeFinder.extract_dates("Oct 2007", "dummy");
    // Print the second result here; the original printed result1 again.
    System.err.println("2 " + result2.matches.toString());
    assertEquals(1, result2.matches.size());
    DateMatch dt = (DateMatch) result2.matches.get(0);
    long noon = (12 * 3600 * 1000);
    assertEquals(1191196800000L + noon, dt.datenorm.getTime());
}
Also used : DateMatch(org.opensextant.extractors.xtemporal.DateMatch) TextMatchResult(org.opensextant.extractors.flexpat.TextMatchResult) Test(org.junit.Test)

Example 9 with TextMatchResult

use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.

the class TestPoLiReporter method test.

/**
 * System tests: runs every test case registered with the pattern manager
 * and writes the outcome to {@code results/test_System.csv}.
 *
 * <p>A test case that is evaluated and yields matches gets one report row
 * per match. Otherwise a single row with no match is written and the case
 * is logged as failed. The report is closed even when writing a row fails.
 *
 * @throws IOException if the results file cannot be created, a row for a
 *         failed case cannot be written, or the report cannot be closed
 */
public void test() throws IOException {
    poli.enableAll();
    createResultsFile("results/test_System.csv");
    log.info("TESTING ALL SYSTEM PATTERNS");
    try {
        for (PatternTestCase test : this.poli.getPatternManager().testcases) {
            log.info("TEST " + test.id);
            TextMatchResult results = this.poli.extract_patterns(test.text, test.id, test.family);
            if (results.evaluated && !results.matches.isEmpty()) {
                try {
                    for (TextMatch m : results.matches) {
                        Map<String, Object> row = createResultRow(test, m);
                        report.write(row, header, poliResultsSpec);
                    }
                } catch (IOException ioerr) {
                    // A write failure abandons the remaining rows of this case only.
                    log.error("Failed to write result for " + test.id, ioerr);
                }
            } else {
                Map<String, Object> row = createResultRow(test, null);
                report.write(row, header, poliResultsSpec);
                log.info("TEST " + test.id + " STATUS: FAILED");
            }
        }
    } finally {
        // Close the report even if a write above propagated an IOException.
        closeReport();
    }
}
Also used : PatternTestCase(org.opensextant.extractors.flexpat.PatternTestCase) TextMatch(org.opensextant.extraction.TextMatch) IOException(java.io.IOException) TextMatchResult(org.opensextant.extractors.flexpat.TextMatchResult)

Example 10 with TextMatchResult

use of org.opensextant.extractors.flexpat.TextMatchResult in project Xponents by OpenSextant.

the class TestXTemporal method adhocTests.

/**
 * Runs date extraction over a fixed list of ad hoc date strings with all
 * patterns enabled, and saves each evaluated result to
 * {@code ./results/xtemp_Adhoc.csv}.
 *
 * <p>Any exception is logged and ends the run without the closing
 * "DONE" log line; the report is still closed.
 */
public void adhocTests() {
    log.info("=== SYSTEM TESTS START ===");
    xdt.enableAll();
    String[] tests = { "2010-04-13", "1111-11-11", "12/13/1900", "11/12/1817", "12/30/90", "JUN 00", "JUN '13", "JUN '12", "JUN '17", "JUN '33", "JUN 2017", "JUN 1917" };
    try {
        TestXTemporalReporter tester = new TestXTemporalReporter("./results/xtemp_Adhoc.csv");
        try {
            int count = 0;
            for (String tst_text : tests) {
                ++count;
                TextMatchResult results = xdt.extract_dates(tst_text, "" + count);
                results.add_trace("Test Payload: " + tst_text);
                if (!results.evaluated) {
                    continue;
                }
                // Treat an empty match list like a missing one, so "NOTHING" is
                // reported when no date was found.
                boolean nothing = results.matches == null || results.matches.isEmpty();
                log.info("=========SYSTEM TEST " + count + " FOUND:" + (nothing ? "NOTHING" : results.matches.size()));
                tester.save_result(results);
            }
        } finally {
            // Close the report even when a test case throws.
            tester.close_report();
        }
    } catch (Exception err) {
        log.error("Not finishing tests", err);
        return;
    }
    log.info("=== SYSTEM TESTS DONE ===");
}
Also used : TextMatchResult(org.opensextant.extractors.flexpat.TextMatchResult) ConfigException(org.opensextant.ConfigException)

Aggregations

TextMatchResult (org.opensextant.extractors.flexpat.TextMatchResult)13 IOException (java.io.IOException)7 TextMatch (org.opensextant.extraction.TextMatch)6 FileNotFoundException (java.io.FileNotFoundException)4 PatternTestCase (org.opensextant.extractors.flexpat.PatternTestCase)4 Matcher (java.util.regex.Matcher)3 RegexPattern (org.opensextant.extractors.flexpat.RegexPattern)3 ConfigException (org.opensextant.ConfigException)2 GeocoordTestCase (org.opensextant.extractors.xcoord.GeocoordTestCase)2 File (java.io.File)1 LineNumberReader (java.io.LineNumberReader)1 Test (org.junit.Test)1 NormalizationException (org.opensextant.extraction.NormalizationException)1 RegexPatternManager (org.opensextant.extractors.flexpat.RegexPatternManager)1 GeocoordMatch (org.opensextant.extractors.xcoord.GeocoordMatch)1 DateMatch (org.opensextant.extractors.xtemporal.DateMatch)1 CsvMapReader (org.supercsv.io.CsvMapReader)1