Search in sources :

Example 31 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class CorefScorer method getFinalConllScore.

/**
 * Computes the final CoNLL score from a scorer summary.
 *
 * <p>Every line of {@code summary} matching {@code Coreference:.*F1: (.*)%} contributes one
 * F1 value, in order of appearance. The result is the arithmetic mean of the first, second
 * and fourth values (presumably MUC, B-cubed and CEAF-E -- confirm against the scorer's
 * output order). Values that are not present in the summary count as {@code 0.0}.
 *
 * <p>At most five F1 values are read; any further matching lines are ignored instead of
 * overflowing the fixed-size buffer.
 *
 * @param summary the textual scorer summary to scan
 * @return the mean of the first, second and fourth F1 values found
 * @throws NumberFormatException if a captured F1 value cannot be parsed as a double
 */
public static double getFinalConllScore(String summary) {
    Pattern f1 = Pattern.compile("Coreference:.*F1: (.*)%");
    Matcher f1Matcher = f1.matcher(summary);
    double[] F1s = new double[5];
    int i = 0;
    // Check the capacity first so that a summary with extra matching lines cannot
    // write past the end of the array.
    while (i < F1s.length && f1Matcher.find()) {
        F1s[i++] = Double.parseDouble(f1Matcher.group(1));
    }
    return (F1s[0] + F1s[1] + F1s[3]) / 3;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher)

Example 32 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class NERGUI method extract.

/**
 * Runs the classifier over the editor pane's current contents and highlights the labeled
 * entities.
 *
 * <p>Two paths, selected by the pane's content type:
 * <ul>
 *   <li>anything other than {@code text/html}: the document text is classified with inline
 *       XML tags; each start/end tag pair is stripped from a working copy of the text and
 *       the character range between them is styled on the document;</li>
 *   <li>{@code text/html}: each start tag is replaced by a colored {@code span} and each end
 *       tag by a closing {@code span}, and the result is set as the pane's text.</li>
 * </ul>
 * In both paths the tagged classifier output is stored in {@code taggedContents}, and
 * {@code saveTaggedAs} is enabled at the end.
 */
private void extract() {
    log.info("content type: " + editorPane.getContentType());
    if (!editorPane.getContentType().equals("text/html")) {
        DefaultStyledDocument doc = (DefaultStyledDocument) editorPane.getDocument();
        String text = null;
        try {
            text = doc.getText(0, doc.getLength());
        } catch (Exception e) {
            // NOTE(review): on failure text stays null and is still passed to the classifier below.
            e.printStackTrace();
        }
        String labeledText = classifier.classifyWithInlineXML(text);
        taggedContents = labeledText;
        Set<String> tags = classifier.labels();
        String background = classifier.backgroundSymbol();
        // Build a regex alternation of every label except the background symbol.
        // Labels are inserted into the regex as-is (not quoted).
        StringBuilder tagPattern = new StringBuilder();
        for (String tag : tags) {
            if (background.equals(tag)) {
                continue;
            }
            if (tagPattern.length() > 0) {
                tagPattern.append('|');
            }
            tagPattern.append(tag);
        }
        Pattern startPattern = Pattern.compile("<(" + tagPattern + ")>");
        Pattern endPattern = Pattern.compile("</(" + tagPattern + ")>");
        String finalText = labeledText;
        Matcher m = startPattern.matcher(finalText);
        // Each iteration strips one start tag and one end tag from finalText, so offsets
        // taken after the removals refer to positions in the text without those tags.
        while (m.find()) {
            // Offset of the start tag == offset of the entity once the tag is removed.
            int start = m.start();
            finalText = m.replaceFirst("");
            // Look for the first end tag anywhere in the text (search starts at offset 0).
            m = endPattern.matcher(finalText);
            if (m.find()) {
                // Offset of the end tag == end of the entity (exclusive) once the tag is removed.
                int end = m.start();
                String tag = m.group(1);
                finalText = m.replaceFirst("");
                AttributeSet attSet = getAttributeSet(tag);
                try {
                    String entity = finalText.substring(start, end);
                    doc.setCharacterAttributes(start, entity.length(), attSet, false);
                } catch (Exception ex) {
                    // NOTE(review): any failure while styling terminates the whole JVM.
                    ex.printStackTrace();
                    System.exit(-1);
                }
                log.info(tag + ": " + finalText.substring(start, end));
            } else {
                // The start tag has already been removed at this point; only a log entry is written.
                log.info("Couldn't find end pattern!");
            }
            // Re-scan from the beginning of the updated text for the next start tag.
            m = startPattern.matcher(finalText);
        }
        editorPane.revalidate();
        editorPane.repaint();
    } else {
        String untaggedContents = editorPane.getText();
        if (untaggedContents == null) {
            untaggedContents = "";
        }
        taggedContents = classifier.classifyWithInlineXML(untaggedContents);
        Set<String> tags = classifier.labels();
        String background = classifier.backgroundSymbol();
        // Same alternation of non-background labels as in the non-HTML branch.
        StringBuilder tagPattern = new StringBuilder();
        for (String tag : tags) {
            if (background.equals(tag)) {
                continue;
            }
            if (tagPattern.length() > 0) {
                tagPattern.append('|');
            }
            tagPattern.append(tag);
        }
        Pattern startPattern = Pattern.compile("<(" + tagPattern + ")>");
        Pattern endPattern = Pattern.compile("</(" + tagPattern + ")>");
        String finalText = taggedContents;
        Matcher m = startPattern.matcher(finalText);
        // Each iteration rewrites one start tag into an opening span and one end tag into a closing span.
        while (m.find()) {
            String tag = m.group(1);
            String color = colorToHTML(tagToColorMap.get(tag));
            String newTag = "<span style=\"background-color: " + color + "; color: white\">";
            finalText = m.replaceFirst(newTag);
            // NOTE(review): m.start()/m.end() are read after replaceFirst, which resets and
            // re-scans the matcher; this relies on the matcher still reporting the first
            // match of the pre-replacement text -- confirm.
            int start = m.start() + newTag.length();
            Matcher m1 = endPattern.matcher(finalText);
            // NOTE(review): the boolean result of find is ignored; if no end tag is found,
            // m1.start() below throws IllegalStateException.
            m1.find(m.end());
            String entity = finalText.substring(start, m1.start());
            log.info(tag + ": " + entity);
            // replaceFirst rewrites the first end tag in the text.
            finalText = m1.replaceFirst("</span>");
            m = startPattern.matcher(finalText);
        }
        // System.out.println(finalText);
        editorPane.setText(finalText);
        editorPane.revalidate();
        editorPane.repaint();
    // log.info(finalText);
    }
    saveTaggedAs.setEnabled(true);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher)

Example 33 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class ChineseQuantifiableEntityNormalizer method normalizeDateString.

/**
 * Normalizes a date string into a {@code year-month-day} Timex-style expression.
 *
 * <p>The input is tested, in this order, against the birth-decade, relative-time,
 * YYYYMMDD, MMDD, DD and English MMDDYYYY patterns (each anchored to the whole string);
 * the first pattern that matches determines the result. Fields that the matched pattern
 * does not supply are taken from the context date, or left as {@code XXXX} / {@code XX}
 * when no usable context date is given.
 *
 * @param s Input date string
 * @param ctxdate Context date (usually doc_date); may be {@code null}
 * @return Normalized Timex expression of the input date string, or {@code s} itself if no
 *         pattern matches
 */
public static String normalizeDateString(String s, String ctxdate) {
    // TODO [pengqi]: need to handle basic localization ("在七月二日到[八日]间")
    // TODO [pengqi]: need to handle literal numeral dates (usually used in events, e.g. "三一五" for 03-15)
    // TODO [pengqi]: might need to add a pattern for centuries ("上世纪90年代")?
    String ctxyear = "XXXX";
    String ctxmonth = "XX";
    String ctxday = "XX";

    // Pull year/month/day out of the context date when it has the basic YYYYMMDD shape.
    if (ctxdate != null) {
        Matcher ctxMatcher = Pattern.compile("^" + BASIC_YYYYMMDD_PATTERN + "$").matcher(ctxdate);
        if (ctxMatcher.find() && ctxMatcher.groupCount() == 3) {
            ctxyear = ctxMatcher.group(1);
            ctxmonth = ctxMatcher.group(2);
            ctxday = ctxMatcher.group(3);
        }
    }

    // Birth decade: keep the first three digits of the year, wildcard everything else.
    Matcher decade = Pattern.compile("^" + BIRTH_DECADE_PATTERN + "$").matcher(s);
    if (decade.find() && decade.groupCount() == 1) {
        return new StringBuilder()
                .append(normalizeYear(decade.group(1), ctxyear, true).substring(0, 3) + "X")
                .append("-XX-XX")
                .toString();
    }

    // Relative time: context year and month, day derived from the match.
    Matcher relative = Pattern.compile("^" + RELATIVE_TIME_PATTERN + "$").matcher(s);
    if (relative.find() && relative.groupCount() == 1) {
        return new StringBuilder()
                .append(ctxyear).append("-")
                .append(ctxmonth).append("-")
                .append(normalizeMonthOrDay(relative.group(1), ctxday))
                .toString();
    }

    // Full year-month-day.
    Matcher ymd = Pattern.compile("^" + BASIC_YYYYMMDD_PATTERN + "$").matcher(s);
    if (ymd.find() && ymd.groupCount() == 3) {
        return new StringBuilder()
                .append(normalizeYear(ymd.group(1), ctxyear)).append("-")
                .append(normalizeMonthOrDay(ymd.group(2), ctxmonth)).append("-")
                .append(normalizeMonthOrDay(ymd.group(3), ctxday))
                .toString();
    }

    // Month-day only: year comes from the context.
    Matcher md = Pattern.compile("^" + BASIC_MMDD_PATTERN + "$").matcher(s);
    if (md.find() && md.groupCount() == 2) {
        return new StringBuilder()
                .append(ctxyear).append("-")
                .append(normalizeMonthOrDay(md.group(1), ctxmonth)).append("-")
                .append(normalizeMonthOrDay(md.group(2), ctxday))
                .toString();
    }

    // Day only: year and month come from the context.
    Matcher d = Pattern.compile("^" + BASIC_DD_PATTERN + "$").matcher(s);
    if (d.find() && d.groupCount() == 1) {
        return new StringBuilder()
                .append(ctxyear).append("-")
                .append(ctxmonth).append("-")
                .append(normalizeMonthOrDay(d.group(1), ctxday))
                .toString();
    }

    // English month-day-year; the year group is optional and falls back to the context year.
    Matcher english = Pattern.compile("^" + ENGLISH_MMDDYYYY_PATTERN + "$").matcher(s);
    if (english.find() && english.groupCount() == 3) {
        StringBuilder out = new StringBuilder();
        if (english.group(3) != null) {
            out.append(normalizeYear(english.group(3), ctxyear));
        } else {
            out.append(ctxyear);
        }
        return out.append("-")
                .append(normalizeMonthOrDay(english.group(1), ctxmonth)).append("-")
                .append(normalizeMonthOrDay(english.group(2), ctxday))
                .toString();
    }

    // Nothing matched: hand the input back untouched.
    return s;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher)

Example 34 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class ISODateInstance method tokenizeDate.

/* -------------------------- Tokenization and Field Extraction -------------------------- */
//These methods are taken directly from or modified slightly from {@link DateInstance}
/**
 * Splits a date string into tokens and stores them in {@code tokens}.
 *
 * <p>Every hyphen is surrounded with spaces and every comma is replaced by a space before
 * the text is handed to a {@code PTBTokenizer}; the string form of each resulting token is
 * appended to {@code tokens}, which is reset to a fresh list on every call.
 *
 * <p>A {@code null} input is reported on standard output and leaves {@code tokens} empty,
 * rather than failing with a {@code NullPointerException}.
 *
 * @param inputDate the raw date text to tokenize; may be {@code null}
 */
private void tokenizeDate(String inputDate) {
    tokens = new ArrayList<>();
    if (inputDate == null) {
        System.out.println("Null input date");
        // Nothing to tokenize; continuing would dereference null.
        return;
    }
    // Literal replacements: no regex is needed for single fixed characters.
    String str = inputDate.replace("-", " - ").replace(",", " ");
    PTBTokenizer<Word> tokenizer = PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(str)));
    while (tokenizer.hasNext()) {
        Word nextToken = tokenizer.next();
        tokens.add(nextToken.toString());
    }
    if (DEBUG) {
        System.out.println("tokens:" + tokens);
    }
}
Also used : Pattern(java.util.regex.Pattern) Word(edu.stanford.nlp.ling.Word) Matcher(java.util.regex.Matcher) BufferedReader(java.io.BufferedReader) StringReader(java.io.StringReader)

Example 35 with Pattern

use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.

the class UnitPrefix method loadPrefixes.

/**
 * Loads unit prefixes from a comma-separated file.
 *
 * <p>The first line is a header naming the columns; the columns {@code name},
 * {@code prefix}, {@code base}, {@code exp} and {@code system} are looked up by name, so
 * their order in the file does not matter. Each following line yields one
 * {@code UnitPrefix} whose scale is {@code base} raised to the power {@code exp}. Fields are
 * split on commas with any surrounding whitespace removed.
 *
 * <p>The reader is closed on every exit path, including when a line fails to parse.
 *
 * @param filename path of the prefix file to read
 * @return the prefixes in file order
 * @throws IOException if the file cannot be read or contains no header line
 * @throws NumberFormatException if a {@code base} or {@code exp} field is not a number
 * @throws NullPointerException if one of the required columns is missing from the header
 */
public static List<UnitPrefix> loadPrefixes(String filename) throws IOException {
    Pattern commaPattern = Pattern.compile("\\s*,\\s*");
    // try-with-resources so the file handle is released even if parsing throws.
    try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
        String headerString = br.readLine();
        if (headerString == null) {
            throw new IOException("Prefix file is empty (no header line): " + filename);
        }
        String[] header = commaPattern.split(headerString);
        Map<String, Integer> headerIndex = new HashMap<>();
        for (int i = 0; i < header.length; i++) {
            headerIndex.put(header[i], i);
        }
        // Unboxing here fails with a NullPointerException if a column name is absent.
        int iName = headerIndex.get("name");
        int iPrefix = headerIndex.get("prefix");
        int iBase = headerIndex.get("base");
        int iExp = headerIndex.get("exp");
        int iSystem = headerIndex.get("system");
        String line;
        List<UnitPrefix> list = new ArrayList<>();
        while ((line = br.readLine()) != null) {
            String[] fields = commaPattern.split(line);
            double base = Double.parseDouble(fields[iBase]);
            double exp = Double.parseDouble(fields[iExp]);
            double scale = Math.pow(base, exp);
            list.add(new UnitPrefix(fields[iName], fields[iPrefix], scale, fields[iSystem]));
        }
        return list;
    }
}
Also used : Pattern(java.util.regex.Pattern) HashMap(java.util.HashMap) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList)

Aggregations

Pattern (java.util.regex.Pattern): 3181, Matcher (java.util.regex.Matcher): 2116, ArrayList (java.util.ArrayList): 387, IOException (java.io.IOException): 247, Test (org.junit.Test): 238, File (java.io.File): 193, HashMap (java.util.HashMap): 163, BufferedReader (java.io.BufferedReader): 127, Field (java.lang.reflect.Field): 119, PatternSyntaxException (java.util.regex.PatternSyntaxException): 119, Map (java.util.Map): 110, List (java.util.List): 93, HashSet (java.util.HashSet): 79, InputStreamReader (java.io.InputStreamReader): 67, InputStream (java.io.InputStream): 43, FileReader (java.io.FileReader): 41, FileInputStream (java.io.FileInputStream): 40, URL (java.net.URL): 35, SmallTest (android.test.suitebuilder.annotation.SmallTest): 31, LinkedHashMap (java.util.LinkedHashMap): 31