use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.
the class CorefScorer method getFinalConllScore.
/**
 * Extracts the F1 percentages reported on the "Coreference:" lines of a scorer summary and
 * averages three of them into a single score.
 * <p>
 * At most five F1 values are read, in the order they appear in {@code summary}; any further
 * matches are ignored. The result is the mean of the first, second and fourth values
 * (presumably the MUC, B-cubed and CEAF-E metrics of the CoNLL scorer output - confirm against
 * the scorer's metric order). Slots for which no line was found count as 0.
 *
 * @param summary scorer output text containing lines of the form {@code Coreference: ... F1: NN.NN%}
 * @return the average of the first, second and fourth F1 values found
 * @throws NumberFormatException if a captured F1 value is not a parseable number
 */
public static double getFinalConllScore(String summary) {
  Pattern f1 = Pattern.compile("Coreference:.*F1: (.*)%");
  Matcher f1Matcher = f1.matcher(summary);
  double[] F1s = new double[5];
  int i = 0;
  // Bound the loop by the array size: an unexpected sixth match must not overflow the array.
  while (i < F1s.length && f1Matcher.find()) {
    F1s[i++] = Double.parseDouble(f1Matcher.group(1));
  }
  return (F1s[0] + F1s[1] + F1s[3]) / 3;
}
use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.
the class NERGUI method extract.
/**
 * Runs the classifier over the current editor contents and highlights the labelled entities.
 * <p>
 * For non-HTML content the document text is classified with inline XML tags, the tags are
 * stripped pair by pair, and the character range of each entity receives the attribute set
 * returned by {@code getAttributeSet(tag)}. For HTML content every start tag is replaced by a
 * coloured {@code <span>} and every end tag by {@code </span>}, and the result is set back on
 * the editor pane. In both cases the tagged text is stored in {@code taggedContents} and the
 * "save tagged as" action is enabled at the end.
 */
private void extract() {
  log.info("content type: " + editorPane.getContentType());
  if (!editorPane.getContentType().equals("text/html")) {
    DefaultStyledDocument doc = (DefaultStyledDocument) editorPane.getDocument();
    String text = null;
    try {
      text = doc.getText(0, doc.getLength());
    } catch (Exception e) {
      e.printStackTrace();
    }
    String labeledText = classifier.classifyWithInlineXML(text);
    taggedContents = labeledText;
    String tagPattern = joinNonBackgroundTags();
    Pattern startPattern = Pattern.compile("<(" + tagPattern + ")>");
    Pattern endPattern = Pattern.compile("</(" + tagPattern + ")>");
    String finalText = labeledText;
    Matcher m = startPattern.matcher(finalText);
    while (m.find()) {
      // Offsets are taken before the tag is removed, so they index into the tag-free text.
      int start = m.start();
      finalText = m.replaceFirst("");
      m = endPattern.matcher(finalText);
      if (m.find()) {
        int end = m.start();
        String tag = m.group(1);
        finalText = m.replaceFirst("");
        AttributeSet attSet = getAttributeSet(tag);
        try {
          String entity = finalText.substring(start, end);
          doc.setCharacterAttributes(start, entity.length(), attSet, false);
        } catch (Exception ex) {
          ex.printStackTrace();
          System.exit(-1);
        }
        log.info(tag + ": " + finalText.substring(start, end));
      } else {
        log.info("Couldn't find end pattern!");
      }
      m = startPattern.matcher(finalText);
    }
    editorPane.revalidate();
    editorPane.repaint();
  } else {
    String untaggedContents = editorPane.getText();
    if (untaggedContents == null) {
      untaggedContents = "";
    }
    taggedContents = classifier.classifyWithInlineXML(untaggedContents);
    String tagPattern = joinNonBackgroundTags();
    Pattern startPattern = Pattern.compile("<(" + tagPattern + ")>");
    Pattern endPattern = Pattern.compile("</(" + tagPattern + ")>");
    String finalText = taggedContents;
    Matcher m = startPattern.matcher(finalText);
    while (m.find()) {
      String tag = m.group(1);
      String color = colorToHTML(tagToColorMap.get(tag));
      String newTag = "<span style=\"background-color: " + color + "; color: white\">";
      finalText = m.replaceFirst(newTag);
      // replaceFirst leaves the match state intact, so m.start() is still the tag position.
      int start = m.start() + newTag.length();
      Matcher m1 = endPattern.matcher(finalText);
      if (m1.find(m.end())) {
        String entity = finalText.substring(start, m1.start());
        log.info(tag + ": " + entity);
        finalText = m1.replaceFirst("</span>");
      } else {
        // Without this guard m1.start() would throw IllegalStateException on an unclosed tag.
        log.info("Couldn't find end pattern!");
      }
      m = startPattern.matcher(finalText);
    }
    editorPane.setText(finalText);
    editorPane.revalidate();
    editorPane.repaint();
  }
  saveTaggedAs.setEnabled(true);
}

/** Joins all classifier labels except the background symbol into a regex alternation ({@code A|B|C}). */
private String joinNonBackgroundTags() {
  Set<String> tags = classifier.labels();
  String background = classifier.backgroundSymbol();
  StringBuilder tagPattern = new StringBuilder();
  for (String tag : tags) {
    if (background.equals(tag)) {
      continue;
    }
    if (tagPattern.length() > 0) {
      tagPattern.append('|');
    }
    tagPattern.append(tag);
  }
  return tagPattern.toString();
}
use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.
the class ChineseQuantifiableEntityNormalizer method normalizeDateString.
/**
 * Normalizes date strings.
 * <p>
 * The input is tried against a fixed sequence of anchored patterns (birth decade, relative
 * time, year-month-day, month-day, day only, English month-day-year); the first one that
 * matches determines the result. Fields the matched pattern does not provide are filled in
 * from the context date, or stay as {@code X} placeholders when the context date is
 * {@code null} or does not match the year-month-day pattern.
 *
 * @param s Input date string
 * @param ctxdate Context date (usually doc_date)
 * @return Normalized Timex expression of the input date string, or {@code s} itself when no
 *         pattern matches
 */
public static String normalizeDateString(String s, String ctxdate) {
  // TODO [pengqi]: need to handle basic localization ("在七月二日到[八日]间")
  // TODO [pengqi]: need to handle literal numeral dates (usually used in events, e.g. "三一五" for 03-15)
  // TODO [pengqi]: might need to add a pattern for centuries ("上世纪90年代")?
  String ctxyear = "XXXX";
  String ctxmonth = "XX";
  String ctxday = "XX";

  // Pull year/month/day out of the context date when it is available and well-formed.
  if (ctxdate != null) {
    Matcher ctxMatcher = Pattern.compile("^" + BASIC_YYYYMMDD_PATTERN + "$").matcher(ctxdate);
    if (ctxMatcher.find() && ctxMatcher.groupCount() == 3) {
      ctxyear = ctxMatcher.group(1);
      ctxmonth = ctxMatcher.group(2);
      ctxday = ctxMatcher.group(3);
    }
  }

  // Birth decade: keep the first three digits of the year and wildcard everything else.
  Matcher decade = Pattern.compile("^" + BIRTH_DECADE_PATTERN + "$").matcher(s);
  if (decade.find() && decade.groupCount() == 1) {
    return normalizeYear(decade.group(1), ctxyear, true).substring(0, 3) + "X" + "-XX-XX";
  }

  // Relative time: year and month come from the context, the day from the match.
  Matcher relative = Pattern.compile("^" + RELATIVE_TIME_PATTERN + "$").matcher(s);
  if (relative.find() && relative.groupCount() == 1) {
    return ctxyear + "-" + ctxmonth + "-" + normalizeMonthOrDay(relative.group(1), ctxday);
  }

  // Full year-month-day.
  Matcher yearMonthDay = Pattern.compile("^" + BASIC_YYYYMMDD_PATTERN + "$").matcher(s);
  if (yearMonthDay.find() && yearMonthDay.groupCount() == 3) {
    return normalizeYear(yearMonthDay.group(1), ctxyear)
        + "-" + normalizeMonthOrDay(yearMonthDay.group(2), ctxmonth)
        + "-" + normalizeMonthOrDay(yearMonthDay.group(3), ctxday);
  }

  // Month-day only: the year comes from the context.
  Matcher monthDay = Pattern.compile("^" + BASIC_MMDD_PATTERN + "$").matcher(s);
  if (monthDay.find() && monthDay.groupCount() == 2) {
    return ctxyear
        + "-" + normalizeMonthOrDay(monthDay.group(1), ctxmonth)
        + "-" + normalizeMonthOrDay(monthDay.group(2), ctxday);
  }

  // Day only: year and month come from the context.
  Matcher dayOnly = Pattern.compile("^" + BASIC_DD_PATTERN + "$").matcher(s);
  if (dayOnly.find() && dayOnly.groupCount() == 1) {
    return ctxyear + "-" + ctxmonth + "-" + normalizeMonthOrDay(dayOnly.group(1), ctxday);
  }

  // English month-day-year, where the year group may be absent.
  Matcher english = Pattern.compile("^" + ENGLISH_MMDDYYYY_PATTERN + "$").matcher(s);
  if (english.find() && english.groupCount() == 3) {
    String year = (english.group(3) == null) ? ctxyear : normalizeYear(english.group(3), ctxyear);
    return year
        + "-" + normalizeMonthOrDay(english.group(1), ctxmonth)
        + "-" + normalizeMonthOrDay(english.group(2), ctxday);
  }

  // Nothing matched: hand the input back untouched.
  return s;
}
use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.
the class ISODateInstance method tokenizeDate.
/* -------------------------- Tokenization and Field Extraction -------------------------- */
//These methods are taken directly from or modified slightly from {@link DateInstance}
/**
 * Splits a date string into tokens and stores them in {@code tokens}.
 * <p>
 * Hyphens are padded with spaces so they become separate tokens, commas are replaced by
 * spaces, and the result is run through a {@code PTBTokenizer}. A {@code null} input is
 * reported on standard output and leaves {@code tokens} empty.
 *
 * @param inputDate the raw date string to tokenize; may be {@code null}
 */
private void tokenizeDate(String inputDate) {
  tokens = new ArrayList<>();
  if (inputDate == null) {
    System.out.println("Null input date");
    // Nothing to tokenize; previously execution fell through and threw a NullPointerException.
    return;
  }
  Pattern pat = Pattern.compile("[-]");
  Matcher m = pat.matcher(inputDate);
  String str = m.replaceAll(" - ");
  str = str.replaceAll(",", " ");
  PTBTokenizer<Word> tokenizer = PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(str)));
  while (tokenizer.hasNext()) {
    Word nextToken = tokenizer.next();
    tokens.add(nextToken.toString());
  }
  if (DEBUG) {
    System.out.println("tokens:" + tokens);
  }
}
use of java.util.regex.Pattern in project CoreNLP by stanfordnlp.
the class UnitPrefix method loadPrefixes.
/**
 * Loads unit prefixes from a comma-separated file.
 * <p>
 * The first line is a header naming the columns; the columns {@code name}, {@code prefix},
 * {@code base}, {@code exp} and {@code system} are looked up by name, so their order in the
 * file does not matter. Every following line yields one {@code UnitPrefix} whose scale is
 * {@code base} raised to the power {@code exp}.
 *
 * @param filename path of the prefix file, passed to {@code IOUtils.getBufferedFileReader}
 * @return the prefixes in file order
 * @throws IOException if the file cannot be read or has no header line
 */
public static List<UnitPrefix> loadPrefixes(String filename) throws IOException {
  Pattern commaPattern = Pattern.compile("\\s*,\\s*");
  // try-with-resources closes the reader even when a line fails to parse.
  try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
    String headerString = br.readLine();
    if (headerString == null) {
      throw new IOException("Unit prefix file has no header line: " + filename);
    }
    String[] header = commaPattern.split(headerString);
    Map<String, Integer> headerIndex = new HashMap<>();
    for (int i = 0; i < header.length; i++) {
      headerIndex.put(header[i], i);
    }
    int iName = headerIndex.get("name");
    int iPrefix = headerIndex.get("prefix");
    int iBase = headerIndex.get("base");
    int iExp = headerIndex.get("exp");
    int iSystem = headerIndex.get("system");
    String line;
    List<UnitPrefix> list = new ArrayList<>();
    while ((line = br.readLine()) != null) {
      String[] fields = commaPattern.split(line);
      double base = Double.parseDouble(fields[iBase]);
      double exp = Double.parseDouble(fields[iExp]);
      double scale = Math.pow(base, exp);
      UnitPrefix unitPrefix = new UnitPrefix(fields[iName], fields[iPrefix], scale, fields[iSystem]);
      list.add(unitPrefix);
    }
    return list;
  }
}
Aggregations