use of java.util.regex.Matcher in project lucida by claritylab.
the class KnowledgeAnnotator method getContent.
/**
 * Extracts the relevant content of a question by resolving the group
 * identifiers of the format <code>[group_no]</code> in the content string
 * that corresponds to the matching pattern.
 * <p>
 * Every identifier is replaced by the text of capturing group
 * <code>group_no</code> of the <code>matcher</code> field. The content
 * string is scanned exactly once, so text that was inserted from the
 * question is never itself interpreted as a group identifier. A pair of
 * empty brackets (<code>[]</code>) is not an identifier and is left
 * untouched.
 *
 * @return relevant content of the question
 */
protected String getContent() {
    String content = qContents.get(index);
    // at least one digit is required: with "\\d*" the text "[]" matched as
    // well and Integer.parseInt("") aborted with a NumberFormatException
    Pattern p = Pattern.compile("\\[(\\d+)\\]");
    Matcher m = p.matcher(content);
    // StringBuffer keeps appendReplacement usable on runtimes before Java 9
    StringBuffer resolved = new StringBuffer();
    // replace all group IDs by the corresponding parts of the question
    while (m.find()) {
        int group = Integer.parseInt(m.group(1));
        // quote the captured text so that '$' and '\' are inserted literally
        m.appendReplacement(resolved, Matcher.quoteReplacement(matcher.group(group)));
    }
    m.appendTail(resolved);
    return resolved.toString();
}
use of java.util.regex.Matcher in project lucida by claritylab.
the class WorldFactbookKA method doSearch.
/**
 * Searches the World Factbook for country details and returns an array
 * containing a single <code>Result</code> object or an empty array, if the
 * search failed.
 * <p>
 * The content string returned by <code>getContent()</code> is split at
 * <code>#</code>: the first part names the demanded information, the second
 * part the country. The country page is downloaded completely, its lines
 * are joined with single blanks, and the table cell that follows the
 * demanded information is extracted. Any exception raised on the way is
 * reported through <code>MsgPrinter.printSearchError</code> and results in
 * an empty array.
 *
 * @return array containing a single <code>Result</code> or an empty array
 */
protected Result[] doSearch() {
    try {
        // get country name and demanded information
        String[] content = getContent().split("#");
        String info = content[0];
        String country = content[1];
        // get URL of country web page
        String countryPage = countries.get(country.toLowerCase());
        if (countryPage == null) {
            return new Result[0];
        }
        URL page = new URL(URL + countryPage);
        // retrieve document
        StringBuilder html = new StringBuilder();
        BufferedReader in = new BufferedReader(new InputStreamReader(page.openStream(), Charset.forName("iso-8859-1")));
        try {
            // read until end of stream: ready() merely tells whether a read
            // would block, so looping on it can stop in the middle of a
            // page that arrives over the network
            String line;
            while ((line = in.readLine()) != null) {
                html.append(line).append(' ');
            }
        } finally {
            // release the connection even if reading fails
            in.close();
        }
        // extract information
        // NOTE(review): info is inserted into the expression unquoted, so
        // regex metacharacters in it are interpreted - confirm whether the
        // content strings rely on that before quoting it
        Pattern p = Pattern.compile("(?i).*" + info + ":</div>\\s*</td>" + "\\s*<td .*?>(.*?)</td>.*");
        Matcher m = p.matcher(html.toString());
        if (m.matches()) {
            // extract sentence
            String sentence = SentenceExtractor.getSentencesFromHtml(m.group(1))[0];
            // create result from that sentence
            return getResult(sentence, page.toString());
        }
    } catch (Exception e) {
        // print search error message
        MsgPrinter.printSearchError(e);
    }
    // search failed
    return new Result[0];
}
use of java.util.regex.Matcher in project lucida by claritylab.
the class CorefResolver method resolvePronounsToTarget.
/**
 * Resolves references ONLY to the target description. This method is called
 * once for each factoid and list question in the series.
 * <p>
 * The question is checked against nine pronoun categories. Only the
 * category whose first occurrence lies furthest to the left is resolved;
 * on equal offsets the category that is checked earlier wins. The first
 * occurrence of that pronoun is then replaced by the target, by its
 * genitive or, for the person categories, by the string returned by
 * <code>isTargetPerson</code>. The person categories ("sgpers", "sgposs",
 * "her") are only resolved if <code>isTargetPerson</code> accepts the
 * target. The resulting question string is written back to the question
 * and printed through <code>MsgPrinter.printResolvedQuestion</code>.
 *
 * @param target
 *            the question series including answers to previous questions
 * @param next
 *            the next question in the series to be answered
 */
public static void resolvePronounsToTarget(TRECTarget target, int next) {
    String currentTarget = target.getCondensedTarget();
    TRECQuestion[] questions = target.getQuestions();
    String currentQuestionString = questions[next].getQuestionString();
    // null if isTargetPerson rejects the target
    String temp = isTargetPerson(currentTarget);
    boolean personFlag = temp != null;
    String currentTargetPerson = currentTarget;
    if (personFlag) {
        currentTargetPerson = temp;
    }
    // genitive of current Target
    String currentTargetGen = null;
    String currentTargetPersonGen = null;
    // rest of the sentence after pronoun occurred
    String rest = null;
    // tokenized target
    String[] tokens = OpenNLP.tokenize(currentTarget);
    // create genitive of currentTarget
    if (currentTarget.endsWith("s")) {
        currentTargetGen = currentTarget.concat("'");
    } else {
        currentTargetGen = currentTarget.concat("'s");
    }
    // create genitive of currentTargetPerson
    if (currentTargetPerson.endsWith("s")) {
        currentTargetPersonGen = currentTargetPerson.concat("'");
    } else {
        currentTargetPersonGen = currentTargetPerson.concat("'s");
    }
    /*
     * Resolve personal, possessive and demonstrative pronouns by the target
     * as antecedent
     */
    // category labels and their expressions, in the order they are checked
    String[] pronounKinds = { "sgpers", "sgthing", "plpers", "sgposs", "sgthingposs", "plposs", "her", "sgdem", "pldem" };
    String[] pronounStrings = { singularThirdPersonPronounString, singularThirdThingPronounString,
            pluralThirdPersonPronounString, singularThirdPersonPronounStringGen,
            singularThirdThingPronounStringGen, pluralThirdPersonPronounStringGen,
            singularThirdPersonPronounStringAmb, singularDemPronounString, pluralDemPronounString };
    String firstPronoun = "";
    int firstIndex = Integer.MAX_VALUE;
    for (int i = 0; i < pronounKinds.length; i++) {
        // a category is present if splitting yields more than one part; the
        // length of the first part is the offset of its first occurrence
        String[] parts = currentQuestionString.split(pronounStrings[i]);
        int offset = parts[0].length();
        // every category is compared by its OWN offset; the "her" category
        // used to be compared by the offset of "plposs" and could therefore
        // only win if no other category had been found before it
        if (parts.length > 1 && offset < firstIndex) {
            firstPronoun = pronounKinds[i];
            firstIndex = offset;
        }
    }
    // matchers are bound to the question as it is before any replacement
    Matcher her = singularThirdPersonPronounPatternAmb.matcher(currentQuestionString);
    Matcher sgdem = singularDemPronounPattern.matcher(currentQuestionString);
    Matcher pldem = pluralDemPronounPattern.matcher(currentQuestionString);
    if (personFlag && firstPronoun.equals("sgposs")) {
        currentQuestionString = currentQuestionString.replaceFirst(singularThirdPersonPronounStringGen, currentTargetPersonGen);
    }
    if (firstPronoun.equals("sgthingposs")) {
        currentQuestionString = currentQuestionString.replaceFirst(singularThirdThingPronounStringGen, currentTargetGen);
    }
    if (firstPronoun.equals("plposs")) {
        currentQuestionString = currentQuestionString.replaceFirst(pluralThirdPersonPronounStringGen, currentTargetGen);
    }
    if (personFlag && firstPronoun.equals("her") && her.matches()) {
        // text following group 2 of the pattern (presumably the pronoun
        // itself - the pattern is defined outside this method)
        rest = currentQuestionString.substring(currentQuestionString.indexOf(her.group(2)) + her.group(2).length() + 1).toLowerCase();
        String[] questionTokens = OpenNLP.tokenize(rest);
        String[] pos = OpenNLP.tagPos(questionTokens);
        // a noun directly after the pronoun: use the genitive, otherwise
        // the plain target
        if (pos[0].equalsIgnoreCase("NN")) {
            currentQuestionString = currentQuestionString.replaceFirst(singularThirdPersonPronounStringAmb, currentTargetPersonGen);
        } else {
            currentQuestionString = currentQuestionString.replaceFirst(singularThirdPersonPronounStringAmb, currentTargetPerson);
        }
    }
    if (firstPronoun.equals("sgdem") && sgdem.matches()) {
        // check whether target contains the same word as the rest of the
        // question string
        rest = currentQuestionString.substring(currentQuestionString.indexOf(sgdem.group(2)) + sgdem.group(2).length() + 1).toLowerCase();
        for (int i = 0; i < tokens.length; i++) {
            if (rest.contains(tokens[i].toLowerCase())) {
                // NOTE(review): the token is used as a regular expression
                // without quoting - verify that targets cannot contain
                // regex metacharacters
                currentQuestionString = currentQuestionString.replaceFirst(" " + tokens[i].toLowerCase() + "\\b", "");
                currentQuestionString = currentQuestionString.replaceFirst("\\b" + tokens[i].toLowerCase() + " ", "");
                currentQuestionString = currentQuestionString.replaceFirst(singularDemPronounString, currentTarget);
            }
        }
        currentQuestionString = currentQuestionString.replaceFirst(singularDemPronounString, currentTargetGen);
    }
    if (firstPronoun.equals("pldem") && pldem.matches()) {
        // check whether target contains the same word as the rest of the
        // question string
        rest = currentQuestionString.substring(currentQuestionString.indexOf(pldem.group(2)) + pldem.group(2).length() + 1).toLowerCase();
        for (int i = 0; i < tokens.length; i++) {
            if (rest.contains(tokens[i].toLowerCase())) {
                // NOTE(review): unquoted token used as regular expression,
                // see the singular demonstrative case
                currentQuestionString = currentQuestionString.replaceFirst(" " + tokens[i].toLowerCase() + "\\b", "");
                currentQuestionString = currentQuestionString.replaceFirst("\\b" + tokens[i].toLowerCase() + " ", "");
                currentQuestionString = currentQuestionString.replaceFirst(pluralDemPronounString, currentTarget);
            }
        }
        currentQuestionString = currentQuestionString.replaceFirst(pluralDemPronounString, currentTargetGen);
    }
    if (personFlag && firstPronoun.equals("sgpers")) {
        currentQuestionString = currentQuestionString.replaceFirst(singularThirdPersonPronounString, currentTargetPerson);
    }
    if (firstPronoun.equals("sgthing")) {
        currentQuestionString = currentQuestionString.replaceFirst(singularThirdThingPronounString, currentTarget);
    }
    if (firstPronoun.equals("plpers")) {
        currentQuestionString = currentQuestionString.replaceFirst(pluralThirdPersonPronounString, currentTarget);
    }
    questions[next].setQuestionString(currentQuestionString);
    MsgPrinter.printResolvedQuestion(currentQuestionString);
}
use of java.util.regex.Matcher in project lucida by claritylab.
the class CorefResolver method resolvePronouns.
/**
 * Resolves references to the target description, previous questions or
 * answers. This method is called once for each factoid and list question in
 * the series.
 * <p>
 * Every pronoun category whose pattern matches the complete question is
 * resolved, and all occurrences of the pronoun are replaced. All patterns
 * are matched against the question as it is on entry, even if an earlier
 * category has already rewritten it. For singular pronouns the result of
 * <code>usePreviousAnswer</code> may replace the target as antecedent; that
 * substitution stays in effect for the categories handled afterwards. The
 * resulting question string is written back to the question and printed
 * through <code>MsgPrinter.printResolvedQuestion</code>.
 *
 * @param target
 *            the question series including answers to previous questions
 * @param next
 *            the next question in the series to be answered
 */
public static void resolvePronouns(TRECTarget target, int next) {
    String currentTarget = target.getCondensedTarget();
    TRECQuestion[] questions = target.getQuestions();
    String currentQuestionString = questions[next].getQuestionString();
    // genitive of current Target
    String currentTargetGen = null;
    // rest of the sentence after pronoun occurred
    String rest = null;
    // tokenized target
    String[] tokens = OpenNLP.tokenize(currentTarget);
    // expected answer type - 1: thing, 2: person
    int exp = 0;
    // create genitive of currentTarget
    if (currentTarget.endsWith("s")) {
        currentTargetGen = currentTarget.concat("'");
    } else {
        currentTargetGen = currentTarget.concat("'s");
    }
    String[] targetTypes = target.getTargetTypes();
    // Strings are compared with equals(): "==" compares object identity and
    // is only true if both sides happen to be the same String instance.
    // is target a person?
    boolean targetPerson = (targetTypes.length == 1) && "PERSON".equals(targetTypes[0]);
    // is target a thing? - true if there are not exactly four types and one
    // of the first three types is something other than PERSON; only entries
    // that exist are inspected
    boolean targetThing = false;
    if (targetTypes.length != 4) {
        for (int i = 0; i < targetTypes.length && i < 3; i++) {
            if (!"PERSON".equals(targetTypes[i])) {
                targetThing = true;
                break;
            }
        }
    }
    /*
     * Resolve personal, possessive and demonstrative pronouns by the target
     * as antecedent
     */
    Matcher sgpers = singularThirdPersonPronounPattern.matcher(currentQuestionString);
    Matcher sgthing = singularThirdThingPronounPattern.matcher(currentQuestionString);
    Matcher plpers = pluralThirdPersonPronounPattern.matcher(currentQuestionString);
    Matcher sgposs = singularThirdPersonPronounPatternGen.matcher(currentQuestionString);
    Matcher sgthingposs = singularThirdThingPronounPatternGen.matcher(currentQuestionString);
    Matcher plposs = pluralThirdPersonPronounPatternGen.matcher(currentQuestionString);
    Matcher her = singularThirdPersonPronounPatternAmb.matcher(currentQuestionString);
    Matcher sgdem = singularDemPronounPattern.matcher(currentQuestionString);
    Matcher pldem = pluralDemPronounPattern.matcher(currentQuestionString);
    if (sgposs.matches()) {
        exp = 2;
        // if targetType is a thing, do not use it
        if (targetThing && next != 0) {
            String previous = usePreviousAnswer(questions, next, exp);
            if (previous != null) {
                currentTarget = previous;
            }
        }
        // NOTE(review): currentTargetGen is still the genitive of the
        // original target, so a previous answer picked above only affects
        // the categories handled further down - confirm this is intended
        currentQuestionString = currentQuestionString.replaceAll(singularThirdPersonPronounStringGen, currentTargetGen);
    }
    if (sgthingposs.matches()) {
        exp = 1;
        // if targetType is a person, do not use it
        if (targetPerson && next != 0) {
            String previous = usePreviousAnswer(questions, next, exp);
            if (previous != null) {
                currentTarget = previous;
            }
        }
        // NOTE(review): genitive of the original target, see above
        currentQuestionString = currentQuestionString.replaceAll(singularThirdThingPronounStringGen, currentTargetGen);
    }
    if (plposs.matches()) {
        currentQuestionString = currentQuestionString.replaceAll(pluralThirdPersonPronounStringGen, currentTargetGen);
    }
    if (her.matches()) {
        // text following group 2 of the pattern (presumably the pronoun
        // itself - the pattern is defined outside this method)
        rest = currentQuestionString.substring(currentQuestionString.indexOf(her.group(2)) + her.group(2).length() + 1).toLowerCase();
        String[] questionTokens = OpenNLP.tokenize(rest);
        String[] pos = OpenNLP.tagPos(questionTokens);
        // a noun directly after the pronoun: use the genitive, otherwise
        // the plain target
        if (pos[0].equalsIgnoreCase("NN")) {
            currentQuestionString = currentQuestionString.replaceAll(singularThirdPersonPronounStringAmb, currentTargetGen);
        } else {
            currentQuestionString = currentQuestionString.replaceAll(singularThirdPersonPronounStringAmb, currentTarget);
        }
    }
    if (sgdem.matches()) {
        // check whether target contains the same word as the rest of the
        // question string
        rest = currentQuestionString.substring(currentQuestionString.indexOf(sgdem.group(2)) + sgdem.group(2).length() + 1).toLowerCase();
        for (int i = 0; i < tokens.length; i++) {
            if (rest.contains(tokens[i].toLowerCase())) {
                // NOTE(review): the token is used as a regular expression
                // without quoting - verify that targets cannot contain
                // regex metacharacters
                currentQuestionString = currentQuestionString.replaceAll(" " + tokens[i].toLowerCase() + "\\b", "");
                currentQuestionString = currentQuestionString.replaceAll("\\b" + tokens[i].toLowerCase() + " ", "");
                currentQuestionString = currentQuestionString.replaceAll(singularDemPronounString, currentTarget);
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(singularDemPronounString, currentTargetGen);
    }
    if (pldem.matches()) {
        // check whether target contains the same word as the rest of the
        // question string
        rest = currentQuestionString.substring(currentQuestionString.indexOf(pldem.group(2)) + pldem.group(2).length() + 1).toLowerCase();
        for (int i = 0; i < tokens.length; i++) {
            if (rest.contains(tokens[i].toLowerCase())) {
                // NOTE(review): unquoted token used as regular expression,
                // see the singular demonstrative case
                currentQuestionString = currentQuestionString.replaceAll(" " + tokens[i].toLowerCase() + "\\b", "");
                currentQuestionString = currentQuestionString.replaceAll("\\b" + tokens[i].toLowerCase() + " ", "");
                currentQuestionString = currentQuestionString.replaceAll(pluralDemPronounString, currentTarget);
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(pluralDemPronounString, currentTargetGen);
    }
    if (sgpers.matches()) {
        exp = 2;
        // fall back to a previous answer only if checkPl accepts the target
        // tokens, the target is not a thing and this is not the first
        // question
        if (checkPl(tokens) && !targetThing && next != 0) {
            String previous = usePreviousAnswer(questions, next, exp);
            if (previous != null) {
                currentTarget = previous;
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(singularThirdPersonPronounString, currentTarget);
    }
    if (sgthing.matches()) {
        exp = 1;
        // fall back to a previous answer only if checkPl accepts the target
        // tokens, the target is not a person and this is not the first
        // question
        if (checkPl(tokens) && !targetPerson && next != 0) {
            String previous = usePreviousAnswer(questions, next, exp);
            if (previous != null) {
                currentTarget = previous;
            }
        }
        currentQuestionString = currentQuestionString.replaceAll(singularThirdThingPronounString, currentTarget);
    }
    if (plpers.matches()) {
        currentQuestionString = currentQuestionString.replaceAll(pluralThirdPersonPronounString, currentTarget);
    }
    questions[next].setQuestionString(currentQuestionString);
    MsgPrinter.printResolvedQuestion(questions[next].getQuestionString());
}
use of java.util.regex.Matcher in project lucida by claritylab.
the class CorefResolver method isTargetPerson.
/**
 * Derives the person string of a target, or rejects the target.
 * <p>
 * The target is rejected if <code>verifyTargetPattern</code> does not match
 * it completely, if <code>isAllUpper</code> holds for it, if a word with a
 * lowercase initial follows a word without one, if no word lacks a
 * lowercase initial, or if more than one word with a lowercase initial
 * precedes the remaining words. Otherwise the leading word with a lowercase
 * initial (if there is one) is dropped and the remaining words are joined
 * by single blanks.
 *
 * @param currentTarget
 *            the target description to inspect
 * @return the target without its leading lowercase word, or
 *         <code>null</code> if the target is rejected
 */
private static String isTargetPerson(String currentTarget) {
    if (!verifyTargetPattern.matcher(currentTarget).matches() || isAllUpper(currentTarget)) {
        return null;
    }
    String[] words = currentTarget.split("\\s+");
    // number of words with a lowercase initial seen so far
    int leadingLower = 0;
    // becomes true with the first word that has no lowercase initial
    boolean seenNonLower = false;
    for (int i = 0; i < words.length; i++) {
        if (!Character.isLowerCase(words[i].charAt(0))) {
            seenNonLower = true;
        } else if (seenNonLower) {
            // lowercase word after the first non-lowercase word
            return null;
        } else {
            leadingLower++;
        }
    }
    if (!seenNonLower || leadingLower > 1) {
        return null;
    }
    StringBuilder joined = new StringBuilder();
    for (int i = leadingLower; i < words.length; i++) {
        joined.append(' ').append(words[i]);
    }
    // drop the blank in front of the first word
    return joined.substring(1);
}
Aggregations