use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.
the class FocusFinder method findFocusWord.
/**
 * Finds the focus word(s) of a question.
 *
 * <p>The question is parsed with {@code StanfordParser.parse}, the resulting
 * parse string is converted into a {@code Tree} by {@code TreeHelper.buildTree},
 * and {@code findFocusNode} selects the focus node. The leaves of that node
 * are returned.
 *
 * @param question the input question
 * @return the leaves of the focus node as a String, or null if no focus node
 *         was found or if any step threw an exception
 */
public static String findFocusWord(String question) {
    try {
        String treeStr = StanfordParser.parse(question);
        log.debug("Parse: " + treeStr);
        Tree focusNode = findFocusNode(TreeHelper.buildTree(treeStr, Tree.ENGLISH));
        if (focusNode == null) {
            return null;
        }
        // Extract the leaves once and reuse them for both the log line and the result.
        String focus = TreeHelper.getLeaves(focusNode);
        log.debug("Focus: " + focus);
        return focus;
    } catch (Exception e) {
        // Best-effort contract: callers receive null on failure. Report the
        // failure through the class logger rather than dumping to stderr.
        log.error("Failed to find focus word for question: " + question, e);
        return null;
    }
}
use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.
the class FocusFinder method getHeadWordOrPhrase.
/**
 * Extracts the head word or phrase from the given Tree node, which is assumed
 * to be an NP.
 *
 * <p>After marking the head node, the method recurses into the head child until
 * the head child is a preterminal. It then collects the preterminal children of
 * {@code tree} (skipping those labelled "DT") and tries every suffix of that
 * list, longest first. Each suffix is joined with single spaces and looked up
 * in WordNet as a noun. The first suffix that is found, and whose dictionary
 * lemma contains the same number of spaces as the phrase, is returned.
 *
 * @param tree the Tree node from which to extract the head word or phrase
 * @return the single matching preterminal node, a new "NP" node wrapping the
 *         matching multi-word phrase, or the head node of {@code tree} if no
 *         candidate phrase is found in WordNet
 */
public static Tree getHeadWordOrPhrase(Tree tree) {
    TreeHelper.markHeadNode(tree);
    Tree headChild = tree.getHeadNode();
    if (!headChild.isPreterminal())
        return getHeadWordOrPhrase(headChild);

    // Candidate words: preterminal children, determiners excluded.
    List<Tree> pretermChildren = new ArrayList<Tree>();
    for (Tree child : tree.getChildren()) {
        if (child.isPreterminal() && !child.getLabel().equals("DT"))
            pretermChildren.add(child);
    }

    int total = pretermChildren.size();
    for (int start = 0; start < total; start++) {
        // Copy the suffix so the list handed to Tree.newNode is independent
        // of pretermChildren.
        List<Tree> nodes = new ArrayList<Tree>(pretermChildren.subList(start, total));

        StringBuilder phrase = new StringBuilder();
        for (Tree node : nodes) {
            phrase.append(node.getHeadWord()).append(' ');
        }
        String phr = phrase.toString().trim();
        int phrSpaces = countSpaces(phr);

        try {
            IndexWord indexWord = Dictionary.getInstance().lookupIndexWord(POS.NOUN, phr);
            if (indexWord == null)
                continue;
            // Presumably guards against the dictionary resolving the phrase to a
            // lemma with a different number of words -- TODO confirm against JWNL.
            if (countSpaces(indexWord.getLemma()) != phrSpaces)
                continue;
        } catch (Exception ignored) {
            // A failed lookup is treated the same as "phrase not in WordNet".
            continue;
        }

        if (nodes.size() == 1)
            return nodes.get(0);
        return Tree.newNode("NP", nodes);
    }
    return headChild;
}

/** Counts the space characters (' ') in the given string. */
private static int countSpaces(String s) {
    int spaces = 0;
    for (int i = 0; i < s.length(); i++) {
        if (s.charAt(i) == ' ')
            spaces++;
    }
    return spaces;
}
use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.
the class FocusFinder method initialize.
/**
 * Initializes static resources; does nothing if already initialized.
 *
 * <p>Required configuration:
 * <ul>
 * <li> System property {@code jwnl.configuration} : path of the JWNL
 * configuration file. Only consulted when JWNL has not been initialized yet.
 * <li> Property {@code treeTemplatesFile}, read from the properties loaded for
 * the FocusFinder class name :
 * the location of a file containing tree templates to use. Each tree template
 * must be specified on one line. Blank lines and lines beginning with "#"
 * are ignored. A tree template is a parenthesized syntactic parse tree which
 * can be used as an underspecified template tree to unify with a real syntactic
 * parse tree. See {@link edu.cmu.lti.chineseNLP.util.TreeHelper#extractNode
 * TreeHelper.extractNode} for more details.
 * </ul>
 * @throws Exception if a required property is not defined, or if reading the
 *         JWNL configuration or the tree templates file fails
 */
public static void initialize() throws Exception {
    // return if already initialized
    if (isInitialized())
        return;
    Properties properties = Properties.loadFromClassName(FocusFinder.class.getName());

    // initialize JWNL
    if (!JWNL.isInitialized()) {
        String file_properties = System.getProperty("jwnl.configuration");
        if (file_properties == null)
            throw new Exception("Required property 'jwnl.configuration' is undefined");
        FileInputStream jwnlConfig = new FileInputStream(file_properties);
        try {
            JWNL.initialize(jwnlConfig);
        } finally {
            // Release the file handle whether or not JWNL accepted the configuration.
            jwnlConfig.close();
        }
    }

    // load tree templates file
    treeTemplatesFile = properties.getProperty("treeTemplatesFile");
    if (treeTemplatesFile == null)
        throw new Exception("Required property treeTemplatesFile is undefined");

    // Build into a local list so a failure part-way through the file does not
    // leave a partially filled template list behind.
    ArrayList<Tree> loaded = new ArrayList<Tree>();
    BufferedReader in = new BufferedReader(new FileReader(treeTemplatesFile));
    try {
        String line;
        while ((line = in.readLine()) != null) {
            // skip comment lines and blank lines
            if (line.startsWith("#") || line.trim().length() == 0)
                continue;
            loaded.add(TreeHelper.buildTree(line, Tree.ENGLISH));
        }
    } finally {
        in.close();
    }
    treeTemplates = loaded;
    setInitialized(true);
}
use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.
the class FocusFinder method findFocusTerm.
/**
 * Locates the focus of a question and wraps it in a {@code Term}.
 *
 * <p>The question is parsed with {@code StanfordParser.parse}, converted into a
 * {@code Tree} by {@code TreeHelper.buildTree}, and handed to
 * {@code findFocusNode}. The resulting Term carries the leaves of the focus
 * node as its text and the label of the focus node as its POS.
 *
 * @param question the input question
 * @return the focus as a Term, or null if no focus node exists or if any
 *         step threw an exception (the stack trace is printed in that case)
 */
public static Term findFocusTerm(String question) {
    try {
        String parse = StanfordParser.parse(question);
        Tree parsed = TreeHelper.buildTree(parse, Tree.ENGLISH);
        Tree focusNode = findFocusNode(parsed);
        if (focusNode == null) {
            return null;
        }
        Term focusTerm = new Term(0, 0, TreeHelper.getLeaves(focusNode));
        focusTerm.setPOS(focusNode.getLabel());
        return focusTerm;
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
}
use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.
the class FocusFinder method main.
/**
 * Extracts and writes out the focus word for each question, given a file
 * of questions (one question per line).
 *
 * <p>For every question a line of the form {@code FOCUS.<focus> <question>}
 * is written to {@code <inputQuestionsFile>.output}; "-" is used as the focus
 * when none is found.
 *
 * @param args command-line args: "<inputQuestionsFile>"
 * @throws Exception if initialization, parsing, or file I/O fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.out.println("USAGE: FocusFinder <inputQuestionsFile>");
        System.out.println("Output stored in: <inputQuestionsFile>.output");
        System.exit(0);
    }
    FocusFinder.initialize();
    StanfordParser.initialize();

    // Read every question up front; the reader is closed even if reading fails.
    List<String> questions = new ArrayList<String>();
    BufferedReader in = new BufferedReader(new FileReader(args[0]));
    try {
        String question;
        while ((question = in.readLine()) != null) {
            questions.add(question);
        }
    } finally {
        in.close();
    }

    // Closing in finally flushes whatever was written before a failure.
    BufferedWriter out = new BufferedWriter(new FileWriter(args[0] + ".output"));
    try {
        for (String q : questions) {
            Tree t = TreeHelper.buildTree(StanfordParser.parse(q), Tree.ENGLISH);
            TreeHelper.markHeadNode(t);
            // NOTE(review): relies on a findFocusWord(Tree) overload that is
            // not visible in this section -- confirm it exists.
            String focus = findFocusWord(t);
            if (focus == null)
                focus = "-";
            out.append("FOCUS." + focus + " " + q + "\n");
        }
    } finally {
        out.close();
    }
}
Aggregations