Search in sources :

Example 6 with Tree

use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.

the class FocusFinder method findFocusWord.

/**
 * Parses the given question with {@code StanfordParser}, searches the
 * resulting parse tree for the focus node, and returns the text of that
 * node's leaves.
 *
 * @param question the input question
 * @return the focus word as a String, or null if no focus node was found or
 *         if any step threw an exception (the exception is logged, not rethrown)
 */
public static String findFocusWord(String question) {
    try {
        String treeStr = StanfordParser.parse(question);
        log.debug("Parse: " + treeStr);
        Tree focusNode = findFocusNode(TreeHelper.buildTree(treeStr, Tree.ENGLISH));
        if (focusNode == null) {
            return null;
        }
        // Compute the leaf string once and reuse it for both the log line and the result.
        String focus = TreeHelper.getLeaves(focusNode);
        log.debug("Focus: " + focus);
        return focus;
    } catch (Exception e) {
        // Deliberate best-effort: a failure yields "no focus", but the failure
        // is routed through the class logger instead of stderr.
        log.error("Failed to find focus word for question: " + question, e);
        return null;
    }
}
Also used : Tree(edu.cmu.lti.chineseNLP.util.Tree)

Example 7 with Tree

use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.

the class FocusFinder method getHeadWordOrPhrase.

/**
 * Extracts the head word or phrase from the given Tree node, which is assumed
 * to be an NP.  Whether a phrase should be the head is determined by looking up
 * in WordNet the candidate phrases built from the node's immediate preterminal
 * children (determiners labelled "DT" are skipped).  Every candidate ends at the
 * right-most such child; candidates are tried from the longest (starting at the
 * left-most child) to the shortest (the right-most child alone).
 *
 * @param tree the Tree node from which to extract the head word or phrase
 * @return the single preterminal node if the accepted candidate is one word, a
 *         new "NP" node wrapping the candidate's nodes if it is a multi-word
 *         phrase, or the head node of <code>tree</code> if no candidate is accepted
 */
public static Tree getHeadWordOrPhrase(Tree tree) {
    TreeHelper.markHeadNode(tree);
    // NOTE(review): an earlier lookup via tree.getChild(tree.getHeadNodeChildIndex())
    // was remarked as possibly returning null; confirm getHeadNode() cannot be null here.
    Tree headChild = tree.getHeadNode();
    if (!headChild.isPreterminal()) {
        // The head is itself a phrase: descend until the head child is a preterminal.
        return getHeadWordOrPhrase(headChild);
    }

    List<Tree> pretermChildren = new ArrayList<Tree>();
    for (Tree child : tree.getChildren()) {
        if (child.isPreterminal() && !child.getLabel().equals("DT")) {
            pretermChildren.add(child);
        }
    }

    int count = pretermChildren.size();
    for (int start = 0; start < count; start++) {
        // Candidate = children[start .. count-1]; copied so the result owns its own list.
        List<Tree> nodes = new ArrayList<Tree>(pretermChildren.subList(start, count));
        StringBuilder phrase = new StringBuilder();
        for (Tree node : nodes) {
            phrase.append(node.getHeadWord() + " ");
        }
        String phr = phrase.toString().trim();

        try {
            IndexWord indexWord = Dictionary.getInstance().lookupIndexWord(POS.NOUN, phr);
            if (indexWord == null) {
                continue;
            }
            // Accept only when the returned lemma has as many space-separated
            // words as the candidate phrase.
            if (countSpaces(indexWord.getLemma()) != countSpaces(phr)) {
                continue;
            }
        } catch (Exception e) {
            // Best-effort lookup: any dictionary failure just rejects this candidate.
            continue;
        }

        if (nodes.size() == 1) {
            return nodes.get(0);
        } else {
            return Tree.newNode("NP", nodes);
        }
    }
    return tree.getHeadNode();
}

/** Counts the space characters (' ') contained in the given string. */
private static int countSpaces(String s) {
    int spaces = 0;
    for (int i = 0; i < s.length(); i++) {
        if (s.charAt(i) == ' ') {
            spaces++;
        }
    }
    return spaces;
}
Also used : Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) Tree(edu.cmu.lti.chineseNLP.util.Tree) IndexWord(net.didion.jwnl.data.IndexWord)

Example 8 with Tree

use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.

the class FocusFinder method initialize.

/**
 * Initializes static resources; does nothing if already initialized.
 * The inputs which must be defined are:
 * <ul>
 *   <li> the system property <code>jwnl.configuration</code> : &nbsp;
 *   the path of the JWNL configuration file.  It is only read when JWNL has
 *   not been initialized yet.
 *   <li> edu.cmu.lti.javelin.qa.english.FocusFinder.treeTemplatesFile : &nbsp;
 *   the location of a file containing tree templates to use.  Each tree template
 *   must be specified on one line.  Blank lines and lines beginning with "#"
 *   are ignored.  A tree template is a parenthesized syntactic parse tree which
 *   can be used as an underspecified template tree to unify with a real syntactic
 *   parse tree.  See {@link edu.cmu.lti.chineseNLP.util.TreeHelper#extractNode
 *   TreeHelper.extractNode} for more details.
 * </ul>
 * @throws Exception if a required input property is not defined, or if a
 *         configuration or template file cannot be read
 */
public static void initialize() throws Exception {
    // return if already initialized
    if (isInitialized())
        return;
    Properties properties = Properties.loadFromClassName(FocusFinder.class.getName());
    // initialize JWNL
    if (!JWNL.isInitialized()) {
        String file_properties = System.getProperty("jwnl.configuration");
        if (file_properties == null)
            throw new Exception("Required property 'jwnl.configuration' is undefined");
        FileInputStream jwnlConfig = new FileInputStream(file_properties);
        try {
            JWNL.initialize(jwnlConfig);
        } finally {
            // Release the file handle whether or not JWNL accepted the configuration.
            jwnlConfig.close();
        }
    }
    // load tree templates file
    treeTemplatesFile = properties.getProperty("treeTemplatesFile");
    if (treeTemplatesFile == null)
        throw new Exception("Required property treeTemplatesFile is undefined");
    // Collect into a local list first so a failure part-way through the file
    // does not leave the static template list half-populated.
    ArrayList<Tree> loadedTemplates = new ArrayList<Tree>();
    BufferedReader in = new BufferedReader(new FileReader(treeTemplatesFile));
    try {
        String line;
        while ((line = in.readLine()) != null) {
            // skip comment lines and blank lines
            if (line.matches("#.*") || line.matches("\\s*"))
                continue;
            loadedTemplates.add(TreeHelper.buildTree(line, Tree.ENGLISH));
        }
    } finally {
        in.close();
    }
    treeTemplates = loadedTemplates;
    setInitialized(true);
}
Also used : BufferedReader(java.io.BufferedReader) Tree(edu.cmu.lti.chineseNLP.util.Tree) FileReader(java.io.FileReader) Properties(info.ephyra.util.Properties) FileInputStream(java.io.FileInputStream)

Example 9 with Tree

use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.

the class FocusFinder method findFocusTerm.

/**
 * Parses the given question with {@code StanfordParser}, searches the
 * resulting parse tree for the focus node, and wraps that node in a Term.
 * The Term's text is the node's leaves and its POS is the node's label.
 *
 * @param question the input question
 * @return the focus word as a Term, or null if no focus node was found or
 *         if any step threw an exception (the exception is logged, not rethrown)
 */
public static Term findFocusTerm(String question) {
    try {
        Tree focusNode = findFocusNode(TreeHelper.buildTree(StanfordParser.parse(question), Tree.ENGLISH));
        if (focusNode == null) {
            return null;
        }
        // NOTE(review): the two leading 0 arguments are presumably offsets/positions
        // that are unknown here — confirm against the Term constructor.
        Term res = new Term(0, 0, TreeHelper.getLeaves(focusNode));
        res.setPOS(focusNode.getLabel());
        return res;
    } catch (Exception e) {
        // Deliberate best-effort: a failure yields "no focus", but the failure
        // is routed through the class logger instead of stderr.
        log.error("Failed to find focus term for question: " + question, e);
        return null;
    }
}
Also used : Tree(edu.cmu.lti.chineseNLP.util.Tree) Term(edu.cmu.lti.javelin.qa.Term)

Example 10 with Tree

use of edu.cmu.lti.chineseNLP.util.Tree in project lucida by claritylab.

the class FocusFinder method main.

/**
 * Extracts the focus word for each question in a file of questions (one
 * question per line) and writes the results to
 * <code>&lt;inputQuestionsFile&gt;.output</code>.  Each output line has the
 * form <code>FOCUS.&lt;focus&gt;  &lt;question&gt;</code>, where the focus is
 * written as "-" when none was found.
 *
 * @param args command-line args: "&lt;inputQuestionsFile&gt;"
 * @throws Exception if initialization, parsing, or file I/O fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.out.println("USAGE: FocusFinder <inputQuestionsFile>");
        System.out.println("Output stored in: <inputQuestionsFile>.output");
        System.exit(0);
    }
    FocusFinder.initialize();
    StanfordParser.initialize();

    // Read every question up front and release the input file before parsing starts.
    List<String> questions = new ArrayList<String>();
    BufferedReader in = new BufferedReader(new FileReader(args[0]));
    try {
        String question;
        while ((question = in.readLine()) != null) {
            questions.add(question);
        }
    } finally {
        in.close();
    }

    BufferedWriter out = new BufferedWriter(new FileWriter(args[0] + ".output"));
    try {
        for (String q : questions) {
            Tree t = TreeHelper.buildTree(StanfordParser.parse(q), Tree.ENGLISH);
            TreeHelper.markHeadNode(t);
            String focus = findFocusWord(t);
            if (focus == null)
                focus = "-";
            out.append("FOCUS." + focus + "  " + q + "\n");
        }
    } finally {
        // Close (and flush) the output even if a question fails to parse.
        out.close();
    }
}
Also used : FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) BufferedReader(java.io.BufferedReader) Tree(edu.cmu.lti.chineseNLP.util.Tree) FileReader(java.io.FileReader) BufferedWriter(java.io.BufferedWriter)

Aggregations

Tree (edu.cmu.lti.chineseNLP.util.Tree)11 Term (edu.cmu.lti.javelin.qa.Term)6 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 ArrayList (java.util.ArrayList)2 Matcher (java.util.regex.Matcher)2 IndexWord (net.didion.jwnl.data.IndexWord)2 Feature (edu.cmu.minorthird.classify.Feature)1 MutableInstance (edu.cmu.minorthird.classify.MutableInstance)1 Properties (info.ephyra.util.Properties)1 BufferedWriter (java.io.BufferedWriter)1 FileInputStream (java.io.FileInputStream)1 FileWriter (java.io.FileWriter)1