Search in sources :

Example 1 with Sentence

use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.

the class StanfordParser method getPCFGScore.

/**
 * Tokenizes and parses a sentence, then reports the PCFG score of that
 * parse, which is used as a confidence measure.
 *
 * @param sentence the sentence to parse
 * @return the PCFG score reported by the parser after parsing the sentence
 * @throws RuntimeException if the language pack or the parser is not set
 */
@SuppressWarnings("unchecked")
public static double getPCFGScore(String sentence) {
    boolean initialized = tlp != null && parser != null;
    if (!initialized) {
        throw new RuntimeException("Parser has not been initialized");
    }
    log.debug("Parsing sentence");
    // hold the parser's monitor across tokenizing, parsing and reading the
    // score so the score read back belongs to this parse
    synchronized (parser) {
        StringReader reader = new StringReader(sentence);
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(reader);
        List<Word> tokenList = tokenizer.tokenize();
        log.debug("Tokenization: " + tokenList);
        Sentence parserInput = new Sentence(tokenList);
        parser.parse(parserInput);
        return parser.getPCFGScore();
    }
}
Also used : Word(edu.stanford.nlp.ling.Word) StringReader(java.io.StringReader) Tokenizer(edu.stanford.nlp.process.Tokenizer) Sentence(edu.stanford.nlp.ling.Sentence)

Example 2 with Sentence

use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.

the class StanfordParser method parse.

/**
 * Tokenizes and parses a sentence and returns the best parse tree rendered
 * as a string.
 * <p>
 * Every occurrence of a space followed by a bracketed run of non-whitespace
 * characters (<code> [...]</code>) is removed from the rendered tree before
 * it is returned.
 *
 * @param sentence the sentence to parse
 * @return string form of the best parse tree, with the bracketed
 *         annotations stripped
 * @throws RuntimeException if the language pack or the parser is not set
 */
@SuppressWarnings("unchecked")
public static String parse(String sentence) {
    boolean initialized = tlp != null && parser != null;
    if (!initialized) {
        throw new RuntimeException("Parser has not been initialized");
    }
    log.debug("Parsing sentence");
    Tree bestParse;
    // hold the parser's monitor across tokenizing, parsing and fetching the
    // tree so the tree read back belongs to this parse
    synchronized (parser) {
        StringReader reader = new StringReader(sentence);
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(reader);
        List<Word> tokenList = tokenizer.tokenize();
        log.debug("Tokenization: " + tokenList);
        Sentence parserInput = new Sentence(tokenList);
        parser.parse(parserInput);
        bestParse = parser.getBestParse();
    }
    String rendered = bestParse.toString();
    return rendered.replaceAll(" \\[[\\S]+\\]", "");
}
Also used : Word(edu.stanford.nlp.ling.Word) StringReader(java.io.StringReader) Tree(edu.stanford.nlp.trees.Tree) Tokenizer(edu.stanford.nlp.process.Tokenizer) Sentence(edu.stanford.nlp.ling.Sentence)

Example 3 with Sentence

use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.

the class StanfordPosTagger method tagPos.

/**
 * Assigns a part-of-speech tag to each token.
 * <p>
 * The tag is read from the string form of each tagged word: the text after
 * the last <code>/</code> is taken as the tag. When splitting on
 * <code>/</code> yields fewer than two pieces, the tag is the empty string.
 *
 * @param tokens array of token strings
 * @return array of part-of-speech tags, one entry per tagged word
 */
public static String[] tagPos(String[] tokens) {
    Sentence tagged = MaxentTagger.tagSentence(createSentence(tokens));
    int count = tagged.size();
    String[] tags = new String[count];
    int idx = 0;
    while (idx < count) {
        HasWord taggedWord = (HasWord) tagged.get(idx);
        String[] parts = taggedWord.toString().split("/");
        // the last piece is the tag; a lone piece means no tag was found
        tags[idx] = parts.length > 1 ? parts[parts.length - 1] : "";
        idx++;
    }
    return tags;
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) Sentence(edu.stanford.nlp.ling.Sentence)

Example 4 with Sentence

use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.

the class StanfordPosTagger method createSentence.

/**
 * Wraps each token in a <code>Word</code> and collects the words into a
 * new <code>Sentence</code>.
 *
 * @param tokens the token strings, in sentence order
 * @return <code>Sentence</code> holding one word per token
 */
@SuppressWarnings("unchecked")
private static Sentence createSentence(String[] tokens) {
    ArrayList<HasWord> words = new ArrayList<HasWord>();
    for (int i = 0; i < tokens.length; i++) {
        words.add(new Word(tokens[i]));
    }
    Sentence result = new Sentence();
    result.setWords(words);
    return result;
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) Word(edu.stanford.nlp.ling.Word) ArrayList(java.util.ArrayList) Sentence(edu.stanford.nlp.ling.Sentence)

Example 5 with Sentence

use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.

the class StanfordPosTagger method tokenize.

/**
 * Breaks the input text into tokens.
 * <p>
 * The tagger's tokenizer returns a list of sentences; the words of all
 * sentences are flattened, in order, into a single array.
 *
 * @param sentence input text
 * @return array of tokens
 */
public static String[] tokenize(String sentence) {
    List sentences = MaxentTagger.tokenizeText(new StringReader(sentence));
    List<String> result = new ArrayList<String>();
    for (Object element : sentences) {
        Sentence current = (Sentence) element;
        int length = current.length();
        for (int k = 0; k < length; k++) {
            result.add(current.getHasWord(k).word());
        }
    }
    return result.toArray(new String[result.size()]);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) StringReader(java.io.StringReader) ArrayList(java.util.ArrayList) List(java.util.List) Sentence(edu.stanford.nlp.ling.Sentence)

Aggregations

Sentence (edu.stanford.nlp.ling.Sentence): 5, HasWord (edu.stanford.nlp.ling.HasWord): 3, Word (edu.stanford.nlp.ling.Word): 3, StringReader (java.io.StringReader): 3, Tokenizer (edu.stanford.nlp.process.Tokenizer): 2, ArrayList (java.util.ArrayList): 2, Tree (edu.stanford.nlp.trees.Tree): 1, List (java.util.List): 1