Use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.
Class StanfordParser, method getPCFGScore.
/**
 * Parses a sentence and returns the PCFG score as a confidence measure.
 * <p>
 * The sentence is tokenized with the tokenizer factory of <code>tlp</code>
 * and passed to the shared <code>parser</code>. Parsing and reading the score
 * happen under the same lock on <code>parser</code>, so other callers that
 * synchronize on the parser cannot interleave between the two steps.
 *
 * @param sentence a sentence
 * @return PCFG score
 * @throws IllegalStateException if the parser has not been initialized
 */
@SuppressWarnings("unchecked")
public static double getPCFGScore(String sentence) {
    // IllegalStateException is a RuntimeException, so existing handlers
    // still catch it, but it names the actual problem: wrong object state.
    if (tlp == null || parser == null) {
        throw new IllegalStateException("Parser has not been initialized");
    }

    // parse the sentence to produce PCFG score
    log.debug("Parsing sentence");
    synchronized (parser) {
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
        List<Word> words = tokenizer.tokenize();
        log.debug("Tokenization: " + words);

        parser.parse(new Sentence(words));
        // read the score before releasing the lock
        return parser.getPCFGScore();
    }
}
Use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.
Class StanfordParser, method parse.
/**
 * Parses a sentence and returns a string representation of the parse tree.
 * <p>
 * The sentence is tokenized with the tokenizer factory of <code>tlp</code>
 * and parsed by the shared <code>parser</code> while holding its lock. The
 * best parse is rendered with <code>toString()</code>, and every
 * space-prefixed, whitespace-free bracketed suffix (text matching
 * <code> [...]</code>) is removed from the result.
 *
 * @param sentence a sentence
 * @return string form of the best parse tree, without the bracketed
 *         annotations
 * @throws IllegalStateException if the parser has not been initialized or
 *         if no parse tree is available after parsing
 */
@SuppressWarnings("unchecked")
public static String parse(String sentence) {
    // IllegalStateException is a RuntimeException, so existing handlers
    // still catch it, but it names the actual problem: wrong object state.
    if (tlp == null || parser == null) {
        throw new IllegalStateException("Parser has not been initialized");
    }

    // parse the sentence to produce stanford Tree
    log.debug("Parsing sentence");
    final Tree tree;
    synchronized (parser) {
        Tokenizer tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(sentence));
        List<Word> words = tokenizer.tokenize();
        log.debug("Tokenization: " + words);

        parser.parse(new Sentence(words));
        tree = parser.getBestParse();
    }

    // Without this guard a missing tree would surface as an anonymous
    // NullPointerException from tree.toString().
    if (tree == null) {
        throw new IllegalStateException("No parse tree available for sentence");
    }

    // strip the " [...]" annotations from the printed tree
    return tree.toString().replaceAll(" \\[[\\S]+\\]", "");
}
Use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.
Class StanfordPosTagger, method tagPos.
/**
 * Assigns a part-of-speech tag to every token.
 * <p>
 * The tokens are wrapped in a <code>Sentence</code> and tagged with
 * <code>MaxentTagger.tagSentence</code>. For each tagged word, the tag is the
 * text after the last <code>/</code> of its string form; a word whose string
 * form does not split into at least two parts gets an empty tag.
 *
 * @param tokens Array of token strings
 * @return Part of speech tags, one per tagged word
 */
public static String[] tagPos(String[] tokens) {
    Sentence taggedSentence = MaxentTagger.tagSentence(createSentence(tokens));

    String[] tags = new String[taggedSentence.size()];
    int idx = 0;
    while (idx < taggedSentence.size()) {
        HasWord taggedWord = (HasWord) taggedSentence.get(idx);
        // string form is split on '/', the tag being the final piece
        String[] parts = taggedWord.toString().split("/");
        tags[idx] = (parts.length > 1) ? parts[parts.length - 1] : "";
        idx++;
    }
    return tags;
}
Use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.
Class StanfordPosTagger, method createSentence.
/**
 * Builds a <code>Sentence</code> whose words are the given tokens, in order.
 * Each token string is wrapped in a <code>Word</code>.
 *
 * @param tokens token strings to combine
 * @return <code>Sentence</code> made of the tokens
 */
@SuppressWarnings("unchecked")
private static Sentence createSentence(String[] tokens) {
    ArrayList<HasWord> words = new ArrayList<HasWord>();
    for (int i = 0; i < tokens.length; i++) {
        words.add(new Word(tokens[i]));
    }

    Sentence result = new Sentence();
    result.setWords(words);
    return result;
}
Use of edu.stanford.nlp.ling.Sentence in project lucida by claritylab.
Class StanfordPosTagger, method tokenize.
/**
 * Splits the input text into individual tokens.
 * <p>
 * The text is tokenized with <code>MaxentTagger.tokenizeText</code>, which
 * yields a list of <code>Sentence</code> objects; the words of all of them
 * are flattened, in order, into a single array.
 *
 * @param sentence Input sentence
 * @return Array of tokens
 */
public static String[] tokenize(String sentence) {
    List tokenized = MaxentTagger.tokenizeText(new StringReader(sentence));

    List<String> result = new ArrayList<String>();
    for (Object element : tokenized) {
        Sentence part = (Sentence) element;
        int wordCount = 0;
        while (wordCount < part.length()) {
            result.add(part.getHasWord(wordCount).word());
            wordCount++;
        }
    }
    return result.toArray(new String[result.size()]);
}
Aggregations