use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.
the class POSBracketToToken method next.
/**
 * Returns the next labeled word in the data.
 *
 * <p>When the current sentence has been fully handed out, the next
 * non-empty sentence is fetched from the superclass parser and converted
 * into a chain of {@link Token}s. Each token receives the word's part of
 * speech as its third constructor argument (presumably the label), after
 * which the token's own <code>partOfSpeech</code> field is cleared.
 *
 * @return the next {@link Token}, or <code>null</code> once the superclass
 *         parser has no more sentences to offer.
 */
public Object next() {
    if (currentWord == null) {
        LinkedVector vector = (LinkedVector) super.next();
        // Skip over empty sentences.
        while (vector != null && vector.size() == 0) {
            vector = (LinkedVector) super.next();
        }
        if (vector == null) {
            return null;
        }

        Word w = (Word) vector.get(0);
        Token t = currentWord = new Token(w, null, w.partOfSpeech);
        t.partOfSpeech = null;

        while (w.next != null) {
            w = (Word) w.next;
            t.next = new Token(w, t, w.partOfSpeech);
            // Advance BEFORE clearing so that the freshly created token is
            // the one whose partOfSpeech is nulled. Clearing first would
            // only re-clear the previous token and leave the final token
            // of the sentence with its partOfSpeech still set.
            t = (Token) t.next;
            t.partOfSpeech = null;
        }
    }

    Token result = currentWord;
    currentWord = (Token) currentWord.next;
    return result;
}
use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.
the class PlainToTokenParser method next.
/**
 * This method returns {@link Token}s until the input is exhausted, at
 * which point it returns <code>null</code>.
 *
 * <p>Sentences are pulled from the wrapped <code>parser</code> one at a
 * time as {@link LinkedVector}s of {@link Word}s and converted into a
 * linked chain of {@link Token}s (created with a <code>null</code> third
 * constructor argument). Empty sentences are skipped.
 *
 * @return the next {@link Token}, or <code>null</code> when the wrapped
 *         parser returns <code>null</code>.
 **/
public Object next() {
    while (next == null) {
        LinkedVector words = (LinkedVector) parser.next();
        if (words == null) {
            return null;
        }
        // An empty sentence has no first word to read; move on to the
        // next sentence instead of indexing into an empty vector.
        if (words.size() == 0) {
            continue;
        }

        Word w = (Word) words.get(0);
        Token t = new Token(w, null, null);
        for (w = (Word) w.next; w != null; w = (Word) w.next) {
            t.next = new Token(w, t, null);
            t = (Token) t.next;
        }

        // Wrap the chain in a LinkedVector and restart from its first element.
        LinkedVector tokens = new LinkedVector(t);
        next = (Token) tokens.get(0);
    }

    Token result = next;
    next = (Token) next.next;
    return result;
}
use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.
the class WordsToTokens method convert.
/**
 * Builds a {@link LinkedVector} of {@link Token}s from a
 * {@link LinkedVector} of {@link Word}s.
 *
 * <p>One {@link Token} is created per input {@link Word}, in order, each
 * linked to its predecessor and created with a <code>null</code> third
 * constructor argument. A <code>null</code> or empty input is handed back
 * unchanged.
 *
 * @param v A {@link LinkedVector} of {@link Word}s.
 * @return A {@link LinkedVector} of {@link Token}s corresponding to the
 *         input {@link Word}s; <code>null</code> if <code>v</code> is
 *         <code>null</code>, or <code>v</code> itself if it is empty.
 **/
public static LinkedVector convert(LinkedVector v) {
    // Nothing to convert: return the argument as-is (null stays null).
    if (v == null || v.size() == 0) {
        return v;
    }

    Word current = (Word) v.get(0);
    Token tail = new Token(current, null, null);

    // Walk the remaining words, appending one token per word to the chain.
    while ((current = (Word) current.next) != null) {
        Token appended = new Token(current, tail, null);
        tail.next = appended;
        tail = appended;
    }

    return new LinkedVector(tail);
}
use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.
the class Decoder method nullifyTaggerCachedFields.
/**
 * Lbj does some pretty annoying caching. We need this method for the
 * beamsearch and the viterbi.
 *
 * <p>Builds a throwaway chain of seven dummy {@link NEWord}s, all labeled
 * <code>"O"</code> and each with an empty <code>parts</code> array, links
 * them through both the regular and the "ignore sentence boundary"
 * previous/next fields, and classifies the middle word once with the
 * given tagger.
 *
 * @param tagger the learner on which the single dummy classification is run.
 */
public static void nullifyTaggerCachedFields(SparseNetworkLearner tagger) {
    NEWord w = new NEWord(new Word("lala1"), null, "O");
    w.parts = new String[0];

    NEWord[] words = new NEWord[7];
    for (int i = 0; i < words.length; i++) {
        words[i] = new NEWord(w, null, "O");
        // Initialise every word uniformly, including index 0, so that no
        // dummy word is left with an unset parts array.
        words[i].parts = new String[0];
        words[i].neTypeLevel1 = words[i].neTypeLevel2 = "O";
    }

    // Link neighbours in both directions.
    for (int i = 1; i < words.length; i++) {
        words[i].previous = words[i - 1];
        words[i].previousIgnoreSentenceBoundary = words[i - 1];
        words[i - 1].next = words[i];
        words[i - 1].nextIgnoreSentenceBoundary = words[i];
    }

    // Classify the middle word (index 3 of 7), which has three neighbours
    // on each side.
    tagger.classify(words[words.length / 2]);
}
use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.
the class NEWord method addTokenToSentence.
/**
 * Wraps the given token text in an {@link NEWord} carrying the supplied tag
 * and passes it to the {@link NEWord}-based overload, which adds it to the
 * sentence (and, per the original note, performs any additional word
 * splitting).
 *
 * @param sentence the sentence to add the word to.
 * @param token the individual token.
 * @param tag the tag to annotate the word with.
 */
public static void addTokenToSentence(LinkedVector sentence, String token, String tag) {
    addTokenToSentence(sentence, new NEWord(new Word(token), null, tag));
}
Aggregations