Search in sources :

Example 1 with Word

use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.

the class POSBracketToToken method next.

/**
 * Returns the next labeled word in the data.
 *
 * <p>When no token is pending, the next non-empty {@link LinkedVector} is pulled from
 * the superclass parser and its {@link Word}s are converted into a doubly linked chain
 * of {@link Token}s. Each token is built with the word's <code>partOfSpeech</code> as
 * the third constructor argument, and the token's own <code>partOfSpeech</code> field is
 * then set to <code>null</code> for every token in the chain, including the last one.
 *
 * @return the next {@link Token}, or <code>null</code> once the superclass parser
 *         returns <code>null</code>.
 */
public Object next() {
    if (currentWord == null) {
        LinkedVector vector = (LinkedVector) super.next();
        // Skip over empty vectors; stop when the underlying parser is exhausted.
        while (vector != null && vector.size() == 0) {
            vector = (LinkedVector) super.next();
        }
        if (vector == null) {
            return null;
        }

        Word w = (Word) vector.get(0);
        Token t = currentWord = new Token(w, null, w.partOfSpeech);
        t.partOfSpeech = null;

        while (w.next != null) {
            w = (Word) w.next;
            Token appended = new Token(w, t, w.partOfSpeech);
            // Clear the field on the token just created. Clearing it on the previous
            // token instead would leave the final token of the chain untouched.
            appended.partOfSpeech = null;
            t.next = appended;
            t = appended;
        }
    }

    Token result = currentWord;
    currentWord = (Token) currentWord.next;
    return result;
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)

Example 2 with Word

use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.

the class PlainToTokenParser method next.

/**
 * This method returns {@link Token}s until the input is exhausted, at
 * which point it returns <code>null</code>.
 *
 * <p>When no token is pending, a {@link LinkedVector} of {@link Word}s is read from the
 * wrapped parser and converted into a chain of {@link Token}s created with a
 * <code>null</code> third constructor argument. Empty vectors are skipped, matching how
 * the related parsers treat them, instead of being indexed at position 0.
 *
 * @return the next {@link Token}, or <code>null</code> once the wrapped parser returns
 *         <code>null</code>.
 **/
public Object next() {
    while (next == null) {
        LinkedVector words = (LinkedVector) parser.next();
        if (words == null) {
            return null;
        }
        // Nothing to convert; ask the wrapped parser for the next vector.
        if (words.size() == 0) {
            continue;
        }

        Word w = (Word) words.get(0);
        Token t = new Token(w, null, null);
        for (w = (Word) w.next; w != null; w = (Word) w.next) {
            t.next = new Token(w, t, null);
            t = (Token) t.next;
        }

        // The vector is built from the last token of the chain; its element 0 is then
        // taken as the first token to hand out.
        LinkedVector tokens = new LinkedVector(t);
        next = (Token) tokens.get(0);
    }

    Token result = next;
    next = (Token) next.next;
    return result;
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)

Example 3 with Word

use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.

the class WordsToTokens method convert.

/**
 * Builds a {@link LinkedVector} of {@link Token}s mirroring the {@link Word}s of the
 * given {@link LinkedVector}.
 *
 * <p>A <code>null</code> or empty argument is handed back unchanged. Otherwise one
 * {@link Token} is created per {@link Word}, each linked to its predecessor, and the
 * result is constructed from the last token of that chain.
 *
 * @param v A {@link LinkedVector} of {@link Word}s.
 * @return A {@link LinkedVector} of {@link Token}s corresponding to the input
 *         {@link Word}s, or <code>v</code> itself when it is <code>null</code> or empty.
 **/
public static LinkedVector convert(LinkedVector v) {
    if (v == null || v.size() == 0) {
        return v;
    }

    Word current = (Word) v.get(0);
    Token tail = new Token(current, null, null);
    // Follow the word chain, appending one token per remaining word.
    while ((current = (Word) current.next) != null) {
        Token appended = new Token(current, tail, null);
        tail.next = appended;
        tail = appended;
    }
    return new LinkedVector(tail);
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)

Example 4 with Word

use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.

the class Decoder method nullifyTaggerCachedFields.

/*
 * Runs the tagger once on a throwaway chain of seven dummy "lala1" words, all labeled
 * "O", classifying the word at index 3. According to the original note, LBJ caches
 * state inside the tagger and this call is needed by the beam search and the Viterbi
 * decoders to get rid of it.
 */
public static void nullifyTaggerCachedFields(SparseNetworkLearner tagger) {
    NEWord template = new NEWord(new Word("lala1"), null, "O");
    template.parts = new String[0];

    // Seven copies of the template, every one tagged "O" on both levels.
    NEWord[] chain = new NEWord[7];
    for (int i = 0; i < chain.length; i++) {
        NEWord copy = new NEWord(template, null, "O");
        copy.neTypeLevel2 = "O";
        copy.neTypeLevel1 = "O";
        chain[i] = copy;
    }

    // Link each adjacent pair in both directions; only the right-hand word of a pair
    // gets a fresh empty parts array, so the first word's parts are left as constructed.
    for (int i = 0; i + 1 < chain.length; i++) {
        NEWord left = chain[i];
        NEWord right = chain[i + 1];
        right.parts = new String[0];
        right.previous = left;
        right.previousIgnoreSentenceBoundary = left;
        left.next = right;
        left.nextIgnoreSentenceBoundary = right;
    }

    tagger.classify(chain[3]);
}
Also used : NEWord(edu.illinois.cs.cogcomp.ner.LbjTagger.NEWord) Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word)

Example 5 with Word

use of edu.illinois.cs.cogcomp.lbjava.nlp.Word in project cogcomp-nlp by CogComp.

the class NEWord method addTokenToSentence.

/**
 * Wraps the given token text in a new {@link NEWord} carrying the given tag and passes it
 * to the {@code addTokenToSentence(LinkedVector, NEWord)} overload, which is responsible
 * for adding it to the sentence (and, per the original documentation, for any additional
 * word splitting).
 *
 * @param sentence the sentence to add the word to.
 * @param token the individual token.
 * @param tag the tag to annotate the word with.
 */
public static void addTokenToSentence(LinkedVector sentence, String token, String tag) {
    addTokenToSentence(sentence, new NEWord(new Word(token), null, tag));
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word)

Aggregations

Word (edu.illinois.cs.cogcomp.lbjava.nlp.Word)15 LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)9 SentenceSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter)5 WordSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter)3 Parser (edu.illinois.cs.cogcomp.lbjava.parse.Parser)3 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)2 Sentence (edu.illinois.cs.cogcomp.lbjava.nlp.Sentence)2 NEWord (edu.illinois.cs.cogcomp.ner.LbjTagger.NEWord)2 ArrayList (java.util.ArrayList)2 Chunker (edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker)1 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)1 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)1 Classifier (edu.illinois.cs.cogcomp.lbjava.classify.Classifier)1 PlainToTokenParser (edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser)1 Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)1 POSTagger (edu.illinois.cs.cogcomp.pos.lbjava.POSTagger)1 BufferedReader (java.io.BufferedReader)1 FileNotFoundException (java.io.FileNotFoundException)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1