Search in sources :

Example 6 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

From the class LBJavaUtils, method recordToLBJTokens.

/**
 * Builds the list of LBJ {@link Token}s corresponding to the tokens of a record, for use with
 * LBJ classifiers.
 *
 * <p>Tokens are processed sentence by sentence. Within a sentence each token is chained to its
 * predecessor (both at the {@link Word} level and at the {@link Token} level); the chain is not
 * carried across sentence boundaries. Straight double quotes are rewritten alternately as
 * {@code ``} and {@code ''} (starting with {@code ``} in every sentence), and round, curly and
 * square brackets are rewritten as {@code -LRB-}/{@code -RRB-}, {@code -LCB-}/{@code -RCB-} and
 * {@code -LSB-}/{@code -RSB-}.
 *
 * <p>If the record has a non-empty POS view, its constituents are consumed in order (one per
 * token, across all sentences) and their labels are stored as the part of speech of each word.
 *
 * @param record the record providing the TOKENS and SENTENCE views (and optionally POS)
 * @return the LBJ tokens of all sentences, in order
 */
public static List<Token> recordToLBJTokens(TextAnnotation record) {
    List<Token> lbjTokens = new LinkedList<>();
    List<List<String>> sentences = tokensAsStrings(
            record.getView(ViewNames.TOKENS).getConstituents(),
            record.getView(ViewNames.SENTENCE).getConstituents(),
            record.getText());
    List<Constituent> tags = record.hasView(ViewNames.POS)
            ? record.getView(ViewNames.POS).getConstituents()
            : null;
    // Runs over the POS constituents for the whole record, not per sentence.
    int tagIndex = 0;
    for (List<String> sentence : sentences) {
        // Quote direction and the previous-element links start fresh in every sentence.
        boolean nextQuoteOpens = true;
        Word previousWord = null;
        Token previousToken = null;
        for (String rawToken : sentence) {
            String form;
            switch (rawToken) {
                case "\"":
                    form = nextQuoteOpens ? "``" : "''";
                    nextQuoteOpens = !nextQuoteOpens;
                    break;
                case "(":
                    form = "-LRB-";
                    break;
                case ")":
                    form = "-RRB-";
                    break;
                case "{":
                    form = "-LCB-";
                    break;
                case "}":
                    form = "-RCB-";
                    break;
                case "[":
                    form = "-LSB-";
                    break;
                case "]":
                    form = "-RSB-";
                    break;
                default:
                    form = rawToken;
                    break;
            }
            Word currentWord = new Word(form, previousWord);
            if (tags != null && !tags.isEmpty()) {
                currentWord.partOfSpeech = tags.get(tagIndex++).getLabel();
            }
            Token currentToken = new Token(currentWord, previousToken, "");
            lbjTokens.add(currentToken);
            // Complete the forward link of the doubly linked token chain.
            if (previousToken != null) {
                previousToken.next = currentToken;
            }
            previousWord = currentWord;
            previousToken = currentToken;
        }
    }
    return lbjTokens;
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) List(java.util.List) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 7 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

From the class TestPOSModels, method testAccuracy.

/**
 * Runs the tagger over every token read from the labeled test file, counts how many predicted
 * tags equal the token's gold label, and prints the overall accuracy as a percentage with two
 * decimal places.
 */
public void testAccuracy() {
    // Not referenced below; kept because the original instantiates it.
    WordForm __wordForm = new WordForm();
    Parser labeledParser = new POSBracketToToken(labeledTestFile);
    int total = 0;
    int correct = 0;
    Token current;
    // The parser signals the end of the data by returning null.
    while ((current = (Token) labeledParser.next()) != null) {
        String goldTag = current.label;
        String predictedTag = tagger.discreteValue(current);
        if (goldTag.equals(predictedTag)) {
            correct++;
        }
        total++;
    }
    double percent = 100.0 * (double) correct / (double) total;
    System.out.println("Total accuracy over " + total + " items: " + String.format("%.2f", percent) + "%");
}
Also used : POSBracketToToken(edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) POSBracketToToken(edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser)

Example 8 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

From the class POSTaggerUnknown, method cachedFeatureValue.

/**
 * Returns the feature for the given token, using the token's {@code partOfSpeech} field as a
 * cache.
 *
 * <p>If the field is already set, a feature is built directly from the stored value. Otherwise
 * the value is computed with {@code valueOf} over the tags allowed by {@code __MikheevTable},
 * stored back into the token, and returned.
 *
 * @param __example the example to classify; it is cast to {@link Token}
 * @return the feature carrying the token's (possibly just computed) part of speech
 */
private Feature cachedFeatureValue(Object __example) {
    Token token = (Token) __example;
    String cached = token.partOfSpeech;
    if (cached == null) {
        // Nothing stored yet: compute the value and remember it on the token.
        Feature computed = valueOf(token, __MikheevTable.allowableTags(token));
        token.partOfSpeech = computed.getStringValue();
        return computed;
    }
    return new DiscretePrimitiveStringFeature(containingPackage, name, "", cached, valueIndexOf(cached), (short) allowableValues().length);
}
Also used : DiscretePrimitiveStringFeature(edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) Feature(edu.illinois.cs.cogcomp.lbjava.classify.Feature) DiscretePrimitiveStringFeature(edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature)

Example 9 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

From the class POSWindow, method classify.

/**
 * Produces one feature per token in a window around the given token, each holding the value
 * {@code __POSTagger} assigns to that token.
 *
 * <p>The window reaches at most two tokens back and two tokens forward from the given token,
 * and is shorter where the chain of {@code previous}/{@code next} links ends. Each feature's
 * identifier is the token's offset from the given token, as a decimal string ({@code "-2"}
 * through {@code "2"}).
 *
 * <p>If the argument is not a {@link Token}, an error message and a stack trace are printed and
 * the JVM is terminated with exit status 1.
 *
 * @param __example the example to classify; must be a {@link Token}
 * @return the feature vector holding the window features
 */
public FeatureVector classify(Object __example) {
    if (!(__example instanceof Token)) {
        String type = __example == null ? "null" : __example.getClass().getName();
        System.err.println("Classifier 'POSWindow(Token)' defined on line 31 of POS.lbj received '" + type + "' as input.");
        new Exception().printStackTrace();
        System.exit(1);
    }
    Token center = (Token) __example;
    FeatureVector features = new FeatureVector();

    // Exclusive end of the window: up to three links forward from the center token.
    Token end = center;
    int forwardSteps = 0;
    while (forwardSteps <= 2 && end != null) {
        end = (Token) end.next;
        forwardSteps++;
    }

    // Start of the window: up to two links back; offset tracks the position relative to center.
    Token cursor = center;
    int offset = 0;
    while (offset > -2 && cursor.previous != null) {
        cursor = (Token) cursor.previous;
        offset--;
    }

    while (cursor != end) {
        String id = Integer.toString(offset);
        String value = "" + __POSTagger.discreteValue(cursor);
        features.addFeature(new DiscretePrimitiveStringFeature(this.containingPackage, this.name, id, value, valueIndexOf(value), (short) 0));
        offset++;
        cursor = (Token) cursor.next;
    }
    return features;
}
Also used : Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)

Example 10 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

From the class POSAnnotator, method addView.

/**
 * Annotates a TextAnnotation with a POS view and adds that view to the TextAnnotation.
 *
 * <p>The record's tokens are converted to LBJ tokens, each is tagged, and a constituent
 * carrying the predicted tag is created over the span of the corresponding TOKENS constituent.
 *
 * @param record TextAnnotation to annotate; must already be tokenized and sentence split
 * @throws AnnotatorException if the record lacks the token view or the sentence view
 */
@Override
public void addView(TextAnnotation record) throws AnnotatorException {
    // Both views are required: the LBJ token conversion below reads tokens and sentences.
    if (!record.hasView(tokensfield) || !record.hasView(sentencesfield)) {
        throw new AnnotatorException("Record must be tokenized and sentence split first");
    }
    long startTime = System.currentTimeMillis();
    List<Token> input = LBJavaUtils.recordToLBJTokens(record);
    List<Constituent> tokens = record.getView(ViewNames.TOKENS).getConstituents();
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, getAnnotatorName(), record, 1.0);
    // Index of the TOKENS constituent matching the current LBJ token.
    int tcounter = 0;
    for (Token lbjtoken : input) {
        // Tag each token once and reuse the result for the label constituent.
        String posTag = tagger.discreteValue(lbjtoken);
        Constituent token = tokens.get(tcounter);
        Constituent label = new Constituent(posTag, ViewNames.POS, record, token.getStartSpan(), token.getEndSpan());
        posView.addConstituent(label);
        tcounter++;
    }
    long endTime = System.currentTimeMillis();
    logger.debug("Tagged input in {}ms", endTime - startTime);
    record.addView(ViewNames.POS, posView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)

Aggregations

Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token): 19; DiscretePrimitiveStringFeature (edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature): 8; FeatureVector (edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector): 5; Parser (edu.illinois.cs.cogcomp.lbjava.parse.Parser): 4; Feature (edu.illinois.cs.cogcomp.lbjava.classify.Feature): 3; AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException): 2; SentenceSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter): 2; WordSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter): 2; PlainToTokenParser (edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser): 2; LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector): 2; Test (org.junit.Test): 2; Chunker (edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker): 1; CoNLL2000Parser (edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser): 1; Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 1; Word (edu.illinois.cs.cogcomp.lbjava.nlp.Word): 1; POSBracketToToken (edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken): 1; ChildrenFromVectors (edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors): 1; POSTagger (edu.illinois.cs.cogcomp.pos.lbjava.POSTagger): 1; ArrayList (java.util.ArrayList): 1; LinkedList (java.util.LinkedList): 1