Search in sources :

Example 11 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

the class POSTaggerKnown method cachedFeatureValue.

/**
 * Returns the feature for the given token, using the token's {@code partOfSpeech} field as a
 * cache.
 *
 * <p>When {@code partOfSpeech} is {@code null}, the feature is computed with {@code valueOf}
 * (restricted to the tags returned by {@code baselineTarget.allowableTags} for the token's
 * word form) and its string value is written back into {@code partOfSpeech}. Otherwise a new
 * {@link DiscretePrimitiveStringFeature} is built directly from the stored string.
 *
 * @param __example the example to classify; it is cast to {@link Token} unconditionally
 * @return the computed or reconstructed feature
 */
private Feature cachedFeatureValue(Object __example) {
    final Token token = (Token) __example;
    final String storedTag = token.partOfSpeech;

    if (storedTag == null) {
        // Cache miss: compute the value and remember it on the token itself.
        final Feature computed =
                valueOf(token, baselineTarget.allowableTags(wordForm.discreteValue(token)));
        token.partOfSpeech = computed.getStringValue();
        return computed;
    }

    // Cache hit: rebuild the feature object from the stored string.
    return new DiscretePrimitiveStringFeature(
            containingPackage,
            name,
            "",
            storedTag,
            valueIndexOf(storedTag),
            (short) allowableValues().length);
}
Also used : DiscretePrimitiveStringFeature(edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) Feature(edu.illinois.cs.cogcomp.lbjava.classify.Feature)

Example 12 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

the class TestDiff method testDiff.

/**
 * Tags every token parsed from {@code testFile}, compares each predicted tag with the entry at
 * the same position in {@code refTags}, and fails if the fraction of matching tags is below
 * {@code thresholdAcc}.
 *
 * <p>The test also fails when no tokens are produced at all: in that case the accuracy would
 * be {@code 0.0 / 0 == NaN}, and {@code NaN < thresholdAcc} is always {@code false}, so the
 * threshold check alone would let an empty input pass silently.
 */
@Test
public void testDiff() {
    POSTagger tagger = new POSTagger();
    Parser parser = new PlainToTokenParser(new WordSplitter(new SentenceSplitter(testFile)));
    int tokenCounter = 0;
    int correctCounter = 0;
    for (Token word = (Token) parser.next(); word != null; word = (Token) parser.next()) {
        String tag = tagger.discreteValue(word);
        // refTags is indexed by running token position across the whole file.
        if (refTags.get(tokenCounter).equals(tag)) {
            correctCounter++;
        }
        tokenCounter++;
    }
    // Guard the division below: with zero tokens the ratio is NaN and would never be
    // reported as "below threshold".
    if (tokenCounter == 0) {
        fail("Tagger performance could not be measured: no tokens were read from the test file");
    }
    double result = ((double) correctCounter) / tokenCounter;
    if (result < thresholdAcc) {
        fail("Tagger performance is insufficient: " + "\nProduced: " + result + "\nExpected: " + thresholdAcc);
    }
}
Also used : SentenceSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter) PlainToTokenParser(edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) WordSplitter(edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter) POSTagger(edu.illinois.cs.cogcomp.pos.lbjava.POSTagger) Parser(edu.illinois.cs.cogcomp.lbjava.parse.Parser) Test(org.junit.Test)

Example 13 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

the class Formpp method classify.

/**
 * Produces word-form n-gram features over a window of up to two tokens before and two tokens
 * after the given token.
 *
 * <p>The window is found by following {@code next} / {@code previous} links from the example
 * token. The forms of the tokens in the window are stored left to right in a fixed-size array
 * of {@code before + after + 1} slots; slots that are not filled (because the window ran off
 * either end of the linked sequence) remain {@code null} and are rendered as the text
 * {@code "null"} when joined. For every start index {@code i} and every order {@code j} in
 * {@code [0, k)}, one feature is emitted whose identifier is {@code i + "_" + j} and whose
 * value is the forms at positions {@code i .. i + j} (clipped to the array end) joined by
 * {@code "_"}.
 *
 * @param __example the example to classify; it is cast to {@link Token} unconditionally
 * @return a new {@link FeatureVector} holding the generated features
 */
public FeatureVector classify(Object __example) {
    Token word = (Token) __example;
    FeatureVector __result;
    __result = new FeatureVector();
    String __id;
    String __value;
    int before = 2;
    int after = 2;
    int k = 2;
    int i;
    Token w = word, last = word;
    // Advance 'last' to one past the right edge of the window (null at the sequence end).
    for (i = 0; i <= after && last != null; ++i) {
        last = (Token) last.next;
    }
    // Move 'w' back to the left edge of the window, stopping at the first token.
    for (i = 0; i > -before && w.previous != null; --i) {
        w = (Token) w.previous;
    }
    String[] forms = new String[before + after + 1];
    i = 0;
    for (; w != last; w = (Token) w.next) {
        // Record the form of the token currently being visited, not of the centre token;
        // otherwise every slot would hold the same string.
        forms[i++] = w.form;
    }
    for (int j = 0; j < k; j++) {
        for (i = 0; i < forms.length; i++) {
            StringBuilder f = new StringBuilder();
            for (int context = 0; context <= j && i + context < forms.length; context++) {
                if (context != 0) {
                    f.append("_");
                }
                f.append(forms[i + context]);
            }
            __id = "" + (i + "_" + j);
            __value = "" + (f.toString());
            __result.addFeature(new DiscretePrimitiveStringFeature(this.containingPackage, this.name, __id, __value, valueIndexOf(__value), (short) 0));
        }
    }
    return __result;
}
Also used : FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) DiscretePrimitiveStringFeature(edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)

Example 14 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

the class Mixed method classify.

/**
 * Produces features that combine part-of-speech tags and word forms over a window of up to
 * two tokens before and two tokens after the given token.
 *
 * <p>Tags (from {@code __POSTagger.discreteValue}) and forms of the window tokens are stored
 * left to right in two parallel arrays of {@code before + after + 1} slots; unfilled slots
 * stay {@code null} and are rendered as {@code "null"} when joined. For each order in
 * {@code [1, k)} two passes are made over all start positions. In the first pass each joined
 * element is taken from the tag array when a boolean toggle is set and from the form array
 * otherwise; the toggle flips after every appended element and is reset only at the start of
 * a pass, not per start position. In the second pass every element is taken from the form
 * array. Both passes emit their features under the same identifier
 * {@code start + "_" + order}.
 *
 * @param __example the example to classify; it is cast to {@link Token} unconditionally
 * @return a new {@link FeatureVector} holding the generated features
 */
public FeatureVector classify(Object __example) {
    final Token centre = (Token) __example;
    final FeatureVector features = new FeatureVector();
    final int before = 2;
    final int after = 2;
    final int k = 2;

    // Find the token one past the right edge of the window (null at the sequence end).
    Token end = centre;
    int steps = 0;
    while (steps <= after && end != null) {
        end = (Token) end.next;
        steps++;
    }

    // Find the left edge of the window, stopping early at the first token.
    Token cursor = centre;
    steps = 0;
    while (steps < before && cursor.previous != null) {
        cursor = (Token) cursor.previous;
        steps++;
    }

    final int width = before + after + 1;
    final String[] tags = new String[width];
    final String[] forms = new String[width];
    int filled = 0;
    while (cursor != end) {
        tags[filled] = __POSTagger.discreteValue(cursor);
        forms[filled] = cursor.form;
        filled++;
        cursor = (Token) cursor.next;
    }

    for (int order = 1; order < k; order++) {
        for (int pass = 0; pass < 2; pass++) {
            // Deliberately scoped to the whole pass: it carries over between start positions.
            boolean pickTag = true;
            for (int start = 0; start < tags.length; start++) {
                final StringBuilder joined = new StringBuilder();
                for (int offset = 0; offset <= order && start + offset < tags.length; offset++) {
                    if (offset > 0) {
                        joined.append("_");
                    }
                    final String[] source = (pickTag && pass == 0) ? tags : forms;
                    joined.append(source[start + offset]);
                    pickTag = !pickTag;
                }
                final String featureId = start + "_" + order;
                final String featureValue = joined.toString();
                features.addFeature(
                        new DiscretePrimitiveStringFeature(
                                this.containingPackage,
                                this.name,
                                featureId,
                                featureValue,
                                valueIndexOf(featureValue),
                                (short) 0));
            }
        }
    }
    return features;
}
Also used : FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) DiscretePrimitiveStringFeature(edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)

Example 15 with Token

use of edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token in project cogcomp-nlp by CogComp.

the class POSWindowpp method classify.

/**
 * Produces part-of-speech tag n-gram features over a window of up to three tokens before and
 * three tokens after the given token.
 *
 * <p>The tag of each window token is obtained from {@code __POSTagger.discreteValue} and
 * stored left to right in an array of {@code before + after + 1} slots; unfilled slots stay
 * {@code null} and are rendered as {@code "null"} when joined. For every order in
 * {@code [0, k)} and every start index, one feature is emitted whose identifier is
 * {@code start + "_" + order} and whose value is the tags at positions
 * {@code start .. start + order} (clipped to the array end) joined by {@code "_"}.
 *
 * @param __example the example to classify; it is cast to {@link Token} unconditionally
 * @return a new {@link FeatureVector} holding the generated features
 */
public FeatureVector classify(Object __example) {
    final Token centre = (Token) __example;
    final FeatureVector features = new FeatureVector();
    final int before = 3;
    final int after = 3;
    final int k = 3;

    // Find the token one past the right edge of the window (null at the sequence end).
    Token end = centre;
    int steps = 0;
    while (steps <= after && end != null) {
        end = (Token) end.next;
        steps++;
    }

    // Find the left edge of the window, stopping early at the first token.
    Token cursor = centre;
    steps = 0;
    while (steps < before && cursor.previous != null) {
        cursor = (Token) cursor.previous;
        steps++;
    }

    final String[] tags = new String[before + after + 1];
    int filled = 0;
    while (cursor != end) {
        tags[filled] = __POSTagger.discreteValue(cursor);
        filled++;
        cursor = (Token) cursor.next;
    }

    for (int order = 0; order < k; order++) {
        for (int start = 0; start < tags.length; start++) {
            final StringBuilder joined = new StringBuilder();
            for (int offset = 0; offset <= order && start + offset < tags.length; offset++) {
                if (offset > 0) {
                    joined.append("_");
                }
                joined.append(tags[start + offset]);
            }
            final String featureId = start + "_" + order;
            final String featureValue = joined.toString();
            features.addFeature(
                    new DiscretePrimitiveStringFeature(
                            this.containingPackage,
                            this.name,
                            featureId,
                            featureValue,
                            valueIndexOf(featureValue),
                            (short) 0));
        }
    }
    return features;
}
Also used : FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) DiscretePrimitiveStringFeature(edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)

Aggregations

Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)19 DiscretePrimitiveStringFeature (edu.illinois.cs.cogcomp.lbjava.classify.DiscretePrimitiveStringFeature)8 FeatureVector (edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector)5 Parser (edu.illinois.cs.cogcomp.lbjava.parse.Parser)4 Feature (edu.illinois.cs.cogcomp.lbjava.classify.Feature)3 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 SentenceSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.SentenceSplitter)2 WordSplitter (edu.illinois.cs.cogcomp.lbjava.nlp.WordSplitter)2 PlainToTokenParser (edu.illinois.cs.cogcomp.lbjava.nlp.seg.PlainToTokenParser)2 LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector)2 Test (org.junit.Test)2 Chunker (edu.illinois.cs.cogcomp.chunker.main.lbjava.Chunker)1 CoNLL2000Parser (edu.illinois.cs.cogcomp.chunker.utils.CoNLL2000Parser)1 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)1 Word (edu.illinois.cs.cogcomp.lbjava.nlp.Word)1 POSBracketToToken (edu.illinois.cs.cogcomp.lbjava.nlp.seg.POSBracketToToken)1 ChildrenFromVectors (edu.illinois.cs.cogcomp.lbjava.parse.ChildrenFromVectors)1 POSTagger (edu.illinois.cs.cogcomp.pos.lbjava.POSTagger)1 ArrayList (java.util.ArrayList)1 LinkedList (java.util.LinkedList)1