Search in sources :

Example 6 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class EREDocumentReader method createAndAddXmlMarkupAnnotations.

/**
 * Builds a view whose constituents mark post and quotation boundaries, and adds it to the
 * TextAnnotation wrapped by {@code xmlTa} if it ends up non-empty.
 * <p>
 * Each constituent is labeled with its span type. Attribute AUTHOR holds the post author (for
 * posts) or the original author (for quotes); attributes NAME_START and NAME_END hold the offset
 * pair stored alongside that author name in the xml markup.
 * <p>
 * Note: a post/quote span that carries no author information is converted to a constituent but
 * is NOT added to the view.
 *
 * @param xmlTa an XmlTextAnnotation containing information to use for a POST_ERE view.
 * @throws IllegalStateException if either xml offset of a post/quote span maps to -1 in the
 *         cleaned text.
 */
private void createAndAddXmlMarkupAnnotations(XmlTextAnnotation xmlTa) {
    List<XmlDocumentProcessor.SpanInfo> xmlSpans = xmlTa.getXmlMarkup();
    TextAnnotation ta = xmlTa.getTextAnnotation();
    View postView = new View(getPostViewName(), NAME, ta, 1.0);
    for (XmlDocumentProcessor.SpanInfo span : xmlSpans) {
        String spanType = span.label;
        // Only posts and quotes are of interest; they differ in which attribute names the author.
        Pair<String, IntPair> author;
        if (POST.equals(spanType)) {
            author = span.attributes.get(AUTHOR);
        } else if (QUOTE.equals(spanType)) {
            author = span.attributes.get(ORIG_AUTHOR);
        } else {
            continue;
        }
        // Map the span's offsets in the original xml onto offsets in the cleaned text.
        int cleanStart = xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(span.spanOffsets.getFirst());
        int cleanEnd = xmlTa.getXmlSt().computeModifiedOffsetFromOriginal(span.spanOffsets.getSecond());
        if (-1 == cleanStart || -1 == cleanEnd) {
            throw new IllegalStateException("could not compute cleanText offsets for " + spanType + " span with offsets " + span.spanOffsets.getFirst() + ", " + span.spanOffsets.getSecond());
        }
        int startToken = ta.getTokenIdFromCharacterOffset(cleanStart);
        int endToken = ta.getTokenIdFromCharacterOffset(cleanEnd);
        assert (startToken >= 0 && endToken >= 0 && endToken > startToken);
        Constituent boundary = new Constituent(spanType, getPostViewName(), ta, startToken, endToken);
        if (null == author) {
            // No author info: the constituent is dropped rather than added without attributes.
            continue;
        }
        IntPair nameOffsets = author.getSecond();
        boundary.addAttribute(AUTHOR, author.getFirst());
        boundary.addAttribute(NAME_START, Integer.toString(nameOffsets.getFirst()));
        boundary.addAttribute(NAME_END, Integer.toString(nameOffsets.getSecond()));
        postView.addConstituent(boundary);
    }
    // Avoid registering an empty view.
    if (!postView.getConstituents().isEmpty()) {
        ta.addView(getPostViewName(), postView);
    }
}
Also used : XmlDocumentProcessor(edu.illinois.cs.cogcomp.core.utilities.XmlDocumentProcessor) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) XmlTextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 7 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotator method addView.

/**
 * Creates a span-label view of gazetteer matches and adds it to the given TextAnnotation.
 * <p>
 * The view will consist of potentially overlapping constituents representing those tokens that
 * matched entries in the gazetteers. Some tokens will match against several gazetteers.
 * <p>
 * For every token position, each dictionary in {@code dictionaries} and the entry at the same
 * index in {@code dictionariesIgnoreCase} is asked to match starting at that position. The two
 * lists are indexed in parallel, so {@code dictionariesIgnoreCase} is assumed to have at least
 * as many entries as {@code dictionaries}.
 *
 * @param ta the TextAnnotation to annotate; its TOKENS view is read.
 * @throws AnnotatorException declared by the overridden method; not thrown directly here.
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    List<Constituent> tokens = ta.getView(ViewNames.TOKENS).getConstituents();
    SpanLabelView matches = new SpanLabelView(this.getViewName(), this.getClass().getName(), ta, 1d, true);
    for (int tokenPos = 0; tokenPos < tokens.size(); tokenPos++) {
        for (int dictPos = 0; dictPos < dictionaries.size(); dictPos++) {
            // Try the dictionary first, then its ignore-case counterpart at the same index.
            dictionaries.get(dictPos).match(tokens, tokenPos, matches);
            dictionariesIgnoreCase.get(dictPos).match(tokens, tokenPos, matches);
        }
    }
    ta.addView(matches.getViewName(), matches);
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 8 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class POSWindowTwo method getFeatures.

/**
 * Produces POS-window features for a constituent.
 * <p>
 * This feature extractor assumes that the TOKEN View and POS View have been generated in the
 * constituent's TextAnnotation. It asks {@code getwindowtagskfrom} for the tags in a window of
 * k = 2 around the constituent's span and emits one {@link DiscreteFeature} per non-null tag,
 * named {@code POSWindowTwo:<index>(<tag>)}. Each emitted feature name is also logged at info
 * level.
 * <p>
 * NOTE(review): if fetching either view throws, the exception is only printed and the views
 * are passed on as {@code null} - confirm whether this best-effort behavior is intended.
 *
 * @param c the constituent to extract features for; the comments in this method assume it is
 *        a single word (token).
 * @return the features in insertion order (window index order).
 * @throws EdisonException declared by the overridden method; not thrown directly here.
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    View tokenView = null;
    View posView = null;
    try {
        tokenView = ta.getView(ViewNames.TOKENS);
        posView = ta.getView(ViewNames.POS);
    } catch (Exception e) {
        e.printStackTrace();
    }
    // The constituent is assumed to be a Word(Token) as described by the LBJ chunk definition,
    // so the window covers the words two before and two after it.
    final int windowSize = 2;
    String[] windowTags = getwindowtagskfrom(tokenView, posView, c.getStartSpan(), c.getEndSpan(), windowSize);
    final String prefix = "POSWindowTwo";
    Set<Feature> features = new LinkedHashSet<Feature>();
    for (int slot = 0; slot < windowTags.length; slot++) {
        String tag = windowTags[slot];
        if (tag != null) {
            // Slots without a tag contribute no feature.
            String featureName = prefix + ":" + slot + "(" + tag + ")";
            logger.info(featureName);
            features.add(new DiscreteFeature(featureName));
        }
    }
    return features;
}
Also used : DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature) IOException(java.io.IOException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 9 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class TestSrlNomIdentifier method test.

/**
 * Compares the features produced by the manifest-defined nom-identifier extractor with those
 * of {@code SrlNomIdentifier} on a handful of constituents.
 * <p>
 * Only in and out relations in the SRL_VERB view are used for the purpose of testing.
 * <p>
 * The manifest file stream is closed when the test finishes, whether it passes or fails.
 *
 * @throws Exception if the manifest file cannot be opened or any step of the setup fails.
 */
public final void test() throws Exception {
    logger.info("Nom_Identifier Feature Extractor");
    String[] viewsToAdd = { ViewNames.POS, ViewNames.LEMMA, ViewNames.SHALLOW_PARSE, ViewNames.PARSE_GOLD, ViewNames.SRL_VERB, ViewNames.PARSE_STANFORD, ViewNames.NER_CONLL };
    TextAnnotation ta = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(viewsToAdd, true, 3);
    ta.addView(ClauseViewGenerator.STANFORD);
    ta.addView(PseudoParse.STANFORD);
    logger.info("This textannotation annotates the text: \n" + ta.getText());
    // Test constituents: those covering token spans [10, 13) and [26, 27) in SRL_VERB.
    View srlVerbView = ta.getView("SRL_VERB");
    List<Constituent> testlist = srlVerbView.getConstituentsCoveringSpan(10, 13);
    testlist.addAll(srlVerbView.getConstituentsCoveringSpan(26, 27));
    String fileName = Constant.prefix + "/Nom/Identifier/nom-identifier.fex";
    // Keep the stream open for the whole lifetime of the manifest and the extractor built from
    // it, then close it deterministically instead of leaking the file handle.
    try (FileInputStream manifestStream = new FileInputStream(fileName)) {
        FeatureManifest featureManifest = new FeatureManifest(manifestStream);
        // Register the named extractors/transformers that the manifest is configured with here.
        FeatureManifest.setFeatureExtractor("hyphen-argument-feature", FeatureGenerators.hyphenTagFeature);
        FeatureManifest.setTransformer("parse-left-sibling", FeatureGenerators.getParseLeftSibling(ViewNames.PARSE_STANFORD));
        FeatureManifest.setTransformer("parse-right-sibling", FeatureGenerators.getParseRightSibling(ViewNames.PARSE_STANFORD));
        FeatureManifest.setFeatureExtractor("pp-features", FeatureGenerators.ppFeatures(ViewNames.PARSE_STANFORD));
        FeatureManifest.setFeatureExtractor("projected-path", new ProjectedPath(ViewNames.PARSE_STANFORD));
        featureManifest.useCompressedName();
        featureManifest.setVariable("*default-parser*", ViewNames.PARSE_STANFORD);
        FeatureExtractor fex = featureManifest.createFex();
        SrlNomIdentifier ni = new SrlNomIdentifier();
        for (Constituent test : testlist) {
            assertTrue(SRLFeaturesComparator.isEqual(test, fex, ni));
        }
    }
}
Also used : FeatureExtractor(edu.illinois.cs.cogcomp.edison.features.FeatureExtractor) FeatureManifest(edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest) ProjectedPath(edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) FileInputStream(java.io.FileInputStream)

Example 10 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class TestCorlex method test.

/**
 * Checks that every feature the Corelex extractor returns for one word is among the expected
 * feature names.
 * <p>
 * Fails with an explicit assertion message (rather than a NullPointerException) when the
 * extractor returns {@code null}.
 *
 * @throws EdisonException if feature extraction fails.
 */
public final void test() throws EdisonException {
    log.debug("Corlex Feature Extractor");
    // Uses the TextAnnotation at index 1 of the fixture list and asks for the features of
    // index 1 within it (presumably the token position - confirm against getWordFeatures).
    TextAnnotation ta = tas.get(1);
    // The view itself is not needed below; the lookup is kept so the test still touches the
    // TOKENS view exactly as before.
    ta.getView("TOKENS");
    log.debug("Got tokens FROM TextAnnotation");
    CorelexFeatureExtractor testInstance = new CorelexFeatureExtractor(true);
    Set<Feature> feats = testInstance.getWordFeatures(ta, 1);
    String[] expected_outputs = { "atr" };
    assertTrue("Feats are returning NULL.", feats != null);
    log.debug("Printing Set of Features");
    for (Feature f : feats) {
        String featureName = f.getName();
        log.debug(featureName);
        assertTrue(ArrayUtils.contains(expected_outputs, featureName));
    }
}
Also used : CorelexFeatureExtractor(edu.illinois.cs.cogcomp.edison.features.factory.CorelexFeatureExtractor) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Aggregations

View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)64 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)51 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)49 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)22 Test (org.junit.Test)21 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)16 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)7 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)7 ArrayList (java.util.ArrayList)7 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)6 LinkedHashSet (java.util.LinkedHashSet)6 Set (java.util.Set)6 POSBaseLineCounter (edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter)5 POSMikheevCounter (edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter)5 IOException (java.io.IOException)5 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)4 JsonObject (com.google.gson.JsonObject)3