Search in sources :

Example 16 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class ColumnFormatReader method outputToColumnFormatFile.

/**
 * Writes one view of a TextAnnotation to a file as tab-separated columns, one token per line.
 *
 * Each line has nine columns: the B-/I-/O tag derived from the first constituent of
 * {@code viewName} covering the token, "0", the token's start index, "O", "O", the token's
 * string form, "x", "x" and "0". A blank line is written after the last token of each sentence.
 *
 * I/O failures are not propagated: the stack trace is printed and the method returns.
 *
 * @param ta the annotated text to export
 * @param viewName name of the view whose constituents supply the tag column
 * @param outputFilePath path of the file to create or overwrite
 */
public static void outputToColumnFormatFile(TextAnnotation ta, String viewName, String outputFilePath) {
    List<Integer> sentenceEnds = new ArrayList<Integer>();
    for (int i = 0; i < ta.getNumberOfSentences(); i++) {
        sentenceEnds.add(ta.getSentence(i).getEndSpan());
    }
    // Placeholder values for the columns this writer does not populate.
    final String xh = "x";
    final String zh = "0";
    final String oh = "O";
    // A builder avoids re-copying the whole output on every token.
    StringBuilder outputContent = new StringBuilder();
    View tokenView = ta.getView(ViewNames.TOKENS);
    View constituentView = ta.getView(viewName);
    for (Constituent token : tokenView) {
        String consTag = "O";
        List<Constituent> constituentList = constituentView.getConstituentsCoveringToken(token.getStartSpan());
        if (constituentList.size() > 0) {
            // Only the first covering constituent is used for the tag.
            Constituent hit = constituentList.get(0);
            if (hit.getStartSpan() == token.getStartSpan()) {
                consTag = "B-" + hit.getLabel();
            } else {
                consTag = "I-" + hit.getLabel();
            }
        }
        outputContent.append(consTag).append("\t")
                .append(zh).append("\t")
                .append(token.getStartSpan()).append("\t")
                .append(oh).append("\t")
                .append(oh).append("\t")
                .append(token.toString()).append("\t")
                .append(xh).append("\t")
                .append(xh).append("\t")
                .append(zh).append("\n");
        // End spans are exclusive (one past the last token), so the sentence boundary has to be
        // matched against the token's end, not its start; matching the start would emit the
        // blank line one token too late and never after the final sentence.
        // NOTE(review): assumes Sentence.getEndSpan() follows the same exclusive-end convention
        // as Constituent - confirm.
        if (sentenceEnds.contains(token.getEndSpan())) {
            outputContent.append("\n");
        }
    }
    try {
        FileOutputStream out = new FileOutputStream(outputFilePath);
        try {
            out.write(outputContent.toString().getBytes());
        } finally {
            // Release the file handle even when the write fails.
            out.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 17 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class ExtentTester method getFullMention.

/**
 * Expands a mention head into its full extent by asking the classifier about each neighboring
 * token, first to the left of the head and then to the right.
 *
 * Expansion in a direction stops at the first token for which the classifier answers "false",
 * or at the edge of the token view. The returned Constituent carries the attributes
 * "EntityHeadStartSpan", "EntityHeadEndSpan", "EntityType" and "EntityMentionType"; the last
 * two are copied from the head.
 *
 * @param classifier The extent classifier
 * @param head The head Constituent
 * @param gazetteers gazetteers passed on to the attribute helpers
 * @param brownClusters Brown clusters passed on to the attribute helpers
 * @param wordnet WordNet manager passed on to the attribute helpers
 * @return A new Constituent in the MENTION view spanning the head and its accepted neighbors
 */
public static Constituent getFullMention(extent_classifier classifier, Constituent head, Gazetteers gazetteers, BrownClusters brownClusters, WordNetManager wordnet) {
    addHeadAttributes(head, gazetteers, brownClusters, wordnet);
    View tokens = head.getTextAnnotation().getView(ViewNames.TOKENS);

    // Walk leftwards from the token just before the head.
    int first = head.getStartSpan() - 1;
    for (; first >= tokens.getStartSpan(); first--) {
        Constituent neighbor = tokens.getConstituentsCoveringToken(first).get(0);
        addExtentAttributes(neighbor, gazetteers, brownClusters, wordnet);
        Relation pair = new Relation("UNKNOWN", neighbor, head, 1.0f);
        String verdict = classifier.discreteValue(pair);
        if (verdict.equals("false")) {
            // The rejected token is outside the extent; step back onto the last accepted one.
            first++;
            break;
        }
    }
    // Running off the left edge leaves the index one below the view start.
    first = Math.max(first, tokens.getStartSpan());

    // Walk rightwards from the token just after the head (head end span is exclusive).
    int last = head.getEndSpan();
    for (; last < tokens.getEndSpan(); last++) {
        Constituent neighbor = tokens.getConstituentsCoveringToken(last).get(0);
        addExtentAttributes(neighbor, gazetteers, brownClusters, wordnet);
        Relation pair = new Relation("UNKNOWN", neighbor, head, 1.0f);
        String verdict = classifier.discreteValue(pair);
        if (verdict.equals("false")) {
            last--;
            break;
        }
    }
    // Running off the right edge leaves the index at the view end; pull it back to the last token.
    last = Math.min(last, tokens.getEndSpan() - 1);

    // 'last' is inclusive here, while the Constituent constructor takes an exclusive end.
    Constituent extent = new Constituent(head.getLabel(), 1.0f, ViewNames.MENTION, head.getTextAnnotation(), first, last + 1);
    extent.addAttribute("EntityHeadStartSpan", Integer.toString(head.getStartSpan()));
    extent.addAttribute("EntityHeadEndSpan", Integer.toString(head.getEndSpan()));
    extent.addAttribute("EntityType", head.getAttribute("EntityType"));
    extent.addAttribute("EntityMentionType", head.getAttribute("EntityMentionType"));
    return extent;
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 18 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class LemmatizerTATest method testCreateTextAnnotationLemmaView.

/**
 * Checks that the lemma view built for the fixture TextAnnotation carries the expected lemma
 * labels at token positions 0, 1, 2, 6 and 15.
 */
@Test
public void testCreateTextAnnotationLemmaView() {
    View lemmaView = null;
    try {
        lemmaView = lem.createLemmaView(inputTa);
    } catch (IOException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // Fail explicitly instead of silently skipping the checks when no view comes back.
    assertTrue("createLemmaView returned null", null != lemmaView);

    List<Constituent> spans = lemmaView.getConstituents();
    printConstituents(System.out, spans);

    // NOTE(review): the earlier per-token comments ('men', 'have', 'examinations') did not
    // match the asserted lemmas; confirm the surface forms against the fixture sentence.
    String lemmaAt0 = spans.get(0).getLabel();
    String lemmaAt1 = spans.get(1).getLabel();
    String lemmaAt2 = spans.get(2).getLabel();
    String lemmaAt6 = spans.get(6).getLabel();
    String lemmaAt15 = spans.get(15).getLabel();

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
    // messages report the right side as "expected".
    assertEquals("the", lemmaAt0);
    assertEquals("cia", lemmaAt1);
    assertEquals("think", lemmaAt2);
    assertEquals("have", lemmaAt6);
    assertEquals("be", lemmaAt15);
}
Also used : IOException(java.io.IOException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 19 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class IllinoisLemmatizer method createLemmaView.

/**
 * Builds a LEMMA view with one single-token constituent per token, labeled with that token's
 * lemma, attaches the view to the given TextAnnotation, and returns it.
 *
 * @param inputTa the TextAnnotation to lemmatize; the new view is added to it
 * @return the view that was added under {@code ViewNames.LEMMA}
 * @throws IOException declared by this method's signature
 */
public View createLemmaView(TextAnnotation inputTa) throws IOException {
    final int tokenCount = inputTa.getTokens().length;
    TokenLabelView view = new TokenLabelView(ViewNames.LEMMA, NAME, inputTa, 1.0);
    int tokenIndex = 0;
    while (tokenIndex < tokenCount) {
        // Each constituent covers exactly the token at tokenIndex (end index is exclusive).
        view.addConstituent(new Constituent(getLemma(inputTa, tokenIndex), ViewNames.LEMMA, inputTa, tokenIndex, tokenIndex + 1));
        tokenIndex++;
    }
    inputTa.addView(ViewNames.LEMMA, view);
    return view;
}
Also used : TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 20 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class Contains method getFeatures.

/**
 * Emits a single feature: Y when any constituent of the configured view that covers the
 * instance has a tokenized surface form or a label found in {@code contained}, otherwise N
 * (including when nothing covers the instance).
 *
 * @param instance the constituent to extract the feature for
 * @return a set holding exactly one of Y or N
 */
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    Set<Feature> result = new LinkedHashSet<Feature>();
    TextAnnotation annotation = instance.getTextAnnotation();
    View sourceView = annotation.getView(viewName);
    List<Constituent> covering = sourceView.getConstituentsCovering(instance);

    // An empty covering list simply leaves the flag false, which yields N.
    boolean matched = false;
    for (Constituent candidate : covering) {
        if (contained.contains(candidate.getTokenizedSurfaceForm()) || contained.contains(candidate.getLabel())) {
            matched = true;
            break;
        }
    }

    if (matched) {
        result.add(Y);
    } else {
        result.add(N);
    }
    return result;
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)227 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)121 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)66 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)44 Test (org.junit.Test)43 ArrayList (java.util.ArrayList)37 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)28 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)25 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)22 LinkedHashSet (java.util.LinkedHashSet)22 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)21 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)20 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)18 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)18 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)17 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)14 HashSet (java.util.HashSet)13