Search in sources :

Example 66 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class PrepSRLAnnotator method addView.

/**
 * Builds a span-label view of preposition roles for {@code ta} and registers it under
 * {@code viewName}.
 *
 * <p>Phase one walks the TOKENS view and gathers candidates: the token itself when
 * {@code PrepSRLDataReader.isPrep} accepts it, plus whatever non-null constituents
 * {@code isBigramPrep} and {@code isTrigramPrep} return for the same start position. Phase two
 * asks {@code classifier} for a label per candidate and records a span only when that label is
 * not {@code DataReader.CANDIDATE}.
 *
 * @param ta the text annotation whose tokens are scanned and which receives the new view
 * @throws AnnotatorException declared by the signature; not thrown explicitly in this body
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    List<Constituent> prepCandidates = new ArrayList<>();
    List<Constituent> tokens = ta.getView(ViewNames.TOKENS).getConstituents();
    for (Constituent token : tokens) {
        int start = token.getStartSpan();

        // Single-token candidate: re-home the token in the target view with the placeholder label.
        if (PrepSRLDataReader.isPrep(ta, start)) {
            Constituent unigram = token.cloneForNewViewWithDestinationLabel(viewName, DataReader.CANDIDATE);
            prepCandidates.add(unigram);
        }

        // Multi-token candidates starting at the same position (bigram first, then trigram).
        Constituent bigram = PrepSRLDataReader.isBigramPrep(ta, start, viewName);
        if (bigram != null) {
            prepCandidates.add(bigram);
        }
        Constituent trigram = PrepSRLDataReader.isTrigramPrep(ta, start, viewName);
        if (trigram != null) {
            prepCandidates.add(trigram);
        }
    }

    String generator = viewName + "-annotator";
    SpanLabelView roleView = new SpanLabelView(viewName, generator, ta, 1.0, true);
    for (Constituent candidate : prepCandidates) {
        String predicted = classifier.discreteValue(candidate);
        // A prediction equal to the placeholder label means no role was assigned; skip it.
        if (predicted.equals(DataReader.CANDIDATE)) {
            continue;
        }
        roleView.addSpanLabel(candidate.getStartSpan(), candidate.getEndSpan(), predicted, 1.0);
    }
    ta.addView(viewName, roleView);
}
Also used : ArrayList(java.util.ArrayList) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 67 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class SentencePipelineTest method testFailingPosFile.

/**
 * Reads {@code POS_FILE}, annotates it with {@code sentenceProcessor}, and checks that the
 * sentence at index 3 has at least one POS constituent and exactly as many POS constituents as
 * token constituents overlapping its character span.
 */
@Test
public void testFailingPosFile() {
    TextAnnotation annotated = null;
    try {
        String contents = LineIO.slurp(POS_FILE);
        annotated = sentenceProcessor.createAnnotatedTextAnnotation("testPos", "tesPos", contents);
    } catch (FileNotFoundException e) {
        // Raised while reading the input file.
        e.printStackTrace();
        fail(e.getMessage());
    } catch (AnnotatorException e) {
        // Raised while annotating the text.
        e.printStackTrace();
        fail(e.getMessage());
    }

    // Sentence under test: list index 3 of the SENTENCE view.
    Constituent sentence = annotated.getView(ViewNames.SENTENCE).getConstituents().get(3);
    int charStart = sentence.getStartCharOffset();
    int charEnd = sentence.getEndCharOffset();

    List<Constituent> posInSentence =
            annotated.getView(ViewNames.POS).getConstituentsOverlappingCharSpan(charStart, charEnd);
    List<Constituent> tokensInSentence =
            annotated.getView(ViewNames.TOKENS).getConstituentsOverlappingCharSpan(charStart, charEnd);

    assertTrue(posInSentence.size() > 0);
    assertEquals(tokensInSentence.size(), posInSentence.size());
}
Also used : FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 68 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class LBJavaUtils method recordToLBJTokens.

/**
 * Turns the tokens of a {@link TextAnnotation} into a flat list of LBJ {@code Token}s.
 *
 * <p>Token strings are obtained per sentence via {@link #tokensAsStrings}. Before wrapping, a
 * straight double quote is rewritten to {@code ``} or {@code ''} (alternating, starting with
 * {@code ``} at the beginning of every sentence), and round, curly and square brackets are
 * rewritten to {@code -LRB-}/{@code -RRB-}, {@code -LCB-}/{@code -RCB-} and
 * {@code -LSB-}/{@code -RSB-}. Within a sentence each {@code Word}/{@code Token} is linked to its
 * predecessor; the chain starts afresh for every sentence.
 *
 * <p>If the record has a non-empty POS view, its constituents are consumed in order, one per
 * token across all sentences, and each label is stored as the word's part of speech.
 *
 * @param record the annotation providing the TOKENS and SENTENCE views and, optionally, POS
 * @return all tokens of all sentences, in document order
 */
public static List<Token> recordToLBJTokens(TextAnnotation record) {
    List<Token> result = new LinkedList<>();
    List<List<String>> sentenceTokens = tokensAsStrings(
            record.getView(ViewNames.TOKENS).getConstituents(),
            record.getView(ViewNames.SENTENCE).getConstituents(),
            record.getText());

    List<Constituent> posTags = record.hasView(ViewNames.POS)
            ? record.getView(ViewNames.POS).getConstituents()
            : null;
    // The tag list is never modified below, so this check is loop-invariant.
    boolean usePos = posTags != null && !posTags.isEmpty();
    int nextTag = 0;

    for (List<String> words : sentenceTokens) {
        boolean nextQuoteOpens = true;
        Word prevWord = null;
        Token prevToken = null;

        for (String raw : words) {
            String surface;
            switch (raw) {
                case "\"":
                    surface = nextQuoteOpens ? "``" : "''";
                    nextQuoteOpens = !nextQuoteOpens;
                    break;
                case "(":
                    surface = "-LRB-";
                    break;
                case ")":
                    surface = "-RRB-";
                    break;
                case "{":
                    surface = "-LCB-";
                    break;
                case "}":
                    surface = "-RCB-";
                    break;
                case "[":
                    surface = "-LSB-";
                    break;
                case "]":
                    surface = "-RSB-";
                    break;
                default:
                    surface = raw;
                    break;
            }

            Word word = new Word(surface, prevWord);
            if (usePos) {
                // Tags are assumed to line up one-to-one with tokens; the index is not bounds-checked.
                word.partOfSpeech = posTags.get(nextTag).getLabel();
                nextTag++;
            }

            Token wrapped = new Token(word, prevToken, "");
            result.add(wrapped);
            // Complete the forward link from the previous token in this sentence.
            if (prevToken != null) {
                prevToken.next = wrapped;
            }

            prevWord = word;
            prevToken = wrapped;
        }
    }
    return result;
}
Also used : Word(edu.illinois.cs.cogcomp.lbjava.nlp.Word) Token(edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token) List(java.util.List) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 69 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class LBJavaUtils method tokensAsStrings.

/**
 * Groups token surface strings by sentence.
 *
 * <p>Tokens are visited in list order. A new inner list is started (and the next sentence
 * selected) whenever a token's start span is at or beyond the current sentence's end span. Each
 * token's string is the slice of {@code rawText} between its start and end character offsets.
 *
 * @param tokens token constituents, expected in document order
 * @param sentences sentence constituents; must contain at least one element, since the first is
 *        read unconditionally
 * @param rawText the text the character offsets refer to
 * @return one list of token strings per sentence reached; always contains at least one list
 */
public static List<List<String>> tokensAsStrings(List<Constituent> tokens, List<Constituent> sentences, String rawText) {
    List<List<String>> grouped = new ArrayList<>();
    int sentenceIndex = 0;
    Constituent currentSentence = sentences.get(sentenceIndex);
    List<String> currentWords = new ArrayList<String>();
    grouped.add(currentWords);

    for (Constituent token : tokens) {
        boolean pastSentenceEnd = token.getStartSpan() >= currentSentence.getEndSpan();
        if (pastSentenceEnd) {
            // Open a bucket for the next sentence; only one sentence is advanced per token.
            currentWords = new ArrayList<String>();
            grouped.add(currentWords);
            sentenceIndex += 1;
            currentSentence = sentences.get(sentenceIndex);
        }
        String surface = rawText.substring(token.getStartCharOffset(), token.getEndCharOffset());
        currentWords.add(surface);
    }
    return grouped;
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 70 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method evaluatePerformance.

/**
 * Timing check for the annotator: runs it repeatedly over {@code TEST_INPUT} and asserts that the
 * average wall-clock time per run does not exceed the figure returned by
 * {@code measureMachinePerformance()}.
 *
 * <p>Two untimed runs happen first (one before and one after the machine measurement) so that
 * lazy loading does not count against the timed loop. Inside the timed loop every constituent of
 * the produced view must have a string form contained in {@code entities}.
 */
// @Test
public void evaluatePerformance() {
    final int iterations = 100;

    // First untimed run: trigger any lazy loading before measuring.
    TextAnnotation primer = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(primer);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    long budgetMillis = this.measureMachinePerformance();
    logger.info("Expect " + budgetMillis);

    // Second untimed run: confirm a view is actually produced.
    TextAnnotation warmup = tab.createTextAnnotation(TEST_INPUT);
    View warmupView = null;
    try {
        warmupView = getView(warmup);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(warmupView != null);

    // Timed section.
    long startedAt = System.currentTimeMillis();
    for (int run = 0; run < iterations; run++) {
        TextAnnotation input = tab.createTextAnnotation(TEST_INPUT);
        View produced = null;
        try {
            produced = getView(input);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(produced != null);
        for (Constituent entity : produced.getConstituents()) {
            String rendered = entity.toString();
            assertTrue("No entity named \"" + rendered + "\"", entities.contains(entity.toString()));
        }
    }
    long elapsedMillis = System.currentTimeMillis() - startedAt;
    long averageMillis = elapsedMillis / iterations;

    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), averageMillis, (double) averageMillis / (double) budgetMillis);
    assertTrue(averageMillis <= budgetMillis);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)176 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)95 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)51 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)44 Test (org.junit.Test)39 ArrayList (java.util.ArrayList)29 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)25 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)24 LinkedHashSet (java.util.LinkedHashSet)22 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)20 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)20 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)17 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)14 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)13 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)12 HashSet (java.util.HashSet)12 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)11