Search in sources :

Example 1 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotator method addView.

/**
 * Adds a span-label view holding every gazetteer match found in the text. The view may
 * contain overlapping constituents, because a token can match entries in several
 * gazetteers.
 *
 * <p>For each position in the {@code TOKENS} view, each entry of {@code dictionaries} and
 * the entry at the same index of {@code dictionariesIgnoreCase} is asked to record its
 * matches starting at that position.
 *
 * @param ta the text annotation to read tokens from and to attach the new view to
 * @throws AnnotatorException declared by the annotator contract
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    final List<Constituent> tokens = ta.getView(ViewNames.TOKENS).getConstituents();
    final SpanLabelView gazetteerView =
            new SpanLabelView(this.getViewName(), this.getClass().getName(), ta, 1d, true);
    int tokenPos = 0;
    while (tokenPos < tokens.size()) {
        int dictPos = 0;
        // Both lists are walked with the same index, bounded by dictionaries.size().
        while (dictPos < dictionaries.size()) {
            dictionaries.get(dictPos).match(tokens, tokenPos, gazetteerView);
            dictionariesIgnoreCase.get(dictPos).match(tokens, tokenPos, gazetteerView);
            dictPos++;
        }
        tokenPos++;
    }
    ta.addView(gazetteerView.getViewName(), gazetteerView);
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 2 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class TreebankChunkReader method addChunkAnnotation.

/**
 * Reads the chunk lines of one sentence and attaches them to {@code textAnnotation} as a
 * {@code SHALLOW_PARSE} span-label view.
 *
 * <p>Lines are consumed from {@code chunkLines} starting at the field
 * {@code currentChunkLineId}, which is advanced past every line read. Reading stops at the
 * first blank line or at the end of {@code chunkLines}. A {@code "# Sentence <file>/<tree>"}
 * header is validated against the current treebank file and tree id; other {@code #} lines
 * are skipped. Every remaining line is split on runs of spaces: field 1 is the token id,
 * field 2 the chunk label ({@code B...}, {@code I-...} or {@code O...}) and field 4 the word.
 *
 * <p>A chunk that is still open when the sentence ends is closed at the last token read, so
 * a sentence whose final token belongs to a chunk does not lose that chunk.
 *
 * @param textAnnotation the annotation to add the chunk view to
 * @param chunkLineId not read by this method; the field {@code currentChunkLineId} is used
 *        instead
 * @return the same {@code textAnnotation}, with the chunk view added
 * @throws IllegalStateException if the sentence header, a word or the label sequence does
 *         not match what is expected
 */
private TextAnnotation addChunkAnnotation(TextAnnotation textAnnotation, int chunkLineId) {
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "Gold", textAnnotation, 1.0);
    // Label of the most recent B/O line; "" until the first such line has been seen.
    // It is never set to an I- label, so only "", B... and O... states exist.
    String currentChunkLabel = "";
    int start = -1;
    // Id of the last token line processed, needed to close a chunk that runs to the end.
    int lastTokenId = -1;
    while (currentChunkLineId < chunkLines.size()) {
        String line = chunkLines.get(currentChunkLineId++);
        if (line.trim().length() == 0)
            break;
        if (line.startsWith("#")) {
            if (!line.startsWith("# Sentence"))
                continue;
            line = line.replaceAll("# Sentence ", "");
            String[] parts = line.split("/");
            String fId = parts[0];
            int tId = Integer.parseInt(parts[1]);
            if (!currentSectionFiles[currentFileId - 1].equals("wsj_" + fId + ".mrg"))
                throw new IllegalStateException(currentSectionFiles[currentFileId - 1] + " does not match " + "wsj_" + fId + ".mrg");
            if (tId != this.treeInFile)
                throw new IllegalStateException("Expected tree id: " + tId + ", found: " + (this.treeInFile));
            continue;
        }
        String[] parts = line.split(" +");
        int id = Integer.parseInt(parts[1]);
        String chunkLabel = parts[2];
        String word = parts[4];
        word = SentenceUtils.makeSentencePresentable(word);
        String expectedWord = textAnnotation.getToken(id);
        if (!word.equals(expectedWord))
            throw new IllegalStateException("Expected word: " + expectedWord + ", found " + word);
        lastTokenId = id;
        if (currentChunkLabel.equals("")) {
            if (chunkLabel.startsWith("B")) {
                start = id;
                currentChunkLabel = chunkLabel;
            } else if (!chunkLabel.startsWith("O")) {
                throw new IllegalStateException("Expected B, found " + chunkLabel);
            }
        } else if (currentChunkLabel.startsWith("B")) {
            if (chunkLabel.startsWith("B")) {
                // A new chunk begins: close the open one just before this token.
                closeChunk(chunkView, start, id, currentChunkLabel);
                currentChunkLabel = chunkLabel;
                start = id;
            } else if (chunkLabel.startsWith("I-")) {
                // Continuation of the open chunk: nothing to record yet.
            } else if (chunkLabel.startsWith("O")) {
                closeChunk(chunkView, start, id, currentChunkLabel);
                start = -1;
                currentChunkLabel = chunkLabel;
            }
        } else if (currentChunkLabel.startsWith("O")) {
            if (chunkLabel.startsWith("B")) {
                currentChunkLabel = chunkLabel;
                start = id;
            } else if (chunkLabel.startsWith("I-")) {
                throw new IllegalStateException("Expected B, found " + chunkLabel);
            } else if (chunkLabel.startsWith("O")) {
                currentChunkLabel = chunkLabel;
            }
        }
    }
    // end of while
    // The sentence ended while a chunk was still open: close it after the last token.
    if (currentChunkLabel.startsWith("B"))
        closeChunk(chunkView, start, lastTokenId + 1, currentChunkLabel);
    textAnnotation.addView(ViewNames.SHALLOW_PARSE, chunkView);
    return textAnnotation;
}

/** Adds the span {@code [start, end)} to {@code chunkView}, labelled with {@code chunkLabel} minus its {@code "B-"} prefix. */
private static void closeChunk(SpanLabelView chunkView, int start, int end, String chunkLabel) {
    if (start < 0)
        throw new IllegalStateException("Start <0");
    chunkView.addSpanLabel(start, end, chunkLabel.replaceAll("B-", ""), 1d);
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)

Example 3 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class BrownClusterViewGenerator method addView.

/**
 * Adds a span-label view built from the spans returned by {@code getMatchingSpans(ta)}.
 *
 * <p>Within one label, a span is skipped when a span already accepted for that label fully
 * covers it, so nested constituents of the same type are not added. Spans are considered in
 * the order the list provides them.
 *
 * @param ta the text annotation to attach the new view to
 */
@Override
public void addView(TextAnnotation ta) {
    lazyLoadClusters();
    SpanLabelView clusterView = new SpanLabelView(getViewName(), "BrownClusters", ta, 1.0, true);
    for (Entry<String, List<IntPair>> spansForLabel : getMatchingSpans(ta).entrySet()) {
        final String label = spansForLabel.getKey();
        final Set<IntPair> accepted = new LinkedHashSet<>();
        candidates:
        for (IntPair candidate : spansForLabel.getValue()) {
            for (IntPair earlier : accepted) {
                // The very same object is never treated as its own container.
                boolean sameObject = earlier == candidate;
                boolean covers = earlier.getFirst() <= candidate.getFirst()
                        && earlier.getSecond() >= candidate.getSecond();
                if (!sameObject && covers)
                    continue candidates;
            }
            clusterView.addSpanLabel(candidate.getFirst(), candidate.getSecond(), label, 1.0);
            accepted.add(candidate);
        }
    }
    ta.addView(getViewName(), clusterView);
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair)

Example 4 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class CurrencyIndicator method getFeatures.

/**
 * Returns a set containing the {@code CURRENCY} feature when the constituent holds a
 * currency mention next to a number, and an empty set otherwise.
 *
 * <p>The currency list is loaded on first use, and the currency view is added to the text
 * annotation if it is not there yet. Each currency constituent contained in {@code c} is
 * then inspected: if it ends where {@code c} ends, the token before it is looked up in
 * {@code WordLists.NUMBERS}; otherwise the token right after it is passed to the number
 * normalizer.
 *
 * @param c the constituent to extract features for
 * @return a set that is either empty or contains only {@code CURRENCY}
 * @throws EdisonException if loading the currency list or adding the currency view fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        // The flag is tested again inside the synchronized block before loading.
        if (!loaded)
            synchronized (this) {
                // now its changed to be loaded from datastore.
                if (!loaded)
                    loadCurrency(gzip, true);
            }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }
    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Report the failure the same way as a failed load above instead of printing
            // the stack trace and carrying on without the view.
            throw new EdisonException(e);
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // NOTE(review): cc is selected as contained in c, so its start should not lie
            // beyond c's end and this guard looks unsatisfiable. Presumably the intent was
            // to check that the preceding token is still inside c - confirm before changing.
            if (cc.getStartSpan() - 1 > c.getEndSpan()) {
                // check if this is a number
                if (WordLists.NUMBERS.contains(ta.getToken(cc.getStartSpan() - 1).toLowerCase())) {
                    features.add(CURRENCY);
                    break;
                }
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
Also used : EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) URISyntaxException(java.net.URISyntaxException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 5 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotatorTest method testMultiThreading.

/**
 * Test method for
 * {@link edu.illinois.cs.cogcomp.edison.annotators.SimpleGazetteerAnnotator#addView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)}
 * .
 *
 * <p>Starts 20 threads that share one annotator. Each thread repeatedly annotates the same
 * sentence and checks the resulting gazetteer constituents for about ten seconds. Any
 * failure inside a worker, including unchecked exceptions, is recorded and fails the test
 * once that worker has been joined.
 *
 * @throws URISyntaxException
 * @throws IOException
 * @throws AnnotatorException
 */
@Test
public void testMultiThreading() throws IOException, URISyntaxException, AnnotatorException {
    final SimpleGazetteerAnnotator sga = new SimpleGazetteerAnnotator(defaultRm);
    class TestThread extends Thread {

        // First failure seen by this worker, or null if it finished cleanly.
        // Read by the main thread only after join().
        Throwable throwable;

        @Override
        public void run() {
            long start = System.currentTimeMillis();
            try {
                while (true) {
                    final TextAnnotation ta = tab.createTextAnnotation("I hail from the university of illinois at champaign urbana.");
                    sga.addView(ta);
                    SpanLabelView view = (SpanLabelView) ta.getView(ViewNames.TREE_GAZETTEER);
                    List<Constituent> entities = view.getConstituents();
                    Constituent c1 = entities.get(0);
                    assertEquals("university of illinois", c1.toString());
                    Constituent c2 = entities.get(1);
                    assertEquals("university of illinois at champaign urbana", c2.toString());
                    Constituent c3 = entities.get(2);
                    assertEquals("illinois", c3.toString());
                    Constituent c4 = entities.get(3);
                    assertEquals("champaign", c4.toString());
                    Constituent c5 = entities.get(4);
                    assertEquals("urbana", c5.toString());
                    assertEquals("organizations(IC)", c1.getLabel());
                    assertEquals("organizations(IC)", c2.getLabel());
                    assertEquals("places(IC)", c3.getLabel());
                    assertEquals("places(IC)", c4.getLabel());
                    assertEquals("places(IC)", c5.getLabel());
                    if ((System.currentTimeMillis() - start) > 10000L) {
                        // run for ten seconds.
                        return;
                    }
                }
            } catch (Throwable t) {
                // Record everything, not only assertion failures: an exception that
                // escapes run() would otherwise end the thread and let the test pass.
                throwable = t;
                t.printStackTrace();
            }
        }
    }
    final int numthreads = 20;
    TestThread[] threads = new TestThread[numthreads];
    for (int i = 0; i < numthreads; i++) {
        threads[i] = new TestThread();
        threads[i].start();
    }
    logger.info("Begin multithreaded test.");
    for (int i = 0; i < numthreads; i++) {
        while (true) {
            try {
                threads[i].join();
                assertEquals("Exception during multithreading test : " + threads[i].throwable, null, threads[i].throwable);
                break;
            } catch (InterruptedException ignored) {
                // Keep waiting for this worker so its result is always checked.
                continue;
            }
        }
    }
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Aggregations

SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)24 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)12 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)12 ArrayList (java.util.ArrayList)5 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)4 Test (org.junit.Test)3 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)2 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)2 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)2 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)2 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)2 Annotation (edu.stanford.nlp.pipeline.Annotation)2 CoreMap (edu.stanford.nlp.util.CoreMap)2 LinkedHashSet (java.util.LinkedHashSet)2 Properties (java.util.Properties)2 Comma (edu.illinois.cs.cogcomp.comma.datastructures.Comma)1 CommaSRLSentence (edu.illinois.cs.cogcomp.comma.datastructures.CommaSRLSentence)1 Option (edu.illinois.cs.cogcomp.core.datastructures.Option)1