Search in sources :

Example 41 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class MainServer method annotateText.

/**
 * Builds a basic {@link TextAnnotation} for {@code text}, adds each requested view to it and
 * returns the annotation serialized as JSON.
 *
 * <p>Adding views is best-effort: if a view cannot be added, the stack trace is printed and
 * the remaining views are still processed.
 *
 * @param finalPipeline the annotator service used to create the annotation and add views
 * @param text the raw text to annotate
 * @param views comma-separated view names; surrounding whitespace of each name is ignored
 * @param logger the logger that receives progress messages
 * @return the JSON serialization of the annotation, or a usage message when {@code text} or
 *         {@code views} is {@code null}
 * @throws AnnotatorException declared for failures while creating the basic annotation
 */
private static String annotateText(AnnotatorService finalPipeline, String text, String views, Logger logger) throws AnnotatorException {
    if (views == null || text == null) {
        return "The parameters 'text' and/or 'views' are not specified. Here is a sample input:  \n ?text=\"This is a sample sentence. I'm happy.\"&views=POS,NER";
    }
    logger.info("------------------------------");
    logger.info("Text: " + text);
    logger.info("Views to add: " + views);
    String[] viewsInArray = views.split(",");
    logger.info("Adding the basic annotations . . . ");
    TextAnnotation ta = finalPipeline.createBasicTextAnnotation("", "", text);
    for (String vuName : viewsInArray) {
        // Trim once so that the logged name and the requested name are always the same value.
        String viewName = vuName.trim();
        logger.info("Adding the view: ->" + viewName + "<-");
        try {
            finalPipeline.addView(ta, viewName);
        } catch (Exception e) {
            // Deliberately non-fatal: one failing view must not prevent the others.
            e.printStackTrace();
        }
        printMemoryDetails(logger);
    }
    // The annotation is being serialized here (the previous message said "Deserializing").
    logger.info("Done adding the views. Serializing the view now.");
    String output = SerializationHelper.serializeToJson(ta);
    logger.info("Done. Sending the result back. ");
    return output;
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) IOException(java.io.IOException) HelpScreenException(net.sourceforge.argparse4j.internal.HelpScreenException) ArgumentParserException(net.sourceforge.argparse4j.inf.ArgumentParserException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException)

Example 42 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class SentencePipelineTest method testFailingPosFile.

/**
 * Annotates the contents of {@code POS_FILE} and checks that, for the sentence at index 3,
 * the POS view has at least one constituent and exactly as many constituents as the TOKENS
 * view over the same character span.
 */
@Test
public void testFailingPosFile() {
    String fileContents = null;
    try {
        fileContents = LineIO.slurp(POS_FILE);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    TextAnnotation annotation = null;
    try {
        annotation = sentenceProcessor.createAnnotatedTextAnnotation("testPos", "tesPos", fileContents);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // Character span of the sentence under test (index 3 in the SENTENCE view).
    Constituent sentence = annotation.getView(ViewNames.SENTENCE).getConstituents().get(3);
    int spanStart = sentence.getStartCharOffset();
    int spanEnd = sentence.getEndCharOffset();

    List<Constituent> posInSentence = annotation.getView(ViewNames.POS).getConstituentsOverlappingCharSpan(spanStart, spanEnd);
    List<Constituent> tokensInSentence = annotation.getView(ViewNames.TOKENS).getConstituentsOverlappingCharSpan(spanStart, spanEnd);

    assertTrue(posInSentence.size() > 0);
    assertEquals(tokensInSentence.size(), posInSentence.size());
}
Also used : FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 43 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method testTokenization.

/**
 * Tests that tokenization produces the correct number of constituents: 2 for
 * {@code TOKEN_TEST} and 3 for the second, inline sentence.
 */
@Test
public void testTokenization() {
    TextAnnotation ta = tab.createTextAnnotation(TOKEN_TEST);
    View nerView = null;
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // Expected value goes first so that a failure message reports expected/actual correctly.
    assertEquals(2, nerView.getConstituents().size());
    String tokTestB = "Grigory Pasko, crusading Russian journalist who documented Russian Navy mishandling of " + "nuclear waste, is released on parole after serving two-thirds of his four-year prison sentence.";
    ta = tab.createTextAnnotation(tokTestB);
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertEquals(3, nerView.getNumberOfConstituents());
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Test(org.junit.Test)

Example 44 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class NERAnnotatorTest method evaluatePerformance.

/**
 * Checks that annotating {@code TEST_INPUT} runs in reasonable time. The time budget comes
 * from {@code measureMachinePerformance()}, so it reflects the machine the test runs on.
 * The test is currently disabled: its {@code @Test} annotation is commented out.
 */
// @Test
public void evaluatePerformance() {
    final int iterations = 100;

    // Warm-up run: force any lazy loading to happen before timing starts.
    TextAnnotation warmUp = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(warmUp);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    long expectedPerformance = this.measureMachinePerformance();
    logger.info("Expect " + expectedPerformance);

    // Sanity run: a single untimed annotation must yield a view.
    TextAnnotation sanityTa = tab.createTextAnnotation(TEST_INPUT);
    View sanityView = null;
    try {
        sanityView = getView(sanityTa);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(sanityView != null);

    // Timed section.
    long begin = System.currentTimeMillis();
    int run = 0;
    while (run < iterations) {
        TextAnnotation ta = tab.createTextAnnotation(TEST_INPUT);
        View view = null;
        try {
            view = getView(ta);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(view != null);
        for (Constituent entity : view.getConstituents()) {
            assertTrue("No entity named \"" + entity.toString() + "\"", entities.contains(entity.toString()));
        }
        run++;
    }
    long elapsed = System.currentTimeMillis() - begin;

    // Integer division: average milliseconds per annotation, truncated.
    long average = elapsed / iterations;
    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), average, (double) average / (double) expectedPerformance);
    assertTrue(average <= expectedPerformance);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 45 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ServerClientAnnotator method annotate.

/**
 * Returns the annotation of {@code str} with the configured views, using the local cache when
 * one is configured and otherwise POSTing the text to the {@code /annotate} endpoint.
 *
 * <p>The cache key is the SHA-1 hex digest of the text concatenated with the comma-separated
 * view names. A fresh server response is written back to the cache and committed.
 *
 * <p>The method is synchronized since the caching seems to have issues upon multi-threaded
 * caching.
 *
 * @param str the text to annotate
 * @param overwrite if true, it would overwrite the values on cache
 * @return the annotation, read from the cache or deserialized from the server's JSON response
 * @throws Exception if the request, the (de)serialization or the cache access fails
 */
public synchronized TextAnnotation annotate(String str, boolean overwrite) throws Exception {
    // Arrays.toString yields "[a, b, c]"; strip the brackets and spaces to get "a,b,c".
    String viewsConnected = Arrays.toString(viewsToAdd);
    String views = viewsConnected.substring(1, viewsConnected.length() - 1).replace(" ", "");
    ConcurrentMap<String, byte[]> concurrentMap = (db != null) ? db.hashMap(viewName, Serializer.STRING, Serializer.BYTE_ARRAY).createOrOpen() : null;
    String key = DigestUtils.sha1Hex(str + views);
    if (!overwrite && concurrentMap != null && concurrentMap.containsKey(key)) {
        byte[] taByte = concurrentMap.get(key);
        return SerializationHelper.deserializeTextAnnotationFromBytes(taByte);
    }

    StringBuilder response = new StringBuilder();
    URL obj = new URL(url + ":" + port + "/annotate");
    HttpURLConnection con = (HttpURLConnection) obj.openConnection();
    try {
        con.setRequestMethod("POST");
        con.setRequestProperty("charset", "utf-8");
        con.setRequestProperty("Content-Type", "text/plain; charset=utf-8");
        con.setDoOutput(true);
        con.setUseCaches(false);
        // Encode the body as UTF-8 explicitly: the request headers declare utf-8, so relying on
        // the platform default charset would corrupt non-ASCII text on some systems.
        try (OutputStreamWriter wr = new OutputStreamWriter(con.getOutputStream(), "UTF-8")) {
            wr.write("text=" + URLEncoder.encode(str, "UTF-8") + "&views=" + views);
            wr.flush();
        }
        // Decode the response as UTF-8 for the same reason; the reader is closed even when
        // reading fails.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }
    } finally {
        // Always release the connection, including on I/O errors.
        con.disconnect();
    }

    TextAnnotation ta = SerializationHelper.deserializeFromJson(response.toString());
    if (concurrentMap != null) {
        concurrentMap.put(key, SerializationHelper.serializeTextAnnotationToBytes(ta));
        this.db.commit();
    }
    return ta;
}
Also used : HttpURLConnection(java.net.HttpURLConnection) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) URL(java.net.URL)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)292 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)121 Test (org.junit.Test)84 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)60 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)40 ArrayList (java.util.ArrayList)33 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)32 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)27 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)24 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 IOException (java.io.IOException)22 LinkedHashSet (java.util.LinkedHashSet)21 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)20 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)19 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)18 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)18 File (java.io.File)18 XmlTextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation)16