use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class MainServer method annotateText.
/**
 * Creates a basic {@code TextAnnotation} for {@code text}, tries to add each requested view to
 * it, and returns the annotation serialized as JSON.
 *
 * <p>Adding views is best-effort: a failure on one view is logged and the remaining views are
 * still attempted, so the returned JSON may lack some of the requested views.
 *
 * @param finalPipeline the annotator service used to create the annotation and add views
 * @param text the raw text to annotate; may be {@code null}
 * @param views comma-separated names of the views to add; may be {@code null}
 * @param logger receives progress messages and per-view failures
 * @return the JSON form of the annotation, or a usage message when {@code text} or
 *         {@code views} is {@code null}
 * @throws AnnotatorException declared for the pipeline call that creates the basic annotation,
 *         which runs outside the per-view try/catch
 */
private static String annotateText(AnnotatorService finalPipeline, String text, String views, Logger logger) throws AnnotatorException {
    if (views == null || text == null) {
        return "The parameters 'text' and/or 'views' are not specified. Here is a sample input: \n ?text=\"This is a sample sentence. I'm happy.\"&views=POS,NER";
    }

    logger.info("------------------------------");
    logger.info("Text: " + text);
    logger.info("Views to add: " + views);
    String[] viewsInArray = views.split(",");
    logger.info("Adding the basic annotations . . . ");
    TextAnnotation ta = finalPipeline.createBasicTextAnnotation("", "", text);
    for (String vuName : viewsInArray) {
        // Callers may put spaces after the commas ("POS, NER"); normalize once and reuse.
        String viewName = vuName.trim();
        logger.info("Adding the view: ->" + viewName + "<-");
        try {
            finalPipeline.addView(ta, viewName);
        } catch (Exception e) {
            // Deliberately keep going so one bad view does not discard the others, but report
            // the failure through the logger rather than stderr so it is not lost.
            logger.error("Failed to add the view: ->" + viewName + "<-", e);
        }
        printMemoryDetails(logger);
    }
    logger.info("Done adding the views. Serializing the annotation now.");
    String output = SerializationHelper.serializeToJson(ta);
    logger.info("Done. Sending the result back. ");
    return output;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class SentencePipelineTest method testFailingPosFile.
/**
 * Annotates the contents of {@code POS_FILE} and checks that, over the character span of the
 * sentence at index 3, the POS view is non-empty and has exactly as many constituents as the
 * TOKENS view.
 */
@Test
public void testFailingPosFile() {
    String fileContents = null;
    try {
        fileContents = LineIO.slurp(POS_FILE);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    TextAnnotation annotated = null;
    try {
        annotated = sentenceProcessor.createAnnotatedTextAnnotation("testPos", "tesPos", fileContents);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // Character span of the sentence under test.
    Constituent sentence = annotated.getView(ViewNames.SENTENCE).getConstituents().get(3);
    int spanStart = sentence.getStartCharOffset();
    int spanEnd = sentence.getEndCharOffset();

    List<Constituent> posInSpan =
            annotated.getView(ViewNames.POS).getConstituentsOverlappingCharSpan(spanStart, spanEnd);
    List<Constituent> tokensInSpan =
            annotated.getView(ViewNames.TOKENS).getConstituentsOverlappingCharSpan(spanStart, spanEnd);

    assertTrue(posInSpan.size() > 0);
    assertEquals(tokensInSpan.size(), posInSpan.size());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method testTokenization.
/**
 * Tests that tokenization produces the correct number of constituents: the NER view built for
 * {@code TOKEN_TEST} must hold 2 constituents, and the one built for a second, longer sentence
 * must hold 3.
 */
@Test
public void testTokenization() {
    TextAnnotation ta = tab.createTextAnnotation(TOKEN_TEST);
    View nerView = null;
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // assertEquals takes (expected, actual); keeping that order makes a failure report
    // "expected 2 but was N" instead of the reverse.
    assertEquals(2, nerView.getConstituents().size());
    String tokTestB = "Grigory Pasko, crusading Russian journalist who documented Russian Navy mishandling of " + "nuclear waste, is released on parole after serving two-thirds of his four-year prison sentence.";
    ta = tab.createTextAnnotation(tokTestB);
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertEquals(3, nerView.getNumberOfConstituents());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method evaluatePerformance.
/**
 * Checks that NER annotation runs in reasonable time. The average per-run time over a fixed
 * number of runs on {@code TEST_INPUT} is compared against the value returned by
 * {@code measureMachinePerformance()}, so the threshold follows the machine the test runs on.
 * Every constituent produced during the timed runs must also be one of the known
 * {@code entities}.
 */
// @Test
public void evaluatePerformance() {
    final int runs = 100;

    // Annotate once up front so any lazy loading happens outside the timed section.
    TextAnnotation warmUpTa = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(warmUpTa);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    long expectedPerformance = this.measureMachinePerformance();
    logger.info("Expect " + expectedPerformance);

    // One untimed run that must yield a view.
    TextAnnotation probeTa = tab.createTextAnnotation(TEST_INPUT);
    View probeView = null;
    try {
        probeView = getView(probeTa);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(probeView != null);

    // Timed section.
    long startedAt = System.currentTimeMillis();
    for (int run = 0; run < runs; run++) {
        TextAnnotation timedTa = tab.createTextAnnotation(TEST_INPUT);
        View nerView = null;
        try {
            nerView = getView(timedTa);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(nerView != null);
        for (Constituent found : nerView.getConstituents()) {
            String surface = found.toString();
            assertTrue("No entity named \"" + surface + "\"", entities.contains(surface));
        }
    }
    long elapsedMillis = System.currentTimeMillis() - startedAt;
    long averageMillis = elapsedMillis / runs;

    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), averageMillis, (double) averageMillis / (double) expectedPerformance);
    assertTrue(averageMillis <= expectedPerformance);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ServerClientAnnotator method annotate.
/**
 * Returns the annotation of {@code str} with the configured views, using the local cache when
 * one is configured and otherwise POSTing the text to the remote {@code /annotate} endpoint.
 *
 * <p>The method is synchronized since the caching seems to have issues upon multi-threaded
 * caching.
 *
 * @param str the text to annotate
 * @param overwrite if true, the cache is not consulted and the freshly fetched result
 *        overwrites any cached value
 * @return the annotation, either deserialized from the cache or parsed from the server's JSON
 *         response
 * @throws Exception on any connection, I/O or (de)serialization failure
 */
public synchronized TextAnnotation annotate(String str, boolean overwrite) throws Exception {
    // Arrays.toString yields "[A, B, C]"; strip the brackets and spaces to get "A,B,C".
    String viewsConnected = Arrays.toString(viewsToAdd);
    String views = viewsConnected.substring(1, viewsConnected.length() - 1).replace(" ", "");
    ConcurrentMap<String, byte[]> concurrentMap = (db != null) ? db.hashMap(viewName, Serializer.STRING, Serializer.BYTE_ARRAY).createOrOpen() : null;
    // The cache key covers both the text and the requested views.
    String key = DigestUtils.sha1Hex(str + views);

    if (!overwrite && concurrentMap != null) {
        byte[] cached = concurrentMap.get(key);
        if (cached != null) {
            return SerializationHelper.deserializeTextAnnotationFromBytes(cached);
        }
    }

    HttpURLConnection con = (HttpURLConnection) new URL(url + ":" + port + "/annotate").openConnection();
    StringBuilder response = new StringBuilder();
    try {
        con.setRequestMethod("POST");
        con.setRequestProperty("charset", "utf-8");
        con.setRequestProperty("Content-Type", "text/plain; charset=utf-8");
        con.setDoOutput(true);
        con.setUseCaches(false);

        // Write the body as UTF-8 to match the charset declared in the headers above, rather
        // than relying on the platform default encoding.
        try (OutputStreamWriter wr = new OutputStreamWriter(con.getOutputStream(), "UTF-8")) {
            wr.write("text=" + URLEncoder.encode(str, "UTF-8") + "&views=" + views);
            wr.flush();
        }

        // NOTE(review): the response is decoded as UTF-8 on the assumption that the server
        // answers in the charset it was sent - confirm against the server implementation.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }
    } finally {
        // Release the connection even when the request or the read fails.
        con.disconnect();
    }

    TextAnnotation ta = SerializationHelper.deserializeFromJson(response.toString());
    if (concurrentMap != null) {
        concurrentMap.put(key, SerializationHelper.serializeTextAnnotationToBytes(ta));
        this.db.commit();
    }
    return ta;
}
Aggregations