use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ServerClientAnnotator method annotate.
/**
 * Returns a {@link TextAnnotation} for {@code str} carrying the views listed in
 * {@code viewsToAdd}.
 *
 * <p>When {@code db} is non-null, the map opened from it under {@code viewName} is used as a
 * cache keyed by the SHA-1 hex digest of the text concatenated with the comma-joined view
 * names. On a cache hit the stored bytes are deserialized and returned without any network
 * traffic. Otherwise the text is POSTed to {@code url:port/annotate}, the JSON response is
 * deserialized, and (when the cache is available) the result is stored and committed.
 *
 * @param str the raw text to annotate
 * @return the annotation read from the cache or returned by the server
 * @throws Exception if the connection, the stream I/O, or the (de)serialization fails
 */
public TextAnnotation annotate(String str) throws Exception {
    // Arrays.toString yields "[A, B]"; drop the brackets and spaces to get "A,B".
    String viewsConnected = Arrays.toString(viewsToAdd);
    String views = viewsConnected.substring(1, viewsConnected.length() - 1).replace(" ", "");
    ConcurrentMap<String, byte[]> concurrentMap = (db != null) ? db.hashMap(viewName, Serializer.STRING, Serializer.BYTE_ARRAY).createOrOpen() : null;
    String key = DigestUtils.sha1Hex(str + views);
    if (concurrentMap != null && concurrentMap.containsKey(key)) {
        byte[] taByte = concurrentMap.get(key);
        return SerializationHelper.deserializeTextAnnotationFromBytes(taByte);
    }

    URL obj = new URL(url + ":" + port + "/annotate");
    HttpURLConnection con = (HttpURLConnection) obj.openConnection();
    con.setRequestMethod("POST");
    con.setRequestProperty("charset", "utf-8");
    con.setRequestProperty("Content-Type", "text/plain; charset=utf-8");
    con.setDoOutput(true);
    con.setUseCaches(false);

    // Encode the body as UTF-8 explicitly so it matches the charset declared in the headers
    // instead of depending on the platform default; closing the writer also flushes it.
    try (OutputStreamWriter wr = new OutputStreamWriter(con.getOutputStream(), "UTF-8")) {
        wr.write("text=" + URLEncoder.encode(str, "UTF-8") + "&views=" + views);
    }

    int responseCode = con.getResponseCode();
    logger.debug("\nSending '" + con.getRequestMethod() + "' request to URL : " + url);
    logger.debug("Response Code : " + responseCode);

    // Line terminators are dropped while accumulating the response, as before.
    StringBuilder response = new StringBuilder();
    try (BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"))) {
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
            response.append(inputLine);
        }
    }

    TextAnnotation ta = SerializationHelper.deserializeFromJson(response.toString());
    if (concurrentMap != null) {
        concurrentMap.put(key, SerializationHelper.serializeTextAnnotationToBytes(ta));
        this.db.commit();
    }
    return ta;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class CurrencyIndicator method getFeatures.
/**
 * Produces the {@code CURRENCY} feature for {@code c} when a constituent of the
 * {@code VIEW_NAME} view that matches {@code Queries.containedInConstituent(c)} satisfies
 * one of the number checks below; otherwise returns an empty set.
 *
 * <p>The currency resource is loaded on first use, and the view is added to the text
 * annotation if it is not already present.
 *
 * @param c the constituent to extract features for
 * @return a set that is either empty or contains only {@code CURRENCY}
 * @throws EdisonException if loading the currency resource or adding the view fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        if (!loaded) {
            synchronized (this) {
                // now its changed to be loaded from datastore.
                // Checked again while holding the lock before loading.
                if (!loaded) {
                    loadCurrency(gzip, true);
                }
            }
        }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }
    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Propagate instead of printing and continuing: the lookup right below needs the
            // view this call was supposed to add. Mirrors the load-failure handling above.
            throw new EdisonException(e);
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // NOTE(review): given the equality just checked, this condition requires cc's
            // start to exceed cc's own end by more than one, which looks unsatisfiable.
            // Possibly c.getStartSpan() was intended -- confirm before changing.
            if (cc.getStartSpan() - 1 > c.getEndSpan()) {
                // check if this is a number
                if (WordLists.NUMBERS.contains(ta.getToken(cc.getStartSpan() - 1).toLowerCase())) {
                    features.add(CURRENCY);
                    break;
                }
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ParseHeadWordFeatureExtractor method getFeatures.
/**
 * Runs the wrapped extractor {@code fex} on the head word of the parse phrase that the
 * {@code parseViewName} tree view reports for {@code c}.
 *
 * @param c the constituent whose head word is to be featurized
 * @return the features {@code fex} produces for the single-token head constituent
 * @throws EdisonException if the parse phrase lookup fails, or if {@code fex} throws it
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();
    final TreeView parseTree = (TreeView) annotation.getView(parseViewName);

    Constituent parsePhrase;
    try {
        parsePhrase = parseTree.getParsePhrase(c);
    } catch (Exception cause) {
        throw new EdisonException(cause);
    }

    // Build a one-token constituent (empty label and view name) at the head position.
    final int headIndex = CollinsHeadFinder.getInstance().getHeadWordPosition(parsePhrase);
    final Constituent headToken = new Constituent("", "", annotation, headIndex, headIndex + 1);

    final Set<Feature> result = new LinkedHashSet<>();
    result.addAll(fex.getFeatures(headToken));
    return result;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ContextFeatureExtractor method getFeatures.
/**
 * Applies every extractor in {@code generators} to each single token in a window of
 * {@code contextSize} tokens on either side of {@code c}, and prefixes each resulting
 * feature with {@code "context"}, an optional relative index, {@code ":"} and the
 * extractor's name.
 *
 * <p>With {@code specifyIndex} set, tokens before {@code c} get a negative offset from its
 * start, tokens at or after its end get a positive offset starting at 1, and tokens inside
 * {@code c} get {@code "*"}. With {@code ignoreConstituent} set, tokens inside {@code c}
 * are skipped entirely.
 *
 * @param c the constituent whose surrounding context is featurized
 * @return the prefixed context features, in insertion order
 * @throws EdisonException if one of the wrapped extractors throws it
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final TextAnnotation annotation = c.getTextAnnotation();

    // Clamp the window to the bounds of the text.
    final int windowStart = Math.max(0, c.getStartSpan() - contextSize);
    final int windowEnd = Math.min(annotation.size(), c.getEndSpan() + contextSize);

    final Set<Feature> collected = new LinkedHashSet<>();
    for (int pos = windowStart; pos < windowEnd; pos++) {
        final boolean insideConstituent = c.getStartSpan() <= pos && pos < c.getEndSpan();
        if (ignoreConstituent && insideConstituent) {
            continue;
        }

        // The prefix depends only on the token position, so build it once per position.
        final StringBuilder prefix = new StringBuilder("context");
        if (specifyIndex) {
            if (pos < c.getStartSpan()) {
                prefix.append(pos - c.getStartSpan());
            } else if (pos >= c.getEndSpan()) {
                prefix.append(pos - c.getEndSpan() + 1);
            } else {
                prefix.append("*");
            }
        }
        prefix.append(":");
        final String preamble = prefix.toString();

        for (FeatureExtractor extractor : this.generators) {
            // A fresh single-token constituent is handed to each extractor.
            final Constituent token = new Constituent("TMP", "TMP", annotation, pos, pos + 1);
            for (Feature extracted : extractor.getFeatures(token)) {
                collected.add(extracted.prefixWith(preamble + extractor.getName()));
            }
        }
    }
    return collected;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class BIOTester method statistics.
/**
 * Counts mentions in the ACE, ERE and TAC corpora read from fixed paths under {@code data/}
 * and prints the totals to standard output.
 *
 * <p>ACE and ERE mentions are tallied by the value of their {@code EntityMentionType}
 * attribute ({@code NAM}, {@code NOM} or {@code PRO}). For TAC, every constituent in the
 * {@code MENTIONS} view of the {@code 2016.nam} and {@code 2016.nom} files is counted.
 * Any exception stops the counting, has its stack trace printed, and the totals gathered
 * up to that point are still printed.
 */
public static void statistics() {
    int aceNam = 0;
    int aceNom = 0;
    int acePro = 0;
    int ereNam = 0;
    int ereNom = 0;
    int erePro = 0;
    int tacNam = 0;
    int tacNom = 0;
    try {
        ACEReaderWithTrueCaseFixer aceCorpus = new ACEReaderWithTrueCaseFixer("data/all", false);
        for (TextAnnotation doc : aceCorpus) {
            for (Constituent mention : doc.getView(ViewNames.MENTION_ACE)) {
                switch (mention.getAttribute("EntityMentionType")) {
                    case "NAM":
                        aceNam++;
                        break;
                    case "NOM":
                        aceNom++;
                        break;
                    case "PRO":
                        acePro++;
                        break;
                    default:
                        break;
                }
            }
        }

        EREMentionRelationReader ereCorpus = new EREMentionRelationReader(EREDocumentReader.EreCorpus.ENR3, "data/ere/data", false);
        for (XmlTextAnnotation xmlDoc : ereCorpus) {
            TextAnnotation doc = xmlDoc.getTextAnnotation();
            for (Constituent mention : doc.getView(ViewNames.MENTION_ERE)) {
                switch (mention.getAttribute("EntityMentionType")) {
                    case "NAM":
                        ereNam++;
                        break;
                    case "NOM":
                        ereNom++;
                        break;
                    case "PRO":
                        erePro++;
                        break;
                    default:
                        break;
                }
            }
        }

        // The TAC files are already split by mention type, so every constituent counts.
        ColumnFormatReader tacNamCorpus = new ColumnFormatReader("data/tac/2016.nam");
        for (TextAnnotation doc : tacNamCorpus) {
            for (Constituent ignored : doc.getView("MENTIONS")) {
                tacNam++;
            }
        }
        ColumnFormatReader tacNomCorpus = new ColumnFormatReader("data/tac/2016.nom");
        for (TextAnnotation doc : tacNomCorpus) {
            for (Constituent ignored : doc.getView("MENTIONS")) {
                tacNom++;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.out.println("ACE_NAM: " + aceNam);
    System.out.println("ACE_NOM: " + aceNom);
    System.out.println("ACE_PRO: " + acePro);
    System.out.println("ERE_NAM: " + ereNam);
    System.out.println("ERE_NOM: " + ereNom);
    System.out.println("ERE_PRO: " + erePro);
    System.out.println("TAC_NAM: " + tacNam);
    System.out.println("TAC_NOM: " + tacNom);
}
Aggregations