use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class PrepSRLDataReader method makeNewTextAnnotation.
/**
 * Builds a {@link TextAnnotation} for one instance element and attaches a
 * {@link TokenLabelView} (named {@code viewName}) that labels the preposition token
 * with the role looked up in {@code senseToRole}.
 *
 * <p>The sentence text is taken from the first {@code <context>} child; the preposition
 * is the text of its {@code <head>} child. The label comes from {@code keys} when
 * {@code corpusName} is {@code "test"}, otherwise from the {@code senseid} attribute of
 * the first {@code <answer>} element.
 *
 * @param item the instance element to convert
 * @return the annotated sentence, or {@code null} when the instance is skipped: no key
 *         for a test instance, no label, or a malformed instance (no {@code <context>},
 *         no {@code <answer>}, or no usable {@code <head>} position)
 */
private TextAnnotation makeNewTextAnnotation(Element item) {
    String id = item.getAttribute("id");

    // NodeList.item(0) returns null when there is no <context>; skip instead of failing with an NPE.
    Node context = item.getElementsByTagName("context").item(0);
    if (context == null) {
        log.info("No context for id {}, ignoring sentence", id);
        return null;
    }
    NodeList children = context.getChildNodes();
    String rawSentenceString = context.getTextContent().replaceAll("[\\t\\n]", "").trim();

    String preposition = "";
    int prepositionPosition = -1;
    for (int i = 0; i < children.getLength(); i++) {
        Node currentNode = children.item(i);
        if (currentNode.getNodeName().equals("head")) {
            preposition = currentNode.getTextContent().toLowerCase();
            // The preposition's token index is the number of tokens in the node just before <head>.
            int previousLength = 0;
            if (i > 0) {
                previousLength = tokenize(children.item(i - 1).getTextContent()).size();
            }
            prepositionPosition = previousLength;
        }
    }

    String label;
    if (corpusName.equals("test")) {
        if (!keys.containsKey(id)) {
            return null;
        }
        label = keys.get(id);
    } else {
        Node answer = item.getElementsByTagName("answer").item(0);
        if (answer == null) {
            // Treat a missing <answer> like an empty senseid rather than failing with an NPE.
            log.info("No label for id {}, ignoring sentence", id);
            return null;
        }
        label = ((Element) answer).getAttribute("senseid");
    }

    // Take only the first label for the 500 or so instances which are given multiple labels.
    if (label.contains(" ")) {
        label = label.substring(0, label.indexOf(" ")).trim();
    }
    if (label.length() == 0) {
        log.info("No label for id {}, ignoring sentence", id);
        return null;
    }

    rawSentenceString = rawSentenceString.replaceAll("`", "``");
    rawSentenceString = rawSentenceString.replaceAll("\"", "''");

    // XXX Assume text is pre-tokenized
    String[] tokens = rawSentenceString.split("\\s+");
    TextAnnotation ta = BasicTextAnnotationBuilder.createTextAnnotationFromTokens(
            "Semeval2007Prepositions", id, Collections.singletonList(tokens));

    // Without a <head> the position is still -1; indexing the token array would throw.
    String[] taTokens = ta.getTokens();
    if (prepositionPosition < 0 || prepositionPosition >= taTokens.length) {
        log.info("No usable preposition position for id {}, ignoring sentence", id);
        return null;
    }

    // Sanity check (active only with -ea): the token at the computed position is the preposition.
    assert taTokens[prepositionPosition].toLowerCase().equals(preposition)
            : "Token at position " + prepositionPosition + " is not the preposition for id " + id;

    TokenLabelView prepositionLabelView = new TokenLabelView(viewName, ta);
    // NOTE(review): role is null when senseToRole has no entry for this key — confirm
    // that addTokenLabel tolerates that.
    String role = senseToRole.get(preposition + ":" + label);
    prepositionLabelView.addTokenLabel(prepositionPosition, role, 1.0);
    ta.addView(viewName, prepositionLabelView);
    return ta;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class VerbSenseLabeler method main.
/**
 * Interactive entry point: repeatedly reads a line of text from the console, runs the
 * verb sense labeler on it and prints the predicted {@link TokenLabelView}.
 *
 * <p>The loop ends when the user enters a single underscore or when the input reaches
 * end-of-stream. Blank lines are ignored.
 *
 * @param arguments command-line arguments; at least one (the config file) is required,
 *                  although this method itself does not read its value
 * @throws Exception if pre-processing or prediction fails
 */
public static void main(String[] arguments) throws Exception {
    if (arguments.length < 1) {
        System.err.println("Usage: <config-file>");
        System.exit(-1);
    }

    // System.console() is null when the JVM has no interactive console (e.g. piped input);
    // report that instead of failing with a NullPointerException.
    // Fully qualified so the block needs no additional import.
    java.io.Console console = System.console();
    if (console == null) {
        System.err.println("No interactive console available; cannot read input.");
        System.exit(-1);
        return;
    }

    VerbSenseLabeler labeler = new VerbSenseLabeler();
    while (true) {
        System.out.print("Enter text (underscore to quit): ");
        String line = console.readLine();
        if (line == null) {
            // End of input (e.g. Ctrl-D): treat as a request to quit.
            break;
        }
        String input = line.trim();
        if (input.equals("_")) {
            break;
        }
        if (!input.isEmpty()) {
            TextAnnotation ta = TextPreProcessor.getInstance().preProcessText(input);
            TokenLabelView p = labeler.getPrediction(ta);
            System.out.println(p);
            System.out.println();
        }
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class QuantitiesDataReader method addGoldView.
/**
 * Creates a {@link TokenLabelView} named {@code viewName} on the given text annotation,
 * labelling each constituent of its {@code TOKENS} view with the label at the same index.
 *
 * @param ta     the text annotation that receives the new view
 * @param labels one label per token constituent, in the order the constituents are returned
 */
protected void addGoldView(TextAnnotation ta, List<String> labels) {
    TokenLabelView goldView = new TokenLabelView(viewName, ta);
    List<Constituent> tokenConstituents = ta.getView(ViewNames.TOKENS).getConstituents();

    // Walk constituents and labels in lockstep; the i-th label belongs to the i-th constituent.
    int labelIndex = 0;
    for (Constituent token : tokenConstituents) {
        goldView.addTokenLabel(token.getStartSpan(), labels.get(labelIndex), 1.0D);
        labelIndex++;
    }

    ta.addView(viewName, goldView);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class POSWindow method getFeatures.
/**
 * Emits one feature per token in a window around every token of the constituent.
 *
 * <p>For each token position in {@code [startSpan, endSpan)} the window reaches up to two
 * tokens to the left and up to two to the right, clipped to the sentence boundaries.
 * Each feature is named {@code <classifier>:<tag>_<form>}. The tag is read from the
 * {@code POS} view when {@code isPOSFromCounting} is false, and from {@code counter}
 * otherwise; the classifier name records which source was used.
 *
 * @param c the constituent whose surrounding tokens are described
 * @return the features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "POSWindow";
    final TextAnnotation ta = c.getTextAnnotation();
    final int tokenCount = ta.getTokens().length;
    final int spanStart = c.getStartSpan();
    final int spanEnd = c.getEndSpan();

    // The tag source decides the classifier name; both counting variants tag via `counter`.
    final boolean tagsFromView = !isPOSFromCounting;
    final String classifier;
    if (tagsFromView) {
        classifier = prefix + "_" + "POS";
    } else if (isBaseLineCounting) {
        classifier = prefix + "_" + "BaselinePOS";
    } else {
        classifier = prefix + "_" + "MikheevPOS";
    }

    Set<Feature> features = new LinkedHashSet<>();
    for (int center = spanStart; center < spanEnd; center++) {
        // Step left at most twice without going below the first token.
        int windowStart = center;
        int stepsLeft = 0;
        while (stepsLeft < 2 && windowStart > 0) {
            windowStart--;
            stepsLeft++;
        }

        // Step right at most twice without going past the last token.
        int windowEnd = center;
        int stepsRight = 0;
        while (stepsRight < 2 && windowEnd < tokenCount - 1) {
            windowEnd++;
            stepsRight++;
        }

        for (int pos = windowStart; pos <= windowEnd; pos++) {
            String form;
            String tag;
            if (tagsFromView) {
                TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
                form = ta.getToken(pos);
                tag = posView.getLabel(pos);
            } else {
                form = ta.getToken(pos);
                tag = counter.tag(pos, ta);
            }
            features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
        }
    }
    return features;
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView in project cogcomp-nlp by CogComp.
the class LabelOneAfter method getFeatures.
/**
 * Emits one feature for the token one position to the right of each token of the
 * constituent, i.e. for positions {@code [startSpan + 1, endSpan + 1)}.
 *
 * <p>When the position is inside the sentence the feature is named
 * {@code <classifier>:<tag>_<form>}; when it lies past the last token the feature is
 * just {@code <classifier>:}. The tag is read from the {@code POS} view when
 * {@code isPOSFromCounting} is false, and from {@code counter} otherwise; the classifier
 * name records which source was used.
 *
 * @param c the constituent whose following tokens are described
 * @return the features in insertion order
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelOneAfter";
    final TextAnnotation ta = c.getTextAnnotation();
    final int tokenCount = ta.getTokens().length;
    // Shift the constituent span one token to the right.
    final int first = c.getStartSpan() + 1;
    final int limit = c.getEndSpan() + 1;

    // The tag source decides the classifier name; both counting variants tag via `counter`.
    final boolean tagsFromView = !isPOSFromCounting;
    final String classifier;
    if (tagsFromView) {
        classifier = prefix + "_" + "POS";
    } else if (isBaseLineCounting) {
        classifier = prefix + "_" + "BaselinePOS";
    } else {
        classifier = prefix + "_" + "MikheevPOS";
    }

    Set<Feature> features = new LinkedHashSet<>();
    for (int pos = first; pos < limit; pos++) {
        if (pos >= tokenCount) {
            // Past the end of the sentence: emit the placeholder feature with no tag or form.
            features.add(new DiscreteFeature(classifier + ":" + ""));
            continue;
        }

        String form;
        String tag;
        if (tagsFromView) {
            TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
            form = ta.getToken(pos);
            tag = posView.getLabel(pos);
        } else {
            form = ta.getToken(pos);
            tag = counter.tag(pos, ta);
        }
        features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
    }
    return features;
}
Aggregations