Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
Class SenseExampleGenerator, method getExamples.
/**
 * Builds the sentence-level instance and structure for a text annotation.
 *
 * <p>When {@code ta} has the view named by {@code SenseManager.getGoldViewName()}, both the
 * predicate list and the structure list are filled by {@code getTreebankExamples}. Otherwise only
 * the predicate list is filled, via the two-argument {@code getExamples} overload, and the
 * structure list is left empty.
 *
 * @param ta the text annotation to collect examples from
 * @return a pair of the {@code SentenceInstance} built from the collected predicates and the
 *         {@code SentenceStructure} built from that instance and the collected structures
 * @throws Exception if any of the called helpers or constructors throws
 */
public Pair<SentenceInstance, SentenceStructure> getExamples(TextAnnotation ta) throws Exception {
    final List<SenseInstance> instances = new ArrayList<>();
    final List<SenseStructure> goldStructures = new ArrayList<>();

    final boolean hasGoldView = ta.hasView(SenseManager.getGoldViewName());
    if (!hasGoldView) {
        // No gold view: only the instances can be collected.
        getExamples(ta, instances);
    } else {
        getTreebankExamples(ta, instances, goldStructures);
    }

    final SentenceInstance sentenceInstance = new SentenceInstance(instances);
    final SentenceStructure sentenceStructure =
            new SentenceStructure(sentenceInstance, goldStructures);
    return new Pair<>(sentenceInstance, sentenceStructure);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
Class VerbSensePropbankReader, method addAnnotation.
/**
 * Builds a token-label view of gold senses for {@code ta} and attaches it under the name returned
 * by {@code SenseManager.getGoldViewName()}.
 *
 * <p>For each {@code PropbankFields} entry stored in {@code goldFields} under the annotation's
 * id, the predicate's start token is looked up in the yield of the span-labeled gold parse tree.
 * The first entry seen for a given start token contributes its sense as the token label and its
 * lemma as the {@code LemmaIdentifier} attribute; later entries with the same start token are
 * skipped. The view is added to {@code ta} only if it ends up with at least one constituent.
 *
 * @param ta the text annotation to read the gold parse from and to add the view to
 */
private void addAnnotation(TextAnnotation ta) {
    final String viewName = SenseManager.getGoldViewName();

    final Tree<String> goldParse = ParseHelper.getParseTree(ViewNames.PARSE_GOLD, ta, 0);
    final List<Tree<Pair<String, IntPair>>> leaves =
            ParseUtils.getSpanLabeledTree(goldParse).getYield();

    final TokenLabelView senseView = new TokenLabelView(viewName, "AnnotatedTreebank", ta, 1.0);

    final Set<Integer> seenPredicates = new HashSet<>();
    for (PropbankFields entry : goldFields.get(ta.getId())) {
        final int predicateToken = entry.getPredicateStart(leaves);

        // Set.add reports false when the token was already recorded, i.e. a duplicate predicate.
        if (seenPredicates.add(predicateToken)) {
            senseView.addTokenLabel(predicateToken, entry.getSense(), 1.0);
            try {
                senseView.addTokenAttribute(predicateToken, LemmaIdentifier, entry.getLemma());
            } catch (Exception e) {
                // A failure to attach the lemma is reported but does not abort the remaining
                // entries. XXX Consider routing this through a logger instead.
                e.printStackTrace();
            }
        }
    }

    if (senseView.getConstituents().size() > 0) {
        ta.addView(viewName, senseView);
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
Class MulticlassInference, method getLossAugmentedBestStructure.
/**
 * Searches all valid labels for the one whose model score plus loss is largest.
 *
 * <p>Every label index in {@code [0, manager.getNumLabels())} that {@code manager} accepts as
 * valid for the instance is scored with {@code weight.dotProduct} over the candidate structure's
 * feature vector. When a gold structure is supplied, a candidate whose label differs from the
 * gold label gets a loss of 1; otherwise the loss is 0. A candidate replaces the current best
 * only when its score plus loss is strictly greater, so ties keep the earlier label.
 *
 * <p>If no label is valid, the instance and the manager's legal senses for its predicate lemma
 * are printed to standard output and the returned pair holds a {@code null} structure.
 *
 * @param weight the weight vector used to score candidate structures
 * @param ins the instance to label; cast to {@code SenseInstance}
 * @param goldStructure the gold structure, or {@code null} to score without loss
 * @return the best structure found (possibly {@code null}) paired with its loss
 * @throws Exception if any of the called scoring or construction methods throws
 */
@Override
public Pair<IStructure, Double> getLossAugmentedBestStructure(WeightVector weight, IInstance ins, IStructure goldStructure) throws Exception {
    final SenseInstance instance = (SenseInstance) ins;
    final SenseStructure gold =
            (goldStructure == null) ? null : (SenseStructure) goldStructure;

    final int labelCount = manager.getNumLabels();
    assert labelCount > 0;

    SenseStructure bestStructure = null;
    double bestScore = Double.NEGATIVE_INFINITY;
    double bestLoss = 0;

    for (int label = 0; label < labelCount; label++) {
        if (manager.isValidLabel(instance, label)) {
            final SenseStructure candidate = new SenseStructure(instance, label, manager);
            final double score = weight.dotProduct(candidate.getFeatureVector());

            // 0/1 loss against the gold label; always zero when no gold structure is given.
            final double candidateLoss =
                    (gold != null && gold.getLabel() != label) ? 1.0 : 0.0;

            if (score + candidateLoss > bestScore + bestLoss) {
                bestScore = score;
                bestLoss = candidateLoss;
                bestStructure = candidate;
            }
        }
    }

    if (bestStructure == null) {
        // No label was valid for this instance: dump what is known to help diagnose it.
        System.out.println(ins);
        System.out.println(manager.getLegalSenses(instance.getPredicateLemma()));
    }
    return new Pair<IStructure, Double>(bestStructure, bestLoss);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
Class IllinoisTokenizer, method tokenizeSentence.
/**
 * Splits a sentence into tokens and reports each token's character span.
 *
 * <p>The sentence is wrapped in a {@code Sentence} and split with {@code wordSplit()}. For every
 * resulting element, its string form becomes the token and its span is recorded as
 * {@code (start, end + 1)}, i.e. the end offset is one past the last character.
 *
 * @param sentence the plain text sentence to tokenize
 * @return a pair of parallel arrays: the tokens in order, and their start/end character offsets
 *         (one-past-the-end indexing)
 */
@Override
public Pair<String[], IntPair[]> tokenizeSentence(String sentence) {
    final LinkedVector words = new Sentence(sentence).wordSplit();
    final int tokenCount = words.size();

    final String[] tokens = new String[tokenCount];
    final IntPair[] spans = new IntPair[tokenCount];

    int index = 0;
    while (index < tokenCount) {
        final LinkedChild word = words.get(index);
        tokens[index] = word.toString();
        // The split's end offset is bumped by one to make the span end-exclusive.
        spans[index] = new IntPair(word.start, word.end + 1);
        index++;
    }
    return new Pair<>(tokens, spans);
}
Use of edu.illinois.cs.cogcomp.core.datastructures.Pair in project cogcomp-nlp by CogComp.
Class VerbVoiceIndicator, method getWordFeatures.
/**
 * Produces a single discrete feature holding the voice computed for the word at
 * {@code wordPosition}.
 *
 * <p>The sentence containing the token is parsed via {@code parseViewName}, the parse is
 * converted to a span-labeled tree, and the parent of the leaf at the token's sentence-relative
 * position is passed to {@code getVoice}. The resulting string is wrapped in a
 * {@code DiscreteFeature}.
 *
 * @param ta the text annotation containing the word
 * @param wordPosition the token index of the word within {@code ta}
 * @return a set containing exactly one feature, created from the voice string
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
    final Sentence containingSentence = ta.getSentenceFromToken(wordPosition);
    // Leaves of the sentence parse are indexed relative to the sentence's first token.
    final int indexInSentence = wordPosition - containingSentence.getStartSpan();

    final Tree<String> parse = ParseHelper.getParseTree(parseViewName, containingSentence);
    final Tree<Pair<String, IntPair>> leaf =
            ParseUtils.getSpanLabeledTree(parse).getYield().get(indexInSentence);
    final String voice = getVoice(leaf.getParent());

    final Set<Feature> features = new LinkedHashSet<Feature>();
    features.add(DiscreteFeature.create(voice));
    return features;
}
Aggregations