use of edu.illinois.cs.cogcomp.core.transformers.ITransformer in project cogcomp-nlp by CogComp.
the class IllinoisLemmatizer method readFromClasspath.
/**
 * Reads a classpath resource and returns its lines.
 *
 * <p>The resource is looked up with {@code IOUtils.lsResources} relative to
 * {@code IllinoisLemmatizer.class}; only the first entry of the returned list is opened.
 * Each line is passed through unchanged, and the stream is decoded with the JVM's default
 * charset.
 *
 * <p>If an {@code IOException} or {@code URISyntaxException} occurs, an error message and the
 * cause are written to {@code System.err} and the JVM is terminated with exit status -1.
 *
 * @param filename name of the resource to look up
 * @return the list produced by {@code LineIO.read} for the resource
 */
public static List<String> readFromClasspath(String filename) {
    List<String> lines = null;
    // try-with-resources releases the stream even when reading fails part-way through.
    // NOTE(review): get(0) assumes at least one matching resource exists -- confirm that
    // lsResources never returns an empty list for a missing file.
    try (InputStream resource = IOUtils.lsResources(IllinoisLemmatizer.class, filename).get(0).openStream()) {
        // NOTE(review): the platform default charset makes decoding machine-dependent;
        // consider an explicit charset if the resource files have a known encoding.
        lines = LineIO.read(resource, Charset.defaultCharset().name(), new ITransformer<String, String>() {
            @Override
            public String transform(String line) {
                // Identity mapping: keep every line exactly as read.
                return line;
            }
        });
    } catch (IOException | URISyntaxException e) {
        System.err.println("Error while trying to read " + filename + ".");
        // Surface the underlying failure so the abort can be diagnosed.
        System.err.println("Cause: " + e);
        System.exit(-1);
    }
    return lines;
}
use of edu.illinois.cs.cogcomp.core.transformers.ITransformer in project cogcomp-nlp by CogComp.
the class WordBigrams method getFeatures.
/**
 * Builds n-gram features of size 1 and 2 over the surface forms of the tokens covering the
 * given constituent.
 *
 * <p>The covering constituents are taken from the {@code ViewNames.TOKENS} view and sorted
 * with {@code TextAnnotationUtilities.constituentStartComparator} before the n-grams are
 * generated by {@code FeatureNGramUtility.getNgramsOrdered}.
 *
 * @param instance the constituent whose span selects the tokens
 * @return the generated features, in insertion order (size-1 n-grams first, then size-2)
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    View tokenView = instance.getTextAnnotation().getView(ViewNames.TOKENS);
    List<Constituent> coveringTokens =
            tokenView.getConstituentsCoveringSpan(instance.getStartSpan(), instance.getEndSpan());
    Collections.sort(coveringTokens, TextAnnotationUtilities.constituentStartComparator);

    // Maps each token constituent to the string used as the n-gram element.
    ITransformer<Constituent, String> toSurfaceForm = new ITransformer<Constituent, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String transform(Constituent token) {
            return token.getSurfaceForm();
        }
    };

    Set<Feature> result = new LinkedHashSet<Feature>();
    for (int ngramSize = 1; ngramSize <= 2; ngramSize++) {
        result.addAll(FeatureNGramUtility.getNgramsOrdered(coveringTokens, ngramSize, toSurfaceForm));
    }
    return result;
}
use of edu.illinois.cs.cogcomp.core.transformers.ITransformer in project cogcomp-nlp by CogComp.
the class ParseUtils method getTokenIndexedParseTreeNodeCovering.
/**
 * Returns the token-indexed parse-tree node covering the given constituent, with every
 * node's span shifted by the start span of the constituent's sentence.
 *
 * <p>The parse tree is fetched for the sentence containing {@code c}, so the covering lookup
 * is done with the constituent's start and end reduced by the sentence start span. The
 * resulting tree is then mapped so each label's {@code IntPair} has the sentence start span
 * added back to both components.
 *
 * @param parseViewName name of the parse view passed on to {@code getParseTree}
 * @param c the constituent to cover
 * @return the mapped tree of (label, shifted span) pairs
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedParseTreeNodeCovering(String parseViewName, Constituent c) {
    TextAnnotation annotation = c.getTextAnnotation();
    int sentenceIndex = annotation.getSentenceId(c);
    Tree<String> sentenceTree = getParseTree(parseViewName, annotation, sentenceIndex);
    final int sentenceOffset = annotation.getSentence(sentenceIndex).getStartSpan();

    // Look up the covering node using sentence-relative token positions.
    Tree<Pair<String, IntPair>> sentenceRelativeTree = getTokenIndexedTreeCovering(
            sentenceTree, c.getStartSpan() - sentenceOffset, c.getEndSpan() - sentenceOffset);

    // Undo the shift: add the sentence start span back onto every node's span.
    return Mappers.mapTree(sentenceRelativeTree,
            new ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>>() {
                @Override
                public Pair<String, IntPair> transform(Tree<Pair<String, IntPair>> node) {
                    Pair<String, IntPair> nodeLabel = node.getLabel();
                    IntPair relativeSpan = nodeLabel.getSecond();
                    IntPair shiftedSpan = new IntPair(
                            relativeSpan.getFirst() + sentenceOffset,
                            relativeSpan.getSecond() + sentenceOffset);
                    return new Pair<>(nodeLabel.getFirst(), shiftedSpan);
                }
            });
}
use of edu.illinois.cs.cogcomp.core.transformers.ITransformer in project cogcomp-nlp by CogComp.
the class WordBigrams method getFeatures.
/**
 * Builds n-gram features of size 1 and 2 over the surface forms of the tokens covering the
 * given constituent.
 *
 * <p>The covering constituents come from the {@code ViewNames.TOKENS} view and are sorted
 * in place with {@code TextAnnotationUtilities.constituentStartComparator} before being
 * handed to {@code FeatureNGramUtility.getNgramsOrdered}.
 *
 * @param instance the constituent whose span selects the tokens
 * @return the generated features, in insertion order (size-1 n-grams first, then size-2)
 * @throws EdisonException declared by the overridden method
 */
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    View tokenView = instance.getTextAnnotation().getView(ViewNames.TOKENS);
    List<Constituent> coveringTokens =
            tokenView.getConstituentsCoveringSpan(instance.getStartSpan(), instance.getEndSpan());
    coveringTokens.sort(TextAnnotationUtilities.constituentStartComparator);

    // Maps each token constituent to the string used as the n-gram element.
    ITransformer<Constituent, String> toSurfaceForm = new ITransformer<Constituent, String>() {
        public String transform(Constituent token) {
            return token.getSurfaceForm();
        }
    };

    Set<Feature> result = new LinkedHashSet<>();
    for (int ngramSize = 1; ngramSize <= 2; ngramSize++) {
        result.addAll(FeatureNGramUtility.getNgramsOrdered(coveringTokens, ngramSize, toSurfaceForm));
    }
    return result;
}
use of edu.illinois.cs.cogcomp.core.transformers.ITransformer in project cogcomp-nlp by CogComp.
the class ParseHelper method getTokenIndexedParseTreeNodeCovering.
/**
 * Returns the token-indexed parse-tree node covering the given constituent, with every
 * node's span shifted by the start span of the constituent's sentence.
 *
 * <p>The tree obtained from {@code getParseTree} belongs to a single sentence, so the
 * constituent's start and end are reduced by the sentence start span for the covering lookup.
 * Afterwards each node label is rebuilt with the sentence start span added to both ends of
 * its {@code IntPair}.
 *
 * @param parseViewName name of the parse view passed on to {@code getParseTree}
 * @param c the constituent to cover
 * @return the mapped tree of (label, shifted span) pairs
 */
public static Tree<Pair<String, IntPair>> getTokenIndexedParseTreeNodeCovering(String parseViewName, Constituent c) {
    TextAnnotation textAnnotation = c.getTextAnnotation();
    int sentenceNumber = textAnnotation.getSentenceId(c);
    Tree<String> parseTree = getParseTree(parseViewName, textAnnotation, sentenceNumber);
    final int shift = textAnnotation.getSentence(sentenceNumber).getStartSpan();

    // Covering lookup works on positions relative to the sentence start.
    int relativeStart = c.getStartSpan() - shift;
    int relativeEnd = c.getEndSpan() - shift;
    Tree<Pair<String, IntPair>> coveringNode = getTokenIndexedTreeCovering(parseTree, relativeStart, relativeEnd);

    // Re-apply the shift so the returned spans are no longer sentence-relative.
    ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>> shiftSpans =
            new ITransformer<Tree<Pair<String, IntPair>>, Pair<String, IntPair>>() {
                @Override
                public Pair<String, IntPair> transform(Tree<Pair<String, IntPair>> subtree) {
                    Pair<String, IntPair> original = subtree.getLabel();
                    IntPair span = original.getSecond();
                    IntPair moved = new IntPair(span.getFirst() + shift, span.getSecond() + shift);
                    return new Pair<>(original.getFirst(), moved);
                }
            };
    return Mappers.mapTree(coveringNode, shiftSpans);
}
Aggregations