Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class QuantitiesDataReader, method addGoldView:
protected void addGoldView(TextAnnotation ta, List<String> labels) {
    TokenLabelView posView = new TokenLabelView(viewName, ta);
    List<Constituent> constituents = ta.getView(ViewNames.TOKENS).getConstituents();
    for (int i = 0; i < constituents.size(); ++i) {
        Constituent constituent = (Constituent) constituents.get(i);
        posView.addTokenLabel(constituent.getStartSpan(), labels.get(i), 1.0D);
    }
    ta.addView(viewName, posView);
}
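Once addGoldView has run, the gold labels live in a TokenLabelView stored under viewName. A minimal sketch of reading them back out, assuming ta and viewName are the same values used above; the loop below is not part of the original snippet:

// Hedged sketch: iterate the gold view added above and print each token with its label.
TokenLabelView goldView = (TokenLabelView) ta.getView(viewName);
for (Constituent c : goldView.getConstituents()) {
    System.out.println(c.getSurfaceForm() + "\t" + c.getLabel());
}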
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class LBJavaFeatureExtractor, method classify:
@Override
public FeatureVector classify(Object o) {
    // Make sure the object is a Constituent
    if (!(o instanceof Constituent))
        throw new IllegalArgumentException("Instance must be of type Constituent");
    Constituent instance = (Constituent) o;
    FeatureVector featureVector;
    try {
        featureVector = FeatureUtilities.getLBJFeatures(getFeatures(instance));
    } catch (EdisonException e) {
        throw new RuntimeException(e);
    }
    return featureVector;
}
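A brief usage sketch, assuming extractor is a concrete LBJavaFeatureExtractor subclass and candidate is a Constituent taken from some view; both names are placeholders, not part of the snippet:

// Hypothetical call site; the guard mirrors the instanceof check inside classify.
Object input = candidate;
if (input instanceof Constituent) {
    FeatureVector fv = extractor.classify(input);
    // fv now holds the LBJava features built by FeatureUtilities.getLBJFeatures(...)
}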
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class LBJavaFeatureExtractor, method classify (alternate variant):
@Override
public FeatureVector classify(Object o) {
    // Make sure the object is a Constituent
    if (!(o instanceof Constituent))
        throw new IllegalArgumentException("Instance must be of type Constituent");
    Constituent instance = (Constituent) o;
    FeatureVector featureVector = new FeatureVector();
    try {
        featureVector = FeatureUtilities.getLBJFeatures(getFeatures(instance));
    } catch (Exception e) {
        logger.debug("Couldn't generate feature {} for constituent {}", getName(), instance);
    }
    return featureVector;
}
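This variant differs from the one above only in its failure handling: instead of wrapping an EdisonException in a RuntimeException, it catches any Exception, logs the failure at debug level, and returns the empty FeatureVector it pre-allocated. A single problematic constituent therefore degrades to an empty feature set rather than aborting feature extraction.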
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class WordBigrams, method getFeatures:
@Override
public Set<Feature> getFeatures(Constituent instance) throws EdisonException {
    Set<Feature> features = new LinkedHashSet<Feature>();
    View tokens = instance.getTextAnnotation().getView(ViewNames.TOKENS);
    List<Constituent> list = tokens.getConstituentsCoveringSpan(instance.getStartSpan(), instance.getEndSpan());
    Collections.sort(list, TextAnnotationUtilities.constituentStartComparator);
    ITransformer<Constituent, String> surfaceFormTransformer = new ITransformer<Constituent, String>() {

        private static final long serialVersionUID = 1L;

        public String transform(Constituent input) {
            return input.getSurfaceForm();
        }
    };
    features.addAll(FeatureNGramUtility.getNgramsOrdered(list, 1, surfaceFormTransformer));
    features.addAll(FeatureNGramUtility.getNgramsOrdered(list, 2, surfaceFormTransformer));
    return features;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class HashCollisionReport, method main:
/**
 * Read each test file in the directory, tokenize and create the token view. Then check for
 * collisions.
 * @param args
 * @throws IOException
 */
public static void main(String[] args) throws IOException {
    if (args.length == 0)
        error("Must pass in the name of a directory with files to test against.");
    File dir = new File(args[0]);
    if (!dir.exists()) {
        error("The directory did not exist : " + dir);
    }
    if (!dir.isDirectory()) {
        error("The path was not a directory : " + dir);
    }
    File[] files = dir.listFiles();
    for (File file : files) {
        if (file.isFile()) {
            String normal = FileUtils.readFileToString(file);
            TextAnnotationBuilder tabldr = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
            TextAnnotation taNormal = tabldr.createTextAnnotation("test", "normal", normal);
            List<Constituent> normalToks = taNormal.getView(ViewNames.TOKENS).getConstituents();
            HashMap<Integer, Constituent> hashmap = new HashMap<>();
            // Check whether each token's hash code is already used; if it is, report the collision.
            for (Constituent c : normalToks) {
                int code = c.hashCode();
                if (hashmap.containsKey(code)) {
                    Constituent dup = hashmap.get(code);
                    System.err.println(c + " == " + dup);
                } else {
                    hashmap.put(code, c);
                }
            }
        }
    }
}
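For completeness, a minimal sketch of invoking the report programmatically; the directory path is a placeholder and is not taken from the snippet:

// Hedged invocation sketch: point the report at a directory of plain-text test files.
HashCollisionReport.main(new String[] { "/path/to/token/test/files" });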