use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class SpanishXMLTreeReader method process.
/**
 * Reads trees from the given reader and writes the processed form of each
 * retained tree to standard output.
 *
 * <p>Each retained tree is printed on its own line as
 * {@code <file name without extension>-<sentence ID>}, a tab, and the
 * string form of the tree. When all trees have been read, a summary with
 * the total and retained counts is written to standard error.
 *
 * @param file the file the trees originate from; only its name is used
 *             here, to label the output lines and the summary
 * @param tr the reader that supplies the trees; reading stops at the first
 *           {@code null} tree
 * @param posPattern passed to {@code shouldPrintTree} to decide whether a
 *                   tree is retained
 * @param wordPattern passed to {@code shouldPrintTree} to decide whether a
 *                    tree is retained
 * @param plainPrint passed to {@code toString} when rendering a tree
 * @throws IOException propagated from the underlying calls (e.g. reading a
 *                     tree from {@code tr})
 */
public static void process(File file, TreeReader tr, Pattern posPattern, Pattern wordPattern, boolean plainPrint) throws IOException {
  final String fileName = file.getName();
  // lastIndexOf returns -1 when the name has no '.', which would make
  // substring(0, -1) throw; fall back to the whole name in that case.
  final int extensionStart = fileName.lastIndexOf('.');
  final String canonicalFileName = extensionStart < 0 ? fileName : fileName.substring(0, extensionStart);

  int numTrees = 0;
  int numTreesRetained = 0;
  Tree t;
  while ((t = tr.readTree()) != null) {
    numTrees++;
    if (!shouldPrintTree(t, posPattern, wordPattern)) {
      continue;
    }
    numTreesRetained++;

    // The cast requires the tree's root label to be a CoreLabel.
    String ftbID = ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
    String output = toString(t, plainPrint);
    System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, output);
  }

  System.err.printf("%s: %d trees, %d matched and printed%n", fileName, numTrees, numTreesRetained);
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class SpanishXMLTreeReader method readTree.
/**
 * Returns the next tree that can be built from the remaining sentence nodes.
 *
 * <p>Sentence nodes are consumed in order, advancing {@code sentIdx} past
 * each one. Nodes for which {@code getTreeFromXML} yields {@code null} are
 * skipped. The first tree obtained is normalized, and, when its label is a
 * {@link CoreLabel}, tagged with the index of its sentence node (as a
 * string) under {@code SentenceIDAnnotation}.
 *
 * @return the next normalized tree, or {@code null} when there are no
 *         sentences or none of the remaining ones yields a tree
 */
public Tree readTree() {
  while (sentences != null && sentIdx < sentences.getLength()) {
    final int sentenceId = sentIdx;
    sentIdx++;

    Tree parsed = getTreeFromXML(sentences.item(sentenceId));
    if (parsed == null) {
      // Nothing usable in this sentence node; try the next one.
      continue;
    }

    Tree normalized = treeNormalizer.normalizeWholeTree(parsed, treeFactory);
    if (normalized.label() instanceof CoreLabel) {
      CoreLabel rootLabel = (CoreLabel) normalized.label();
      rootLabel.set(CoreAnnotations.SentenceIDAnnotation.class, Integer.toString(sentenceId));
    }
    return normalized;
  }
  return null;
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class NERFeatureFactoryITest method testSloppyGazette.
/**
 * Checks the features produced by {@code featuresC} at token positions 4, 5
 * and 6 of a fixed sentence when both the {@code useGazettes} and
 * {@code sloppyGazette} flags are enabled with the test gazette file.
 */
public void testSloppyGazette() {
  Properties gazetteProps = new Properties();
  gazetteProps.setProperty("useGazettes", "true");
  gazetteProps.setProperty("sloppyGazette", "true");
  gazetteProps.setProperty("gazette", "projects/core/data/edu/stanford/nlp/ie/test_gazette.txt");

  NERFeatureFactory<CoreLabel> featureFactory = new NERFeatureFactory<>();
  featureFactory.init(new SeqClassifierFlags(gazetteProps));

  String[] words = "For three years , John Bauer has worked at Stanford .".split(" +");
  List<CoreLabel> tokens = SentenceUtils.toCoreLabelList(words);
  PaddedList<CoreLabel> padded = new PaddedList<>(tokens, new CoreLabel());

  // Position 4 is the token "John".
  Set<String> johnFeatures = new HashSet<>(featureFactory.featuresC(padded, 4));
  checkFeatures(johnFeatures, "BAR-GAZ", "BAZ-GAZ", "FOO-GAZ", "BAR-GAZ2", "BAZ-GAZ2", "FOO-GAZ1", "John-WORD");

  // Position 5 is the token "Bauer".
  Set<String> bauerFeatures = new HashSet<>(featureFactory.featuresC(padded, 5));
  checkFeatures(bauerFeatures, "BAR-GAZ", "BAZ-GAZ", "BAR-GAZ2", "BAZ-GAZ2", "Bauer-WORD");

  // Position 6 is the token "has".
  Set<String> hasFeatures = new HashSet<>(featureFactory.featuresC(padded, 6));
  checkFeatures(hasFeatures, "has-WORD");
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class CRFClassifierITest method adapt2.
/**
 * Converts each {@code Pair<List<CoreLabel>, Double>} into a
 * {@code Pair<List<String>, Double>} so that the result prints more compactly.
 *
 * <p>Every label is replaced by the string stored under
 * {@code AnswerAnnotation}, cut down to at most its first three characters.
 * The second element of each pair is carried over unchanged.
 *
 * @param in the pairs to convert
 * @return a new list with one converted pair per input pair, in the same order
 */
private static List<Pair<List<String>, Double>> adapt2(List<Pair<List<CoreLabel>, Double>> in) {
  List<Pair<List<String>, Double>> adapted = new ArrayList<>();
  for (Pair<List<CoreLabel>, Double> scored : in) {
    List<String> shortAnswers = new ArrayList<>();
    for (CoreLabel token : scored.first()) {
      String answer = token.getString(CoreAnnotations.AnswerAnnotation.class);
      // Keep only a three-character prefix; shorter answers are kept whole.
      String abbreviated = answer.length() > 3 ? answer.substring(0, 3) : answer;
      shortAnswers.add(abbreviated);
    }
    Double score = scored.second();
    adapted.add(new Pair<>(shortAnswers, score));
  }
  return adapted;
}
use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
the class SpanishTokenizerAnnotatorITest method testSpanish.
/**
 * Runs a pipeline configured with only the {@code tokenize} annotator and
 * {@code tokenize.language} set to {@code es} over a short text, then checks
 * that the token texts match {@code spanishTokens} one for one.
 */
public void testSpanish() {
  Properties spanishProps = new Properties();
  spanishProps.setProperty("annotators", "tokenize");
  spanishProps.setProperty("tokenize.language", "es");

  Annotation document = new Annotation("Damelo");
  new StanfordCoreNLP(spanishProps).annotate(document);

  Iterator<String> expectedTokens = spanishTokens.iterator();
  for (CoreLabel token : document.get(CoreAnnotations.TokensAnnotation.class)) {
    String expectedText = expectedTokens.next();
    String actualText = token.get(CoreAnnotations.TextAnnotation.class);
    assertEquals("Bung token in new CoreLabel usage", expectedText, actualText);
  }
  // Every expected token must have been consumed by the loop above.
  assertFalse("Too few tokens in new CoreLabel usage", expectedTokens.hasNext());
}
Aggregations