Use of edu.stanford.nlp.ling.CoreLabel in project mavuno by metzlerd.
The class NLProcTools, method getNETags:
public List<String> getNETags() {
  List<String> neTags = new ArrayList<String>();
  List<CoreLabel> labels = mNETagger.classifySentence(mSentenceWords);
  for (CoreLabel label : labels) {
    neTags.add(label.get(AnswerAnnotation.class));
  }
  return neTags;
}
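For comparison, here is a minimal standalone sketch of the same pattern, assuming a pretrained CRFClassifier model file is available (the model path and sentence below are placeholders, not part of mavuno). After classifySentence, each CoreLabel carries its named-entity tag under AnswerAnnotation, just as in getNETags above.

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import java.util.ArrayList;
import java.util.List;

public class NETagSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder model path; substitute a real serialized NER classifier.
    CRFClassifier<CoreLabel> tagger =
        CRFClassifier.getClassifier("english.all.3class.distsim.crf.ser.gz");
    // Build the token list that classifySentence expects.
    List<CoreLabel> sentence = new ArrayList<>();
    for (String w : new String[] {"Stanford", "is", "in", "California"}) {
      CoreLabel token = new CoreLabel();
      token.setWord(w);
      sentence.add(token);
    }
    // Each classified CoreLabel exposes its NE tag via AnswerAnnotation.
    for (CoreLabel label : tagger.classifySentence(sentence)) {
      System.out.println(label.word() + "\t" + label.get(CoreAnnotations.AnswerAnnotation.class));
    }
  }
}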
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class DependencyParser, method genDictionaries:
/**
* Scan a corpus and store all words, part-of-speech tags, and
* dependency relation labels observed. Prepare other structures
* which support word / POS / label lookup at train- / run-time.
*/
private void genDictionaries(List<CoreMap> sents, List<DependencyTree> trees) {
  // Collect all words, POS tags, and labels in lists, tacking on one sentence
  // after the other
  List<String> word = new ArrayList<>();
  List<String> pos = new ArrayList<>();
  List<String> label = new ArrayList<>();
  for (CoreMap sentence : sents) {
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      word.add(token.word());
      pos.add(token.tag());
    }
  }
  String rootLabel = null;
  for (DependencyTree tree : trees)
    for (int k = 1; k <= tree.n; ++k)
      if (tree.getHead(k) == 0)
        rootLabel = tree.getLabel(k);
      else
        label.add(tree.getLabel(k));
  // Generate "dictionaries," possibly with frequency cutoff
  knownWords = Util.generateDict(word, config.wordCutOff);
  knownPos = Util.generateDict(pos);
  knownLabels = Util.generateDict(label);
  knownLabels.add(0, rootLabel);
  // Avoid the case where rootLabel equals one of the other labels
  for (int k = 1; k < knownLabels.size(); ++k)
    if (knownLabels.get(k).equals(rootLabel)) {
      knownLabels.remove(k);
      break;
    }
  knownWords.add(0, Config.UNKNOWN);
  knownWords.add(1, Config.NULL);
  knownWords.add(2, Config.ROOT);
  knownPos.add(0, Config.UNKNOWN);
  knownPos.add(1, Config.NULL);
  knownPos.add(2, Config.ROOT);
  knownLabels.add(0, Config.NULL);
  generateIDs();
  log.info(Config.SEPARATOR);
  log.info("#Word: " + knownWords.size());
  log.info("#POS: " + knownPos.size());
  log.info("#Label: " + knownLabels.size());
}
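The interesting step here is Util.generateDict, which keeps only items that clear a frequency cutoff before the special UNKNOWN/NULL/ROOT tokens are prepended. A minimal sketch of that frequency-cutoff idea follows; the class name and the exact counting and ordering behavior are assumptions for illustration, not the CoreNLP implementation.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class DictSketch {
  // Count occurrences and keep only items seen at least cutOff times.
  static List<String> generateDict(List<String> items, int cutOff) {
    Map<String, Integer> counts = new HashMap<>();
    for (String item : items) {
      counts.merge(item, 1, Integer::sum);
    }
    List<String> dict = new ArrayList<>();
    for (Map.Entry<String, Integer> e : counts.entrySet()) {
      if (e.getValue() >= cutOff) {
        dict.add(e.getKey());
      }
    }
    return dict;
  }
}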
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class NegraPennTreebankParserParams, method transformTree:
/**
* transformTree does all language-specific tree
* transformations. Any parameterizations should be inside the
 * specific TreebankLangParserParams class.
*/
@Override
public Tree transformTree(Tree t, Tree root) {
  if (t == null || t.isLeaf()) {
    return t;
  }
  List<String> annotations = new ArrayList<>();
  CoreLabel lab = (CoreLabel) t.label();
  String word = lab.word();
  String tag = lab.tag();
  String cat = lab.value();
  String baseCat = treebankLanguagePack().basicCategory(cat);
  // categories -- at present there is no tag annotation!!
  if (t.isPhrasal()) {
    List<String> childBasicCats = childBasicCats(t);
    // mark VPs headed by "zu" verbs
    if (DEBUG) {
      if (markZuVP && baseCat.equals("VP")) {
        System.out.println("child basic cats: " + childBasicCats);
      }
    }
    if (markZuVP && baseCat.equals("VP") && (childBasicCats.contains("VZ") || childBasicCats.contains("VVIZU"))) {
      if (DEBUG)
        System.out.println("Marked zu VP" + t);
      annotations.add("%ZU");
    }
    // mark relative clause S's
    if (markRC && (t.label() instanceof NegraLabel) && baseCat.equals("S") && ((NegraLabel) t.label()).getEdge() != null && ((NegraLabel) t.label()).getEdge().equals("RC")) {
      if (DEBUG) {
        System.out.println("annotating this guy as RC:");
        t.pennPrint();
      }
      //throw new RuntimeException("damn, not a Negra Label");
      annotations.add("%RC");
    }
    if (markContainsV && containsVP(t)) {
      annotations.add("%vp");
    }
    if (markLP && leftPhrasal(t)) {
      annotations.add("%LP");
    }
    if (markKonjParent) {
      // this depends on functional tags being present
      for (String cCat : childBasicCats) {
        if (cCat.contains("-KONJ")) {
          annotations.add("%konjp");
          break;
        }
      }
    }
    if (markHDParent) {
      // this depends on functional tags being present
      for (String cCat : childBasicCats) {
        if (cCat.contains("-HD")) {
          annotations.add("%hdp");
          break;
        }
      }
    }
  } else {
    // t.isPreTerminal() case
    if (markColon && cat.equals("$.") && (word.equals(":") || word.equals(";"))) {
      annotations.add("-%colon");
    }
  }
  // if (t.isPreTerminal()) {
  //   if (parent != null) {
  //     String parentVal = parent.label().value();
  //     int cutOffPtD = parentVal.indexOf('-');
  //     int cutOffPtC = parentVal.indexOf('^');
  //     int curMin = parentVal.length();
  //     if (cutOffPtD != -1) {
  //       curMin = cutOffPtD;
  //     }
  //     if (cutOffPtC != -1) {
  //       curMin = Math.min(curMin, cutOffPtC);
  //     }
  //     parentVal = parentVal.substring(0, curMin);
  //     annotations.add("^" + parentVal);
  //   }
  // }
  // put on all the annotations
  StringBuilder catSB = new StringBuilder(cat);
  for (String annotation : annotations) {
    catSB.append(annotation);
  }
  t.setLabel(new CategoryWordTag(catSB.toString(), word, tag));
  return t;
}
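A minimal sketch of the label-access pattern at the top of this method, assuming the tree nodes carry CoreLabel labels (the class and method names here are illustrative, not part of CoreNLP): each internal node exposes the category, head word, and head tag that transformTree reads into cat, word, and tag.

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;

public class LabelDumpSketch {
  // Print the value/word/tag triple for every internal node of a tree whose
  // labels are CoreLabels; the cast fails for other label factories.
  public static void dumpLabels(Tree root) {
    for (Tree node : root) {  // a Tree iterates over itself and all of its subtrees
      if (node.isLeaf()) {
        continue;
      }
      CoreLabel lab = (CoreLabel) node.label();
      System.out.println(lab.value() + "\t" + lab.word() + "\t" + lab.tag());
    }
  }
}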
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class LeafAncestorEval, method display:
public void display(boolean verbose, PrintWriter pw) {
  final Random rand = new Random();
  double corpusLevel = corpusAvg / corpusNum;
  double sentLevel = sentAvg / sentNum;
  double sentEx = 100.0 * sentExact / sentNum;
  if (verbose) {
    Map<Double, List<CoreLabel>> avgMap = new TreeMap<>();
    for (Map.Entry<List<CoreLabel>, Double> entry : catAvg.entrySet()) {
      double avg = entry.getValue() / catNum.get(entry.getKey());
      if (Double.isNaN(avg)) {
        avg = -1.0;
      }
      if (avgMap.containsKey(avg)) {
        avgMap.put(avg + (rand.nextDouble() / 10000.0), entry.getKey());
      } else {
        avgMap.put(avg, entry.getKey());
      }
    }
    pw.println("============================================================");
    pw.println("Leaf Ancestor Metric" + "(" + name + ") -- final statistics");
    pw.println("============================================================");
    pw.println("#Sentences: " + (int) sentNum);
    pw.println();
    pw.println("Sentence-level (macro-averaged)");
    pw.printf(" Avg: %.3f%n", sentLevel);
    pw.printf(" Exact: %.2f%%%n", sentEx);
    pw.println();
    pw.println("Corpus-level (micro-averaged)");
    pw.printf(" Avg: %.3f%n", corpusLevel);
    pw.println("============================================================");
    for (List<CoreLabel> lineage : avgMap.values()) {
      if (catNum.get(lineage) < 30.0)
        continue;
      double avg = catAvg.get(lineage) / catNum.get(lineage);
      pw.printf(" %.3f\t%d\t%s%n", avg, (int) ((double) catNum.get(lineage)), toString(lineage));
    }
    pw.println("============================================================");
  } else {
    pw.printf("%s summary: corpus: %.3f sent: %.3f sent-ex: %.2f%n", name, corpusLevel, sentLevel, sentEx);
  }
}
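The verbose branch above builds a score-sorted view of catAvg by using the averages themselves as TreeMap keys and adding a tiny random jitter when two categories tie, so no entry is silently overwritten. A minimal standalone sketch of that trick, with illustrative names that are not from CoreNLP:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;

final class SortByValueSketch {
  // Return map keys ordered by ascending score, jittering duplicate scores so
  // that equal values do not collide as TreeMap keys.
  static List<String> sortedByScore(Map<String, Double> scores) {
    Random rand = new Random();
    TreeMap<Double, String> sortedView = new TreeMap<>();
    for (Map.Entry<String, Double> e : scores.entrySet()) {
      double key = e.getValue();
      while (sortedView.containsKey(key)) {
        key += rand.nextDouble() / 10000.0;
      }
      sortedView.put(key, e.getKey());
    }
    return new ArrayList<>(sortedView.values());
  }
}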
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class DependencyParser, method predict:
/**
* Convenience method for {@link #predict(edu.stanford.nlp.util.CoreMap)}. The tokens of the provided sentence must
* also have tag annotations (the parser requires part-of-speech tags).
*
* @see #predict(edu.stanford.nlp.util.CoreMap)
*/
public GrammaticalStructure predict(List<? extends HasWord> sentence) {
  CoreLabel sentenceLabel = new CoreLabel();
  List<CoreLabel> tokens = new ArrayList<>();
  int i = 1;
  for (HasWord wd : sentence) {
    CoreLabel label;
    if (wd instanceof CoreLabel) {
      label = (CoreLabel) wd;
      if (label.tag() == null)
        throw new IllegalArgumentException("Parser requires words with part-of-speech tag annotations");
    } else {
      label = new CoreLabel();
      label.setValue(wd.word());
      label.setWord(wd.word());
      if (!(wd instanceof HasTag))
        throw new IllegalArgumentException("Parser requires words with part-of-speech tag annotations");
      label.setTag(((HasTag) wd).tag());
    }
    label.setIndex(i);
    i++;
    tokens.add(label);
  }
  sentenceLabel.set(CoreAnnotations.TokensAnnotation.class, tokens);
  return predict(sentenceLabel);
}
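A minimal caller sketch, assuming the default English neural dependency model is on the classpath; TaggedWord satisfies the HasWord/HasTag requirement that predict checks above. The sentence and tags are placeholders.

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;
import java.util.Arrays;
import java.util.List;

public class PredictSketch {
  public static void main(String[] args) {
    DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);
    // Pre-tagged input: predict refuses tokens without part-of-speech tags.
    List<TaggedWord> sentence = Arrays.asList(
        new TaggedWord("Dogs", "NNS"),
        new TaggedWord("bark", "VBP"),
        new TaggedWord(".", "."));
    GrammaticalStructure gs = parser.predict(sentence);
    for (TypedDependency dep : gs.typedDependencies()) {
      System.out.println(dep);
    }
  }
}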