Use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
Class FindTreebankTree, method main.
/**
 * Searches treebank paths for trees whose yield equals a given sentence and
 * prints the file name and per-file tree number of every match.
 *
 * <p>Arguments: {@code -tagSeparator}, {@code -encoding} and {@code -fileRegex}
 * (single or double dash, case-insensitive) each consume the following
 * argument as their value. The first remaining non-empty argument (after
 * trimming) is the sentence to look for; every later argument is a path to
 * search. An option flag that is the last argument has no value to consume
 * and is therefore treated like a positional argument.
 *
 * @param args command line arguments as described above
 */
public static void main(String[] args) {
  String needle = "";
  String tagSeparator = "_";
  String encoding = "utf-8";
  String fileRegex = "";
  List<String> paths = new ArrayList<>();

  int argIndex = 0;
  while (argIndex < args.length) {
    final String arg = args[argIndex];
    final boolean valueFollows = argIndex + 1 < args.length;
    if ((arg.equalsIgnoreCase("-tagSeparator") || arg.equalsIgnoreCase("--tagSeparator")) && valueFollows) {
      tagSeparator = args[++argIndex];
    } else if ((arg.equalsIgnoreCase("-encoding") || arg.equalsIgnoreCase("--encoding")) && valueFollows) {
      encoding = args[++argIndex];
    } else if ((arg.equalsIgnoreCase("-fileRegex") || arg.equalsIgnoreCase("--fileRegex")) && valueFollows) {
      fileRegex = args[++argIndex];
    } else if (needle.isEmpty()) {
      // First positional argument: the sentence being searched for.
      needle = arg.trim();
    } else {
      paths.add(arg);
    }
    ++argIndex;
  }

  TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();

  // Only build a filter when a regex was supplied; otherwise pass null through
  // to loadPath. Directories always pass so that the regex applies to file
  // names only.
  final FileFilter filter;
  if (fileRegex.isEmpty()) {
    filter = null;
  } else {
    final Pattern namePattern = Pattern.compile(fileRegex);
    filter = candidate -> candidate.isDirectory() || namePattern.matcher(candidate.getName()).matches();
  }

  for (String path : paths) {
    // A fresh treebank per path, sharing the reader factory, encoding and filter.
    DiskTreebank treebank = new DiskTreebank(trf, encoding);
    treebank.loadPath(path, filter);

    int treeCount = 0;
    String currentFile = "";
    for (Iterator<Tree> trees = treebank.iterator(); trees.hasNext(); ) {
      // Restart the tree numbering whenever the treebank reports a new file.
      // This check deliberately sits between hasNext() and next().
      final String reportedFile = treebank.getCurrentFilename();
      if (!currentFile.equals(reportedFile)) {
        currentFile = reportedFile;
        treeCount = 0;
      }
      ++treeCount;

      final List<TaggedWord> sentence = trees.next().taggedYield();

      // Three ways to match: words separated by spaces, words with the spaces
      // removed (useful for unsegmented text such as Chinese), and the
      // word/tag form joined with the tag separator.
      final String untagged = SentenceUtils.listToString(sentence, true);
      final String unsegmented = untagged.replace(" ", "");
      final String tagged = SentenceUtils.listToString(sentence, false, tagSeparator);

      if (needle.equals(untagged) || needle.equals(unsegmented) || needle.equals(tagged)) {
        System.out.println("needle found in " + currentFile + " tree " + treeCount);
      }
    }
  }
}
Use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
Class TreeLemmatizer, method transformTree.
/**
 * Sets a lemma on the label of every leaf of the given tree, modifying the
 * labels in place, and returns the same tree instance.
 *
 * <p>The tag passed to the lemmatizer is taken from the leaf label when that
 * label implements {@code HasTag} and carries a non-null tag; otherwise it is
 * looked up in the tree's tagged yield, which is computed at most once and
 * only when first needed. Leaves whose label is {@code null} are skipped.
 *
 * @param t the tree whose leaf labels receive lemmas
 * @return {@code t} itself
 * @throws IllegalArgumentException if a non-null leaf label does not
 *     implement {@code HasLemma}
 */
@Override
public Tree transformTree(Tree t) {
  final Morphology lemmatizer = new Morphology();
  List<TaggedWord> yieldTags = null;  // filled lazily on first fallback
  int position = 0;  // counts leaves that have a non-null label

  for (Tree leafNode : t.getLeaves()) {
    final Label leafLabel = leafNode.label();
    if (leafLabel != null) {
      String pos = (leafLabel instanceof HasTag) ? ((HasTag) leafLabel).tag() : null;
      if (pos == null) {
        // The label gave no tag; fall back to the tree's tagged yield.
        if (yieldTags == null) {
          yieldTags = t.taggedYield();
        }
        // NOTE(review): position is not advanced for null-label leaves; if
        // taggedYield() still has entries for those leaves, this lookup
        // would drift. Confirm against Tree.taggedYield().
        pos = yieldTags.get(position).tag();
      }

      if (!(leafLabel instanceof HasLemma)) {
        throw new IllegalArgumentException("Got a tree with labels which do not support lemma");
      }
      final String lemma = lemmatizer.lemma(leafLabel.value(), pos, true);
      ((HasLemma) leafLabel).setLemma(lemma);
      ++position;
    }
  }
  return t;
}
Use of edu.stanford.nlp.ling.TaggedWord in project CoreNLP by stanfordnlp.
Class TSVTaggedFileReaderTest, method testReadBackwards.
/**
 * Reads the test file with the column roles swapped (tag in column 0, word in
 * column 1) and checks that three sentences come back, that the first has
 * three tokens, and that tags and words land on the expected tokens.
 *
 * @throws IOException declared for the file-backed helpers used by this test
 */
public void testReadBackwards() throws IOException {
  File testFile = createTestFile();
  TaggedFileRecord record = createRecord(testFile, "tagColumn=0,wordColumn=1,");

  List<List<TaggedWord>> parsed = new ArrayList<>();
  for (List<TaggedWord> tokens : record.reader()) {
    parsed.add(tokens);
  }

  assertEquals(3, parsed.size());
  assertEquals(3, parsed.get(0).size());

  // {sentence index, token index} for each checked token, in reading order.
  final int[][] coords = { {0, 0}, {0, 1}, {0, 2}, {1, 0}, {1, 1}, {2, 0} };
  final String[] expectedTags = { "A", "B", "C", "D", "E", "F" };
  final String[] expectedWords = { "1", "2", "3", "4", "5", "6" };

  // All tags are verified before any word, matching the original check order.
  for (int k = 0; k < coords.length; ++k) {
    assertEquals(expectedTags[k], parsed.get(coords[k][0]).get(coords[k][1]).tag());
  }
  for (int k = 0; k < coords.length; ++k) {
    assertEquals(expectedWords[k], parsed.get(coords[k][0]).get(coords[k][1]).word());
  }
}
Aggregations