Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class TTags, method read.
protected void read(String filename) {
  try {
    DataInputStream in = IOUtils.getDataInputStream(filename);
    read(in);
    in.close();
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
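The point of the pattern above is wrap-and-rethrow: RuntimeIOException is unchecked, so callers of read(String) do not have to declare or catch IOException. Below is a minimal JDK-only sketch of the same idiom, using try-with-resources so the stream is closed even when deserialization fails; TagDictionary is a hypothetical stand-in for TTags, and java.io.UncheckedIOException plays the role of CoreNLP's RuntimeIOException.

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UncheckedIOException;

// Sketch only: TagDictionary is not a CoreNLP class.
class TagDictionary {
  protected void read(String filename) {
    // try-with-resources closes the stream even if read(in) throws,
    // which the plain open/read/close sequence does not guarantee
    try (DataInputStream in = new DataInputStream(new FileInputStream(filename))) {
      read(in);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  private void read(DataInputStream in) throws IOException {
    // placeholder for the actual deserialization logic
  }
}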
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class TTags, method save.
protected void save(DataOutputStream file, Map<String, Set<String>> tagTokens) {
  try {
    // format: the number of tags, then for each tag its name and closed-class flag
    file.writeInt(index.size());
    for (String item : index) {
      file.writeUTF(item);
      if (learnClosedTags) {
        // a tag seen with fewer than closedTagThreshold distinct tokens is treated as closed
        if (tagTokens.get(item).size() < closedTagThreshold) {
          markClosed(item);
        }
      }
      file.writeBoolean(isClosed(item));
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
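The save method fixes a simple binary layout: an int count, then for each tag a UTF string and a boolean closed-class flag. As a sketch of what the matching read side looks like, assuming only that layout (TagSetReader is illustrative, not the actual TTags.read):

import java.io.DataInputStream;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;

// Sketch only: reads back the layout that save() writes above.
class TagSetReader {
  static Map<String, Boolean> readTagSet(DataInputStream in) throws IOException {
    int size = in.readInt();                 // number of tags written by save()
    Map<String, Boolean> closedByTag = new LinkedHashMap<>();
    for (int i = 0; i < size; i++) {
      String tag = in.readUTF();             // tag name
      boolean closed = in.readBoolean();     // closed-class flag
      closedByTag.put(tag, closed);
    }
    return closedByTag;
  }
}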
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class GrammaticalStructureConversionUtils, method convertTrees.
/**
* Given sentences or trees, output the typed dependencies.
* <p>
* By default, the method outputs the collapsed typed dependencies with
* processing of conjuncts. The input can be given as plain text (one sentence
* per line) using the option -sentFile, or as trees using the option
* -treeFile. For -sentFile, the input has to be strictly one sentence per
* line. You can specify where to find a parser with -parserFile
* serializedParserPath. See LexicalizedParser for more flexible processing of
* text files (including with Stanford Dependencies output). The above options
* assume a file as input. You can also feed trees (only) via stdin by using
* the option -filter. If one does not specify a -parserFile, one
* can specify which language pack to use with -tLPP. This option
* specifies a class which determines which GrammaticalStructure to
* use, which HeadFinder to use, etc. It will default to
* edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams,
* but any TreebankLangParserParams can be specified.
* <p>
* If no method of producing trees is given other than the
* LexicalizedParser, and no parser is specified, a default parser,
* the English parser, is used. You can specify options to load
* with the parser using the -parserOpts flag. If the default
* parser is used, and no options are provided, the option
* -retainTmpSubcategories is used.
* <p>
* The following options can be used to specify the types of dependencies
* wanted: </p>
* <ul>
* <li> -collapsed collapsed dependencies
* <li> -basic non-collapsed dependencies that preserve a tree structure
* <li> -nonCollapsed non-collapsed dependencies that do not preserve a tree
* structure (the basic dependencies plus the extra ones)
* <li> -CCprocessed
* collapsed dependencies and conjunctions processed (dependencies are added
* for each conjunct) -- this is the default if no options are passed
* <li> -collapsedTree collapsed dependencies retaining a tree structure
* <li> -makeCopulaHead Contrary to the approach argued for in the SD papers,
* nevertheless make the verb 'to be' the head, not the predicate noun, adjective,
* etc. (However, when the verb 'to be' is used as an auxiliary verb, the main
* verb is still treated as the head.)
* <li> -originalDependencies generate the dependencies using the original converter
* instead of the Universal Dependencies converter.
* </ul>
* <p>
* The {@code -conllx} option will output the dependencies in the CoNLL format,
* instead of in the standard Stanford format (relation(governor,dependent))
* and will retain punctuation by default.
* When used with the "collapsed" format, words such as prepositions and
* conjunctions that get collapsed into the grammatical relations, and are thus
* no longer part of the sentence per se, will be annotated with "erased" as
* their grammatical relation and attached to the fake "ROOT" node with index 0.
* <p>
* Keeping punctuation is the default behavior. This can be stopped with
* {@code -keepPunct false}
* <p>
* The {@code -extraSep} option used with -nonCollapsed will print the basic
* dependencies first, then a separator ======, and then the extra
* dependencies that do not preserve the tree structure. The -test option is
* used for debugging: it prints the grammatical structure, as well as the
* basic, collapsed and CCprocessed dependencies. It also checks the
* connectivity of the collapsed dependencies. If the collapsed dependencies
* list doesn't constitute a connected graph, it prints the possible offending
* nodes (one of them is the real root of the graph).
* <p>
* Using the -conllxFile option, you can pass a file containing Stanford dependencies
* in the CoNLL format (e.g., the basic dependencies), and obtain another
* representation using one of the representation options.
* <p>
* Usage: <br>
* <code>java edu.stanford.nlp.trees.GrammaticalStructure [-treeFile FILE | -sentFile FILE | -conllxFile FILE | -filter] <br>
* [-collapsed -basic -CCprocessed -test -generateOriginalDependencies]</code>
*
* @param args Command-line arguments, as above
*/
@SuppressWarnings("unchecked")
public static void convertTrees(String[] args, String defaultLang) {
  /* Use a tree normalizer that removes all empty nodes.
     This prevents wrong indexing of the nodes in the dependency relations. */
  Iterable<GrammaticalStructure> gsBank = null;
  Properties props = StringUtils.argsToProperties(args);
  String language = props.getProperty("language", defaultLang);
  ConverterOptions opts = ConverterOptions.getConverterOptions(language);
  MemoryTreebank tb = new MemoryTreebank(opts.treeNormalizer);
  Iterable<Tree> trees = tb;
  String encoding = props.getProperty("encoding", "utf-8");
  try {
    System.setOut(new PrintStream(System.out, true, encoding));
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  String treeFileName = props.getProperty("treeFile");
  String sentFileName = props.getProperty("sentFile");
  String conllXFileName = props.getProperty("conllxFile");
  String altDepPrinterName = props.getProperty("altprinter");
  String altDepReaderName = props.getProperty("altreader");
  String altDepReaderFilename = props.getProperty("altreaderfile");
  String filter = props.getProperty("filter");
  boolean makeCopulaHead = props.getProperty("makeCopulaHead") != null;
  boolean generateOriginalDependencies = props.getProperty("originalDependencies") != null || opts.stanfordDependencies;
  // TODO: if a parser is specified, load this from the parser
  // instead of ever loading it this way
  String tLPP = props.getProperty("tLPP", opts.tlPPClassName);
  TreebankLangParserParams params = ReflectionLoading.loadByReflection(tLPP);
  params.setGenerateOriginalDependencies(generateOriginalDependencies);
  if (makeCopulaHead) {
    // TODO: generalize and allow for more options
    String[] options = { "-makeCopulaHead" };
    params.setOptionFlag(options, 0);
  }
  if (sentFileName == null && (altDepReaderName == null || altDepReaderFilename == null) && treeFileName == null && conllXFileName == null && filter == null) {
    try {
      System.err.printf("Usage: java %s%n", GrammaticalStructure.class.getCanonicalName());
      System.err.println("Options:");
      System.err.println(" Dependency representation:");
      System.err.println(" -basic:\t\tGenerate basic dependencies.");
      System.err.println(" -enhanced:\t\tGenerate enhanced dependencies, currently only implemented for English UD.");
      System.err.println(" -enhanced++:\tGenerate enhanced++ dependencies (default), currently only implemented for English UD.");
      System.err.println(" -collapsed:\t\tGenerate collapsed dependencies, deprecated.");
      System.err.println(" -CCprocessed:\tGenerate CC-processed dependencies, deprecated.");
      System.err.println(" -collapsedTree:\tGenerate collapsed-tree dependencies, deprecated.");
      System.err.println("");
      System.err.println(" Input:");
      System.err.println(" -treeFile <FILE>:\tConvert from constituency trees in <FILE>");
      System.err.println(" -sentFile <FILE>:\tParse and convert sentences from <FILE>. Only implemented for English.");
      System.err.println("");
      System.err.println(" Output:");
      System.err.println(" -conllx:\t\tOutput dependencies in CoNLL format.");
      System.err.println("");
      System.err.println(" Language:");
      System.err.println(" -language [en|zh|en-sd|zh-sd]:\t (Universal English Dependencies, Universal Chinese Dependencies, English Stanford Dependencies, Chinese Stanford Dependencies)");
      System.err.println("");
      System.err.println("");
      System.err.println("");
      System.err.println("Example:");
      TreeReader tr = new PennTreeReader(new StringReader("((S (NP (NNP Sam)) (VP (VBD died) (NP-TMP (NN today)))))"));
      tb.add(tr.readTree());
    } catch (Exception e) {
      log.info("Horrible error: " + e);
      e.printStackTrace();
    }
  } else if (altDepReaderName != null && altDepReaderFilename != null) {
    DependencyReader altDepReader = loadAlternateDependencyReader(altDepReaderName);
    try {
      gsBank = altDepReader.readDependencies(altDepReaderFilename);
    } catch (IOException e) {
      log.info("Error reading " + altDepReaderFilename);
      return;
    }
  } else if (treeFileName != null) {
    tb.loadPath(treeFileName);
  } else if (filter != null) {
    tb.load(IOUtils.readerFromStdin());
  } else if (conllXFileName != null) {
    try {
      gsBank = params.readGrammaticalStructureFromFile(conllXFileName);
    } catch (RuntimeIOException e) {
      log.info("Error reading " + conllXFileName);
      return;
    }
  } else {
    String parserFile = props.getProperty("parserFile");
    String parserOpts = props.getProperty("parserOpts");
    boolean tokenized = props.getProperty("tokenized") != null;
    Function<List<? extends HasWord>, Tree> lp = loadParser(parserFile, parserOpts, makeCopulaHead);
    trees = new LazyLoadTreesByParsing(sentFileName, encoding, tokenized, lp);
    // this branch necessarily uses a LexicalizedParser, so getTLPParams is available via reflection
    try {
      Method method = lp.getClass().getMethod("getTLPParams");
      params = (TreebankLangParserParams) method.invoke(lp);
      params.setGenerateOriginalDependencies(generateOriginalDependencies);
    } catch (Exception cnfe) {
      throw new RuntimeException(cnfe);
    }
  }
  // process the output according to the options passed
  boolean basic = props.getProperty("basic") != null;
  boolean collapsed = props.getProperty("collapsed") != null;
  boolean CCprocessed = props.getProperty("CCprocessed") != null;
  boolean collapsedTree = props.getProperty("collapsedTree") != null;
  boolean nonCollapsed = props.getProperty("nonCollapsed") != null;
  boolean extraSep = props.getProperty("extraSep") != null;
  boolean parseTree = props.getProperty("parseTree") != null;
  boolean test = props.getProperty("test") != null;
  boolean keepPunct = PropertiesUtils.getBool(props, "keepPunct", true);
  boolean conllx = props.getProperty("conllx") != null;
  // todo: Support checkConnected on more options (including basic)
  boolean checkConnected = props.getProperty("checkConnected") != null;
  boolean portray = props.getProperty("portray") != null;
  boolean enhanced = props.getProperty("enhanced") != null;
  boolean enhancedPlusPlus = props.getProperty("enhanced++") != null;
  // If requested, load an alternative printer
  DependencyPrinter altDepPrinter = null;
  if (altDepPrinterName != null) {
    altDepPrinter = loadAlternateDependencyPrinter(altDepPrinterName);
  }
  // log.info("First tree in tb is");
  // log.info(((MemoryTreebank) tb).get(0));
  Method m = null;
  if (test) {
    // Do this by reflection to avoid this becoming a dependency when we distribute the parser
    try {
      Class sgf = Class.forName("edu.stanford.nlp.semgraph.SemanticGraphFactory");
      m = sgf.getDeclaredMethod("makeFromTree", GrammaticalStructure.class, SemanticGraphFactory.Mode.class, GrammaticalStructure.Extras.class, Predicate.class);
    } catch (Exception e) {
      log.info("Test cannot check for cycles in tree format (classes not available)");
    }
  }
  if (gsBank == null) {
    gsBank = new TreeBankGrammaticalStructureWrapper(trees, keepPunct, params);
  }
  for (GrammaticalStructure gs : gsBank) {
    Tree tree;
    if (gsBank instanceof TreeBankGrammaticalStructureWrapper) {
      // log.info("Using TreeBankGrammaticalStructureWrapper branch");
      tree = ((TreeBankGrammaticalStructureWrapper) gsBank).getOriginalTree(gs);
      // log.info("Tree is: ");
      // log.info(t);
    } else {
      // log.info("Using gs.root() branch");
      // recover tree
      tree = gs.root();
      // log.info("Tree from gs is");
      // log.info(t);
    }
    if (test) {
      // print the grammatical structure and the basic, collapsed and CCprocessed dependencies
      System.out.println("============= parse tree =======================");
      tree.pennPrint();
      System.out.println();
      System.out.println("------------- GrammaticalStructure -------------");
      System.out.println(gs);
      boolean allConnected = true;
      boolean connected;
      Collection<TypedDependency> bungRoots = null;
      System.out.println("------------- basic dependencies ---------------");
      List<TypedDependency> gsb = gs.typedDependencies(GrammaticalStructure.Extras.NONE);
      System.out.println(StringUtils.join(gsb, "\n"));
      connected = GrammaticalStructure.isConnected(gsb);
      if (!connected && bungRoots == null) {
        bungRoots = GrammaticalStructure.getRoots(gsb);
      }
      allConnected = connected && allConnected;
      System.out.println("------------- non-collapsed dependencies (basic + extra) ---------------");
      List<TypedDependency> gse = gs.typedDependencies(GrammaticalStructure.Extras.MAXIMAL);
      System.out.println(StringUtils.join(gse, "\n"));
      connected = GrammaticalStructure.isConnected(gse);
      if (!connected && bungRoots == null) {
        bungRoots = GrammaticalStructure.getRoots(gse);
      }
      allConnected = connected && allConnected;
      System.out.println("------------- collapsed dependencies -----------");
      System.out.println(StringUtils.join(gs.typedDependenciesCollapsed(GrammaticalStructure.Extras.MAXIMAL), "\n"));
      System.out.println("------------- collapsed dependencies tree -----------");
      System.out.println(StringUtils.join(gs.typedDependenciesCollapsedTree(), "\n"));
System.out.println("------------- CCprocessed dependencies --------");
List<TypedDependency> gscc = gs.typedDependenciesCollapsed(GrammaticalStructure.Extras.MAXIMAL);
System.out.println(StringUtils.join(gscc, "\n"));
System.out.println("-----------------------------------------------");
// connectivity tests
connected = GrammaticalStructure.isConnected(gscc);
if (!connected && bungRoots == null) {
bungRoots = GrammaticalStructure.getRoots(gscc);
}
allConnected = connected && allConnected;
      if (allConnected) {
        System.out.println("dependencies form connected graphs.");
      } else {
        System.out.println("dependency graph NOT connected! possible offending nodes: " + bungRoots);
      }
      // optionally check for cycles, using the semgraph classes located by reflection above
      if (m != null) {
        try {
          // the first arg is null because it's a static method....
          Object semGraph = m.invoke(null, gs, SemanticGraphFactory.Mode.CCPROCESSED, GrammaticalStructure.Extras.MAXIMAL, null);
          Class sg = Class.forName("edu.stanford.nlp.semgraph.SemanticGraph");
          Method mDag = sg.getDeclaredMethod("isDag");
          boolean isDag = (Boolean) mDag.invoke(semGraph);
          System.out.println("tree dependencies form a DAG: " + isDag);
        } catch (Exception e) {
          e.printStackTrace();
        }
      }
    } else { // end of "test" output
      if (parseTree) {
        System.out.println("============= parse tree =======================");
        tree.pennPrint();
        System.out.println();
      }
      if (basic) {
        if (collapsed || CCprocessed || collapsedTree || nonCollapsed || enhanced || enhancedPlusPlus) {
          System.out.println("------------- basic dependencies ---------------");
        }
        if (altDepPrinter == null) {
          printDependencies(gs, gs.typedDependencies(GrammaticalStructure.Extras.NONE), tree, conllx, false, opts.convertToUPOS);
        } else {
          System.out.println(altDepPrinter.dependenciesToString(gs, gs.typedDependencies(GrammaticalStructure.Extras.NONE), tree));
        }
      }
      if (nonCollapsed) {
        if (basic || CCprocessed || collapsed || collapsedTree) {
          System.out.println("----------- non-collapsed dependencies (basic + extra) -----------");
        }
        printDependencies(gs, gs.allTypedDependencies(), tree, conllx, extraSep, opts.convertToUPOS);
      }
      if (collapsed) {
        if (basic || CCprocessed || collapsedTree || nonCollapsed) {
          System.out.println("----------- collapsed dependencies -----------");
        }
        printDependencies(gs, gs.typedDependenciesCollapsed(GrammaticalStructure.Extras.MAXIMAL), tree, conllx, false, opts.convertToUPOS);
      }
      if (CCprocessed) {
        if (basic || collapsed || collapsedTree || nonCollapsed) {
          System.out.println("---------- CCprocessed dependencies ----------");
        }
        List<TypedDependency> deps = gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL);
        if (checkConnected) {
          if (!GrammaticalStructure.isConnected(deps)) {
            log.info("Graph is not connected for:");
            log.info(tree);
            log.info("possible offending nodes: " + GrammaticalStructure.getRoots(deps));
          }
        }
        printDependencies(gs, deps, tree, conllx, false, opts.convertToUPOS);
      }
      if (collapsedTree) {
        if (basic || CCprocessed || collapsed || nonCollapsed) {
          System.out.println("----------- collapsed dependencies tree -----------");
        }
        printDependencies(gs, gs.typedDependenciesCollapsedTree(), tree, conllx, false, opts.convertToUPOS);
      }
      if (enhanced) {
        if (basic || enhancedPlusPlus) {
          System.out.println("----------- enhanced dependencies tree -----------");
        }
        printDependencies(gs, gs.typedDependenciesEnhanced(), tree, conllx, false, opts.convertToUPOS);
      }
      if (enhancedPlusPlus) {
        if (basic || enhanced) {
          System.out.println("----------- enhanced++ dependencies tree -----------");
        }
        printDependencies(gs, gs.typedDependenciesEnhancedPlusPlus(), tree, conllx, false, opts.convertToUPOS);
      }
      // default use: enhanced++ for UD, CCprocessed for SD (to parallel what happens within the parser)
      if (!basic && !collapsed && !CCprocessed && !collapsedTree && !nonCollapsed && !enhanced && !enhancedPlusPlus) {
        if (generateOriginalDependencies) {
          printDependencies(gs, gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL), tree, conllx, false, opts.convertToUPOS);
        } else {
          printDependencies(gs, gs.typedDependenciesEnhancedPlusPlus(), tree, conllx, false, opts.convertToUPOS);
        }
      }
    }
    if (portray) {
      try {
        // put up a window showing it
        Class sgu = Class.forName("edu.stanford.nlp.rte.gui.SemanticGraphVisualization");
        Method mRender = sgu.getDeclaredMethod("render", GrammaticalStructure.class, String.class);
        // the first arg is null because it's a static method....
        mRender.invoke(null, gs, "Collapsed, CC processed deps");
      } catch (Exception e) {
        throw new RuntimeException("Couldn't use swing to portray semantic graph", e);
      }
    }
  } // end for
}
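For reference, the converter can also be driven programmatically with the same flags as the command line. A small sketch, assuming the class lives in edu.stanford.nlp.trees alongside GrammaticalStructure; the input file name is made up:

import edu.stanford.nlp.trees.GrammaticalStructureConversionUtils;

public class ConvertTreesDemo {
  public static void main(String[] args) {
    // equivalent to passing -treeFile trees.mrg -conllx on the command line;
    // with no representation flag, enhanced++ is the default for UD
    String[] flags = { "-treeFile", "trees.mrg", "-conllx" };
    GrammaticalStructureConversionUtils.convertTrees(flags, "en");
  }
}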
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class Macros, method readMacros.
public static List<Pair<String, String>> readMacros(BufferedReader bin) {
  try {
    List<Pair<String, String>> macros = new ArrayList<>();
    String line;
    int lineNumber = 0;
    while ((line = bin.readLine()) != null) {
      ++lineNumber;
      String trimmed = line.trim();
      // skip blank lines and comments
      if (trimmed.isEmpty() || trimmed.charAt(0) == '#') {
        continue;
      }
      // split into original and replacement at the first tab
      String[] pieces = line.split("\t", 2);
      if (pieces.length < 2) {
        throw new IllegalArgumentException("Expected lines of the format: original <tab> replacement. Line " + lineNumber + " does not match.");
      }
      macros.add(new Pair<>(pieces[0], pieces[1]));
    }
    return macros;
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
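A quick usage sketch: readMacros skips comments and blank lines and splits each remaining line at its first tab into an (original, replacement) pair. This assumes CoreNLP's edu.stanford.nlp.util.Pair and that Macros lives in edu.stanford.nlp.trees.tregex; the macro definitions themselves are made up.

import java.io.BufferedReader;
import java.io.StringReader;
import java.util.List;
import edu.stanford.nlp.trees.tregex.Macros;
import edu.stanford.nlp.util.Pair;

public class MacrosDemo {
  public static void main(String[] args) {
    // an in-memory "macro file"; the patterns are hypothetical
    String macroFile =
        "# lines starting with '#' are ignored\n" +
        "NP_SUBJ\t(NP < NN)\n" +
        "VB_ANY\t/^VB/\n";
    List<Pair<String, String>> macros =
        Macros.readMacros(new BufferedReader(new StringReader(macroFile)));
    for (Pair<String, String> macro : macros) {
      System.out.println(macro.first() + " -> " + macro.second());
    }
  }
}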
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class TraditionalSimplifiedCharacterMap, method translateLines.
public void translateLines(BufferedReader br, BufferedWriter bw) {
  try {
    String line;
    while ((line = br.readLine()) != null) {
      bw.write(apply(line));
      bw.newLine();
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
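And a usage sketch for the mapper over an in-memory document. This assumes TraditionalSimplifiedCharacterMap has a no-argument constructor that loads its default traditional-to-simplified mapping (check the actual class for its constructors); note that because translateLines wraps any IOException in RuntimeIOException, the translation call itself needs no throws clause.

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;

public class TranslateDemo {
  public static void main(String[] args) throws IOException {
    // assumption: a no-arg constructor loading the default character mapping
    TraditionalSimplifiedCharacterMap mapper = new TraditionalSimplifiedCharacterMap();
    StringWriter out = new StringWriter();
    try (BufferedReader br = new BufferedReader(new StringReader("漢語\n寫作\n"));
         BufferedWriter bw = new BufferedWriter(out)) {
      mapper.translateLines(br, bw);
    }
    System.out.print(out);  // expected: the simplified forms 汉语 and 写作
  }
}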