Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class ClassifierCombiner, method serializeClassifier.
// method for writing a ClassifierCombiner to an ObjectOutputStream
@Override
public void serializeClassifier(ObjectOutputStream oos) {
  try {
    // record the properties used to initialize
    oos.writeObject(initProps);
    // this is a bit of a hack, but we have to write this twice so it can be read again
    // after AbstractSequenceClassifier is initialized: when this is read from the
    // ObjectInputStream, it is read once to call super(props) and then read a
    // second time to set this.initProps
    // TODO: AbstractSequenceClassifier should probably store initProps to get rid of this double writing
    oos.writeObject(initProps);
    // record the initial loadPaths
    oos.writeObject(initLoadPaths);
    // record the combinationMode
    String combinationModeString = combinationMode.name();
    oos.writeObject(combinationModeString);
    // record the number of base classifiers to write to disk
    int numClassifiers = baseClassifiers.size();
    oos.writeInt(numClassifiers);
    // go through baseClassifiers and write each one to disk with its serialize method
    log.info("");
    for (AbstractSequenceClassifier<IN> asc : baseClassifiers) {
      // CRFClassifier crfc = (CRFClassifier) asc;
      // log.info("Serializing a base classifier...");
      asc.serializeClassifier(oos);
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
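For context, a caller hands this method an ObjectOutputStream opened on a serialization path. Below is a minimal usage sketch, not from the source above: the helper method, the CoreLabel type parameter, and the path are illustrative, and it assumes IOUtils.writeStreamFromString returns a writable ObjectOutputStream, as it does elsewhere in CoreNLP.

// Hypothetical helper; the path argument (e.g. "ner.combined.ser.gz") is illustrative.
public static void saveCombiner(ClassifierCombiner<CoreLabel> combiner, String path) {
  try (ObjectOutputStream oos = IOUtils.writeStreamFromString(path)) {
    combiner.serializeClassifier(oos);  // delegates to the method shown above
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}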
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class Util, method loadConllFile.
// TODO replace with GrammaticalStructure#readCoNLLGrammaticalStructureCollection
public static void loadConllFile(String inFile, List<CoreMap> sents, List<DependencyTree> trees, boolean unlabeled, boolean cPOS) {
  CoreLabelTokenFactory tf = new CoreLabelTokenFactory(false);
  try {
    CoNLLUReader conllUReader = new CoNLLUReader();
    List<CoNLLUReader.CoNLLUDocument> docs = conllUReader.readCoNLLUFileCreateCoNLLUDocuments(inFile);
    for (CoNLLUReader.CoNLLUDocument doc : docs) {
      for (CoNLLUReader.CoNLLUSentence conllSent : doc.sentences) {
        CoreMap sentence = new CoreLabel();
        List<CoreLabel> sentenceTokens = new ArrayList<>();
        DependencyTree tree = new DependencyTree();
        for (String tokenLine : conllSent.tokenLines) {
          String[] splits = tokenLine.split("\t");
          String word = splits[CoNLLUReader.CoNLLU_WordField],
              pos = cPOS ? splits[CoNLLUReader.CoNLLU_UPOSField] : splits[CoNLLUReader.CoNLLU_XPOSField],
              depType = splits[CoNLLUReader.CoNLLU_RelnField];
          int head = -1;
          try {
            head = Integer.parseInt(splits[6]);
          } catch (NumberFormatException e) {
            continue;
          }
          CoreLabel token = tf.makeToken(word, 0, 0);
          token.setTag(pos);
          token.set(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, head);
          token.set(CoreAnnotations.CoNLLDepTypeAnnotation.class, depType);
          sentenceTokens.add(token);
          if (!unlabeled)
            tree.add(head, depType);
          else
            tree.add(head, Config.UNKNOWN);
        }
        trees.add(tree);
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        sents.add(sentence);
      }
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
  /*try (BufferedReader reader = IOUtils.readerFromString(inFile)) {
    List<CoreLabel> sentenceTokens = new ArrayList<>();
    DependencyTree tree = new DependencyTree();
    for (String line : IOUtils.getLineIterable(reader, false)) {
      String[] splits = line.split("\t");
      if (splits.length < 10) {
        if (sentenceTokens.size() > 0) {
          trees.add(tree);
          CoreMap sentence = new CoreLabel();
          sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
          sents.add(sentence);
          tree = new DependencyTree();
          sentenceTokens = new ArrayList<>();
        }
      } else {
        String word = splits[1],
            pos = cPOS ? splits[3] : splits[4],
            depType = splits[7];
        int head = -1;
        try {
          head = Integer.parseInt(splits[6]);
        } catch (NumberFormatException e) {
          continue;
        }
        CoreLabel token = tf.makeToken(word, 0, 0);
        token.setTag(pos);
        token.set(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, head);
        token.set(CoreAnnotations.CoNLLDepTypeAnnotation.class, depType);
        sentenceTokens.add(token);
        if (!unlabeled)
          tree.add(head, depType);
        else
          tree.add(head, Config.UNKNOWN);
      }
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }*/
}
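A hypothetical call site follows; the file name is illustrative. The two output lists are filled in parallel, one entry per sentence.

// Hypothetical usage: load sentences and gold dependency trees from a CoNLL-U file.
List<CoreMap> sentences = new ArrayList<>();
List<DependencyTree> goldTrees = new ArrayList<>();
// unlabeled=false keeps the dependency relation labels;
// cPOS=true selects universal POS tags rather than language-specific ones.
Util.loadConllFile("dev.conllu", sentences, goldTrees, false, true);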
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class DependencyParser, method readEmbedFile.
// TODO this should be a function which returns the embeddings array + embedID
// otherwise the class needlessly carries around the extra baggage of `embeddings`
// (never again used) for the entire training process
private double[][] readEmbedFile(String embedFile, Map<String, Integer> embedID) {
  double[][] embeddings = null;
  if (embedFile != null) {
    try (BufferedReader input = IOUtils.readerFromString(embedFile)) {
      List<String> lines = new ArrayList<>();
      for (String s; (s = input.readLine()) != null; ) {
        lines.add(s);
      }
      int nWords = lines.size();
      String[] splits = lines.get(0).split("\\s+");
      int dim = splits.length - 1;
      embeddings = new double[nWords][dim];
      log.info("Embedding File " + embedFile + ": #Words = " + nWords + ", dim = " + dim);
      if (dim != config.embeddingSize)
        throw new IllegalArgumentException("The dimension of embedding file does not match config.embeddingSize (" + dim + " vs " + config.embeddingSize + "). Perhaps set the -embeddingSize flag");
      for (int i = 0; i < lines.size(); ++i) {
        splits = lines.get(i).split("\\s+");
        embedID.put(splits[0], i);
        for (int j = 0; j < dim; ++j)
          embeddings[i][j] = Double.parseDouble(splits[j + 1]);
      }
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
    embeddings = Util.scaling(embeddings, 0, 1.0);
  }
  return embeddings;
}
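The expected embedding file layout is one word per line followed by its whitespace-separated vector components; the width of the first line determines dim. A sketch of a training call that would exercise this method is below; the paths are illustrative, and the four-argument train overload is an assumption about the public nndep API rather than part of the source above.

// Hypothetical sketch; all file paths are illustrative.
Properties props = StringUtils.argsToProperties("-embeddingSize", "100");
DependencyParser parser = new DependencyParser(props);
// readEmbedFile runs internally during training when an embedding file is given;
// the file's vector width must match -embeddingSize (here, 100).
parser.train("train.conllx", "dev.conllx", "parser.model.txt.gz", "embeddings.txt");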
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class SemanticGraphPrinter, method main.
public static void main(String[] args) {
  Treebank tb = new MemoryTreebank();
  Properties props = StringUtils.argsToProperties(args);
  String treeFileName = props.getProperty("treeFile");
  String sentFileName = props.getProperty("sentFile");
  String testGraph = props.getProperty("testGraph");
  if (testGraph == null) {
    testGraph = "false";
  }
  String load = props.getProperty("load");
  String save = props.getProperty("save");
  if (load != null) {
    log.info("Load not implemented!");
    return;
  }
  if (sentFileName == null && treeFileName == null) {
    log.info("Usage: java SemanticGraph [-sentFile file|-treeFile file] [-testGraph]");
    Tree t = Tree.valueOf("(ROOT (S (NP (NP (DT An) (NN attempt)) (PP (IN on) (NP (NP (NNP Andres) (NNP Pastrana) (POS 's)) (NN life)))) (VP (VBD was) (VP (VBN carried) (PP (IN out) (S (VP (VBG using) (NP (DT a) (JJ powerful) (NN bomb))))))) (. .)))");
    tb.add(t);
  } else if (treeFileName != null) {
    tb.loadPath(treeFileName);
  } else {
    String[] options = { "-retainNPTmpSubcategories" };
    LexicalizedParser lp = LexicalizedParser.loadModel("/u/nlp/data/lexparser/englishPCFG.ser.gz", options);
    BufferedReader reader = null;
    try {
      reader = IOUtils.readerFromString(sentFileName);
    } catch (IOException e) {
      throw new RuntimeIOException("Cannot find or open " + sentFileName, e);
    }
    try {
      System.out.println("Processing sentence file " + sentFileName);
      for (String line; (line = reader.readLine()) != null; ) {
        System.out.println("Processing sentence: " + line);
        PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(line));
        List<Word> words = ptb.tokenize();
        Tree parseTree = lp.parseTree(words);
        tb.add(parseTree);
      }
      reader.close();
    } catch (Exception e) {
      throw new RuntimeException("Exception reading key file " + sentFileName, e);
    }
  }
  for (Tree t : tb) {
    SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(t);
    System.out.println(sg.toString());
    System.out.println(sg.toCompactString());
    if (testGraph.equals("true")) {
      SemanticGraph g1 = SemanticGraphFactory.generateCollapsedDependencies(t);
      System.out.println("TEST SEMANTIC GRAPH - graph ----------------------------");
      System.out.println(g1.toString());
      System.out.println("readable ----------------------------");
      System.out.println(g1.toString(SemanticGraph.OutputFormat.READABLE));
      System.out.println("List of dependencies ----------------------------");
      System.out.println(g1.toList());
      System.out.println("xml ----------------------------");
      System.out.println(g1.toString(SemanticGraph.OutputFormat.XML));
      System.out.println("dot ----------------------------");
      System.out.println(g1.toDotFormat());
      System.out.println("dot (simple) ----------------------------");
      System.out.println(g1.toDotFormat("Simple", CoreLabel.OutputFormat.VALUE));
      // System.out.println(" graph ----------------------------");
      // System.out.println(t.allTypedDependenciesCCProcessed(false));
    }
  }
  if (save != null) {
    log.info("Save not implemented!");
  }
}
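The tree-to-graph conversion at the heart of the loop can also be used on its own. A minimal sketch using only calls that already appear above; the sentence is illustrative.

// Minimal sketch: constituency Tree -> dependency SemanticGraph.
Tree t = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBZ sleeps)) (. .)))");
SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(t);
System.out.println(sg.toString(SemanticGraph.OutputFormat.READABLE));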
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class StringUtils, method propFileToProperties.
/**
 * This method reads in properties listed in a file in the format prop=value, one property per line.
 * Although {@code Properties.load(InputStream)} exists, this method additionally trims each value,
 * which {@code load()} does not do.
 *
 * @param filename A properties file to read
 * @return The corresponding Properties object
 */
public static Properties propFileToProperties(String filename) {
  // try-with-resources ensures the stream is closed even if load() throws
  try (InputStream is = new BufferedInputStream(new FileInputStream(filename))) {
    Properties result = new Properties();
    result.load(is);
    // trim all values
    for (String propKey : result.stringPropertyNames()) {
      String newVal = result.getProperty(propKey);
      result.setProperty(propKey, newVal.trim());
    }
    return result;
  } catch (IOException e) {
    throw new RuntimeIOException("propFileToProperties could not read properties file: " + filename, e);
  }
}
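A hypothetical call, with an illustrative file name:

// Values come back with surrounding whitespace trimmed.
Properties config = StringUtils.propFileToProperties("server.properties");
String host = config.getProperty("host");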