Use of edu.stanford.nlp.pipeline.CoNLLUReader in the CoreNLP project by stanfordnlp.
The method loadConllFile of the class Util.
// TODO replace with GrammaticalStructure#readCoNLLGrammaticalStructureCollection
/**
 * Loads a CoNLL-U file and appends one sentence and one dependency tree per
 * CoNLL-U sentence to the supplied output lists.
 *
 * <p>Each token line is split on tabs. A token whose head field cannot be
 * parsed as an integer is skipped entirely: it is added neither to the
 * sentence's token list nor to the dependency tree.
 *
 * @param inFile    file to read, passed to
 *                  {@code CoNLLUReader#readCoNLLUFileCreateCoNLLUDocuments}
 * @param sents     output list; receives one {@code CoreMap} per sentence whose
 *                  {@code TokensAnnotation} holds the tokens that were kept
 * @param trees     output list; receives one {@code DependencyTree} per sentence,
 *                  in the same order as {@code sents}
 * @param unlabeled if true, every arc is stored with {@code Config.UNKNOWN}
 *                  instead of the relation read from the file
 * @param cPOS      if true, the tag is taken from the UPOS field, otherwise
 *                  from the XPOS field
 * @throws RuntimeIOException if reading the file raises an {@code IOException}
 * @throws RuntimeException   if the reader raises a {@code ClassNotFoundException}
 */
public static void loadConllFile(String inFile, List<CoreMap> sents, List<DependencyTree> trees, boolean unlabeled, boolean cPOS) {
  // 0-based column of the head index within a tab-separated token line.
  final int headField = 6;

  CoreLabelTokenFactory tf = new CoreLabelTokenFactory(false);
  try {
    CoNLLUReader conllUReader = new CoNLLUReader();
    List<CoNLLUReader.CoNLLUDocument> docs = conllUReader.readCoNLLUFileCreateCoNLLUDocuments(inFile);
    for (CoNLLUReader.CoNLLUDocument doc : docs) {
      for (CoNLLUReader.CoNLLUSentence conllSent : doc.sentences) {
        CoreMap sentence = new CoreLabel();
        List<CoreLabel> sentenceTokens = new ArrayList<>();
        DependencyTree tree = new DependencyTree();

        for (String tokenLine : conllSent.tokenLines) {
          String[] splits = tokenLine.split("\t");
          String word = splits[CoNLLUReader.CoNLLU_WordField];
          String pos = cPOS ? splits[CoNLLUReader.CoNLLU_UPOSField] : splits[CoNLLUReader.CoNLLU_XPOSField];
          String depType = splits[CoNLLUReader.CoNLLU_RelnField];

          int head;
          try {
            head = Integer.parseInt(splits[headField]);
          } catch (NumberFormatException ignored) {
            // Deliberate best-effort: lines without a numeric head are dropped.
            // NOTE(review): presumably these are lines with a "_" head; confirm.
            continue;
          }

          CoreLabel token = tf.makeToken(word, 0, 0);
          token.setTag(pos);
          token.set(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, head);
          token.set(CoreAnnotations.CoNLLDepTypeAnnotation.class, depType);
          sentenceTokens.add(token);

          if (!unlabeled) {
            tree.add(head, depType);
          } else {
            tree.add(head, Config.UNKNOWN);
          }
        }

        trees.add(tree);
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        sents.add(sentence);
      }
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
}
Aggregations