Search in sources :

Example 1 with CoNLLUReader

Use of edu.stanford.nlp.pipeline.CoNLLUReader in the CoreNLP project by stanfordnlp.

The method loadConllFile of the class Util.

// TODO replace with GrammaticalStructure#readCoNLLGrammaticalStructureCollection
/**
 * Loads dependency-annotated sentences from a CoNLL-U file.
 *
 * <p>The file is read with {@link CoNLLUReader}. For every sentence of every document the
 * reader returns, one {@link CoreMap} carrying the sentence's tokens (under
 * {@code CoreAnnotations.TokensAnnotation}) is appended to {@code sents}, and one
 * {@link DependencyTree} is appended to {@code trees}, in the same order.
 *
 * <p>Token lines whose head column cannot be parsed as an integer are skipped entirely:
 * they produce neither a token nor a tree entry.
 *
 * @param inFile    file to read; passed unchanged to
 *                  {@code CoNLLUReader#readCoNLLUFileCreateCoNLLUDocuments}
 * @param sents     output list; receives one sentence per CoNLL-U sentence
 * @param trees     output list; receives one dependency tree per CoNLL-U sentence
 * @param unlabeled if {@code true}, every arc added to the tree is labelled
 *                  {@code Config.UNKNOWN} instead of the relation found in the file
 *                  (the token annotation still keeps the relation from the file)
 * @param cPOS      if {@code true}, the tag is taken from the UPOS column, otherwise
 *                  from the XPOS column
 * @throws RuntimeIOException if the reader throws an {@link IOException}
 * @throws RuntimeException   if the reader throws a {@link ClassNotFoundException}
 */
public static void loadConllFile(String inFile, List<CoreMap> sents, List<DependencyTree> trees, boolean unlabeled, boolean cPOS) {
    CoreLabelTokenFactory tf = new CoreLabelTokenFactory(false);
    // The POS column is the same for every token, so choose it once up front.
    final int posField = cPOS ? CoNLLUReader.CoNLLU_UPOSField : CoNLLUReader.CoNLLU_XPOSField;
    try {
        CoNLLUReader conllUReader = new CoNLLUReader();
        List<CoNLLUReader.CoNLLUDocument> docs = conllUReader.readCoNLLUFileCreateCoNLLUDocuments(inFile);
        for (CoNLLUReader.CoNLLUDocument doc : docs) {
            for (CoNLLUReader.CoNLLUSentence conllSent : doc.sentences) {
                CoreMap sentence = new CoreLabel();
                List<CoreLabel> sentenceTokens = new ArrayList<>();
                DependencyTree tree = new DependencyTree();
                for (String tokenLine : conllSent.tokenLines) {
                    String[] splits = tokenLine.split("\t");
                    String word = splits[CoNLLUReader.CoNLLU_WordField];
                    String pos = splits[posField];
                    String depType = splits[CoNLLUReader.CoNLLU_RelnField];

                    int head;
                    try {
                        // Named constant instead of the former literal 6, matching the other columns.
                        head = Integer.parseInt(splits[CoNLLUReader.CoNLLU_GovField]);
                    } catch (NumberFormatException ignored) {
                        // Deliberate best-effort: a line without a numeric head is dropped.
                        // NOTE(review): presumably these are multi-word-token ranges / empty
                        // nodes, whose head column is not a number - confirm.
                        continue;
                    }

                    // Position arguments are passed as 0; only the word text is used here.
                    CoreLabel token = tf.makeToken(word, 0, 0);
                    token.setTag(pos);
                    token.set(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, head);
                    token.set(CoreAnnotations.CoNLLDepTypeAnnotation.class, depType);
                    sentenceTokens.add(token);

                    if (unlabeled) {
                        tree.add(head, Config.UNKNOWN);
                    } else {
                        tree.add(head, depType);
                    }
                }
                trees.add(tree);
                sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
                sents.add(sentence);
            }
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
Also used: RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException), CoreLabelTokenFactory (edu.stanford.nlp.process.CoreLabelTokenFactory), CoreLabel (edu.stanford.nlp.ling.CoreLabel), CoNLLUReader (edu.stanford.nlp.pipeline.CoNLLUReader), CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations), CoreMap (edu.stanford.nlp.util.CoreMap)

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 1, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 1, CoreLabel (edu.stanford.nlp.ling.CoreLabel): 1, CoNLLUReader (edu.stanford.nlp.pipeline.CoNLLUReader): 1, CoreLabelTokenFactory (edu.stanford.nlp.process.CoreLabelTokenFactory): 1, CoreMap (edu.stanford.nlp.util.CoreMap): 1