Search in sources :

Example 11 with CoreLabelTokenFactory

use of edu.stanford.nlp.process.CoreLabelTokenFactory in project CoreNLP by stanfordnlp.

The method loadConllFile in the class Util.

// TODO replace with GrammaticalStructure#readCoNLLGrammaticalStructureCollection
/**
 * Reads a CoNLL-U file and appends, for every sentence found, one sentence
 * object to {@code sents} and one dependency tree to {@code trees}.
 *
 * <p>Each token line is split on tabs. A token line whose head column is not
 * an integer is skipped entirely: no token is added to the sentence and no arc
 * is added to the tree. Every kept token carries its tag, its head index
 * ({@code CoNLLDepParentIndexAnnotation}) and its relation label
 * ({@code CoNLLDepTypeAnnotation}); the original relation label is stored on
 * the token even when {@code unlabeled} is set.
 *
 * @param inFile    location handed to
 *                  {@code CoNLLUReader#readCoNLLUFileCreateCoNLLUDocuments}
 * @param sents     output list; one {@link CoreMap} holding a
 *                  {@code TokensAnnotation} is appended per sentence
 * @param trees     output list; one {@link DependencyTree} is appended per
 *                  sentence, in the same order as {@code sents}
 * @param unlabeled if {@code true}, tree arcs are labelled with
 *                  {@code Config.UNKNOWN} instead of the relation from the file
 * @param cPOS      if {@code true}, tags come from the UPOS column; otherwise
 *                  from the XPOS column
 * @throws RuntimeIOException if reading the file raises an {@link IOException}
 * @throws RuntimeException   if the reader raises a {@link ClassNotFoundException}
 */
public static void loadConllFile(String inFile, List<CoreMap> sents, List<DependencyTree> trees, boolean unlabeled, boolean cPOS) {
    CoreLabelTokenFactory tf = new CoreLabelTokenFactory(false);
    // The tag column is the same for every line, so pick it once up front.
    final int posField = cPOS ? CoNLLUReader.CoNLLU_UPOSField : CoNLLUReader.CoNLLU_XPOSField;
    try {
        CoNLLUReader conllUReader = new CoNLLUReader();
        List<CoNLLUReader.CoNLLUDocument> docs = conllUReader.readCoNLLUFileCreateCoNLLUDocuments(inFile);
        for (CoNLLUReader.CoNLLUDocument doc : docs) {
            for (CoNLLUReader.CoNLLUSentence conllSent : doc.sentences) {
                CoreMap sentence = new CoreLabel();
                List<CoreLabel> sentenceTokens = new ArrayList<>();
                DependencyTree tree = new DependencyTree();
                for (String tokenLine : conllSent.tokenLines) {
                    String[] splits = tokenLine.split("\t");
                    String word = splits[CoNLLUReader.CoNLLU_WordField];
                    String pos = splits[posField];
                    String depType = splits[CoNLLUReader.CoNLLU_RelnField];
                    final int head;
                    try {
                        // Use the named head-column constant like the other columns
                        // instead of a bare index.
                        head = Integer.parseInt(splits[CoNLLUReader.CoNLLU_GovField]);
                    } catch (NumberFormatException ignored) {
                        // Deliberate: lines without an integer head are left out of
                        // both the token list and the tree.
                        continue;
                    }
                    CoreLabel token = tf.makeToken(word, 0, 0);
                    token.setTag(pos);
                    token.set(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, head);
                    token.set(CoreAnnotations.CoNLLDepTypeAnnotation.class, depType);
                    sentenceTokens.add(token);
                    tree.add(head, unlabeled ? Config.UNKNOWN : depType);
                }
                trees.add(tree);
                sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
                sents.add(sentence);
            }
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabelTokenFactory(edu.stanford.nlp.process.CoreLabelTokenFactory) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoNLLUReader(edu.stanford.nlp.pipeline.CoNLLUReader) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 12 with CoreLabelTokenFactory

use of edu.stanford.nlp.process.CoreLabelTokenFactory in project CoreNLP by stanfordnlp.

The method getDefaultTokensAggregator in the class Env.

/**
 * Returns the default tokens aggregator, building and caching it on first use.
 *
 * <p>The aggregator is only built when it has not been created yet and either
 * explicit default aggregators are configured or {@code aggregateToTokens} is
 * set. In every other case the current value of the cached field is returned
 * as is, which may be {@code null}.
 *
 * @return the cached aggregator, possibly {@code null}
 */
public CoreMapAggregator getDefaultTokensAggregator() {
    // Already built: hand back the cached instance.
    if (defaultTokensAggregator != null) {
        return defaultTokensAggregator;
    }
    // Nothing configured that would call for an aggregator: leave the field untouched.
    if (defaultTokensAggregators == null && !aggregateToTokens) {
        return defaultTokensAggregator;
    }

    // Fall back to the numeric defaults when no explicit aggregators were supplied.
    Map<Class, CoreMapAttributeAggregator> attributeAggregators = defaultTokensAggregators;
    if (attributeAggregators == null) {
        attributeAggregators = CoreMapAttributeAggregator.DEFAULT_NUMERIC_TOKENS_AGGREGATORS;
    }

    // A token factory is only passed along when aggregating to tokens.
    CoreLabelTokenFactory factory = null;
    if (aggregateToTokens) {
        factory = new CoreLabelTokenFactory();
    }

    defaultTokensAggregator = CoreMapAggregator.getAggregator(attributeAggregators, null, factory);
    return defaultTokensAggregator;
}
Also used : CoreLabelTokenFactory(edu.stanford.nlp.process.CoreLabelTokenFactory) CoreMapAttributeAggregator(edu.stanford.nlp.pipeline.CoreMapAttributeAggregator)

Aggregations

CoreLabelTokenFactory (edu.stanford.nlp.process.CoreLabelTokenFactory): 12; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 11; StringReader (java.io.StringReader): 7; Sentence (edu.stanford.nlp.simple.Sentence): 4; CoreMap (edu.stanford.nlp.util.CoreMap): 3; ScoredPassage (io.anserini.qa.passage.ScoredPassage): 3; RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 2; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 2; IndexUtils (io.anserini.index.IndexUtils): 2; IdfPassageScorer (io.anserini.qa.passage.IdfPassageScorer): 2; BufferedReader (java.io.BufferedReader): 2; Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 1; TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView): 1; View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 1; HasWord (edu.stanford.nlp.ling.HasWord): 1; TaggedWord (edu.stanford.nlp.ling.TaggedWord): 1; CoNLLUReader (edu.stanford.nlp.pipeline.CoNLLUReader): 1; CoreMapAttributeAggregator (edu.stanford.nlp.pipeline.CoreMapAttributeAggregator): 1; DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor): 1; PTBTokenizer (edu.stanford.nlp.process.PTBTokenizer): 1