Search in sources :

Example 6 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

The main method of the ATBCorrector class.

//For those trees that lack a sentence-final punc, add one.
//    ("/^[^\\.!\\?]$/ >>- (__ > @ROOT <- __=loc) <: __\n"
//        + "insert (PUNC .) $- loc\n"
//        + "\n");
/**
 * Reads the tree file named on the command line, runs every tree through an
 * {@link ATBCorrector}, and prints each transformed tree to standard output.
 * The number of trees written is reported on standard error.
 *
 * @param args exactly one element: the path of the tree file, read as UTF-8
 */
public static void main(String[] args) {
    if (args.length != 1) {
        log.info("Usage: java " + ATBCorrector.class.getName() + " filename\n");
        System.exit(-1);
    }
    TreeTransformer tt = new ATBCorrector();
    File f = new File(args[0]);
    TreeReaderFactory trf = new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory();
    // try-with-resources releases the readers even if reading or transforming a tree fails.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
        TreeReader tr = trf.newTreeReader(br)) {
        int nTrees = 0;
        for (Tree t; (t = tr.readTree()) != null; nTrees++) {
            Tree fixedT = tt.transformTree(t);
            System.out.println(fixedT.toString());
        }
        System.err.printf("Wrote %d trees%n", nTrees);
    } catch (IOException e) {
        // UnsupportedEncodingException and FileNotFoundException are IOException
        // subclasses, so this single handler covers all three original cases.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) ArabicTreeReaderFactory(edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer)

Example 7 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

The process method of the AnCoraPOSStats class.

/**
 * Reads every tree from each file in {@code fileList} and, for each label in
 * the tree's tagged yield, increments the (word, tag) count in
 * {@code unigramTagger}. Labels whose tag equals
 * {@code SpanishTreeNormalizer.MW_TAG} are skipped.
 *
 * @throws IOException if a file cannot be opened, read, or closed
 */
public void process() throws IOException {
    SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory();
    for (File file : fileList) {
        // One reader per file; try-with-resources closes it before the next
        // file is opened, and also when reading fails part-way through.
        try (Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ANCORA_ENCODING));
            TreeReader tr = trf.newTreeReader(in)) {
            // Tree reading will implicitly perform tree normalization for us
            for (Tree t; (t = tr.readTree()) != null; ) {
                // Update tagger with this tree
                List<CoreLabel> yield = t.taggedLabeledYield();
                for (CoreLabel leafLabel : yield) {
                    if (leafLabel.tag().equals(SpanishTreeNormalizer.MW_TAG)) {
                        continue;
                    }
                    unigramTagger.incrementCount(leafLabel.word(), leafLabel.tag());
                }
            }
        }
    }
}
Also used : SpanishXMLTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishXMLTreeReaderFactory) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) TreeReader(edu.stanford.nlp.trees.TreeReader) TreeReader(edu.stanford.nlp.trees.TreeReader)

Example 8 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

The main method of the MultiWordPreprocessor class.

/**
 * Builds a unigram tagger from the trees in a file, then calls
 * {@code resolveDummyTags} on the same file and prints summary statistics
 * to standard output.
 *
 * <p>Options are parsed with {@code StringUtils.argsToProperties}. The tree
 * file path is the value stored under the empty key. The usage message is
 * logged, and nothing else happens, when that key is absent or when
 * {@code help} is present. {@code ner} (default false) and {@code normalize}
 * (default true) are read as booleans.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    Properties options = StringUtils.argsToProperties(args, argOptionDefs);
    if (!options.containsKey("") || options.containsKey("help")) {
        log.info(usage());
        return;
    }
    boolean retainNER = PropertiesUtils.getBool(options, "ner", false);
    boolean normalize = PropertiesUtils.getBool(options, "normalize", true);
    final File treeFile = new File(options.getProperty(""));
    TwoDimensionalCounter<String, String> unigramTagger = new TwoDimensionalCounter<>();
    try {
        TreeReaderFactory trf = new SpanishTreeReaderFactory();
        // The readers are scoped to this first pass so that they are closed,
        // even on failure, before treeFile is handed to resolveDummyTags below.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
            TreeReader tr = trf.newTreeReader(br)) {
            for (Tree t; (t = tr.readTree()) != null; ) {
                updateTagger(unigramTagger, t);
            }
        }
        System.out.println("Resolving DUMMY tags");
        resolveDummyTags(treeFile, unigramTagger, retainNER, normalize ? new SpanishTreeNormalizer(true, false, false) : null);
        System.out.println("#Unknown Word Types: " + ManualUWModel.nUnknownWordTypes);
        // The percentages are floating-point divisions, so a zero denominator
        // prints NaN or Infinity rather than throwing.
        System.out.println(String.format("#Missing POS: %d (fixed: %d, %.2f%%)", nMissingPOS, nFixedPOS, (double) nFixedPOS / nMissingPOS * 100));
        System.out.println(String.format("#Missing Phrasal: %d (fixed: %d, %.2f%%)", nMissingPhrasal, nFixedPhrasal, (double) nFixedPhrasal / nMissingPhrasal * 100));
        System.out.println("Done!");
    } catch (IOException e) {
        // UnsupportedEncodingException and FileNotFoundException are IOException
        // subclasses, so this single handler covers all three original cases.
        e.printStackTrace();
    }
}
Also used : TwoDimensionalCounter(edu.stanford.nlp.stats.TwoDimensionalCounter) TreeReader(edu.stanford.nlp.trees.TreeReader) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) Tree(edu.stanford.nlp.trees.Tree) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) SpanishTreeNormalizer(edu.stanford.nlp.trees.international.spanish.SpanishTreeNormalizer)

Example 9 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

The main method of the MWEPreprocessor class.

/**
 * Collects multi-word-expression statistics from the trees in a file and
 * writes four counters to CSV files via {@code printCounter}:
 * {@code label_term.csv}, {@code term_label.csv}, {@code label_pos.csv} and
 * {@code pos_label.csv}. When {@code RESOLVE_DUMMY_TAGS} is set,
 * {@code resolveDummyTags} is then run on the same file. Summary counts are
 * printed to standard output.
 *
 * @param args exactly one element: the path of the tree file, read as UTF-8
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s file%n", MWEPreprocessor.class.getName());
        System.exit(-1);
    }
    final File treeFile = new File(args[0]);
    TwoDimensionalCounter<String, String> labelTerm = new TwoDimensionalCounter<>();
    TwoDimensionalCounter<String, String> termLabel = new TwoDimensionalCounter<>();
    TwoDimensionalCounter<String, String> labelPreterm = new TwoDimensionalCounter<>();
    TwoDimensionalCounter<String, String> pretermLabel = new TwoDimensionalCounter<>();
    TwoDimensionalCounter<String, String> unigramTagger = new TwoDimensionalCounter<>();
    try {
        TreeReaderFactory trf = new FrenchTreeReaderFactory();
        // The readers are scoped to the counting pass so that they are closed,
        // even on failure, before treeFile is processed again further down.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
            TreeReader tr = trf.newTreeReader(br)) {
            for (Tree t; (t = tr.readTree()) != null; ) {
                countMWEStatistics(t, unigramTagger, labelPreterm, pretermLabel, labelTerm, termLabel);
            }
        }
        System.out.println("Generating {MWE Type -> Terminal}");
        printCounter(labelTerm, "label_term.csv");
        System.out.println("Generating {Terminal -> MWE Type}");
        printCounter(termLabel, "term_label.csv");
        System.out.println("Generating {MWE Type -> POS sequence}");
        printCounter(labelPreterm, "label_pos.csv");
        System.out.println("Generating {POS sequence -> MWE Type}");
        printCounter(pretermLabel, "pos_label.csv");
        if (RESOLVE_DUMMY_TAGS) {
            System.out.println("Resolving DUMMY tags");
            resolveDummyTags(treeFile, pretermLabel, unigramTagger);
        }
        System.out.println("#Unknown Word Types: " + ManualUWModel.nUnknownWordTypes);
        System.out.println("#Missing POS: " + nMissingPOS);
        System.out.println("#Missing Phrasal: " + nMissingPhrasal);
        System.out.println("Done!");
    } catch (IOException e) {
        // UnsupportedEncodingException and FileNotFoundException are IOException
        // subclasses, so this single handler covers all three original cases.
        e.printStackTrace();
    }
}
Also used : TwoDimensionalCounter(edu.stanford.nlp.stats.TwoDimensionalCounter) TreeReader(edu.stanford.nlp.trees.TreeReader) FrenchXMLTreeReader(edu.stanford.nlp.trees.international.french.FrenchXMLTreeReader) FrenchTreeReaderFactory(edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory) Tree(edu.stanford.nlp.trees.Tree) FrenchTreeReaderFactory(edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 10 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

The main method of the TreeToTSV class.

/**
 * Converts the trees in a file to tab-separated lines on standard output:
 * one {@code word<TAB>tag} line per preterminal, with an empty line after
 * each tree.
 *
 * <p>The tag is chosen from a single type character. If the preterminal's
 * parent label starts with {@code grup.nom.}, the character following that
 * prefix is used. Otherwise, if the preterminal's own label matches
 * {@code np0000?}, its last character is used. The characters {@code p},
 * {@code l}, {@code o} and {@code 0} map to {@code PERS}, {@code LUG},
 * {@code ORG} and {@code OTROS}; anything else yields {@code O}.
 *
 * @param args the first element is the path of the tree file, read as UTF-8
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.printf("Usage: java %s tree_file%n", TreeToTSV.class.getName());
        System.exit(-1);
    }
    String treeFile = args[0];
    TreeReaderFactory trf = new SpanishTreeReaderFactory();
    String nl = System.getProperty("line.separator");
    Pattern nePattern = Pattern.compile("^grup\\.nom\\.");
    Pattern npPattern = Pattern.compile("^np0000.$");
    // try-with-resources releases the readers even if a tree fails to parse.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
        TreeReader tr = trf.newTreeReader(br)) {
        StringBuilder sb = new StringBuilder();
        for (Tree tree; (tree = tr.readTree()) != null; ) {
            for (Tree t : tree) {
                if (!t.isPreTerminal()) {
                    continue;
                }
                char type = 'O';
                // NOTE(review): ancestor(...) presumably returns null when the
                // preterminal has no parent inside this tree; treat that case
                // as "no named-entity group" instead of dereferencing null.
                Tree grandma = t.ancestor(1, tree);
                String grandmaValue = grandma == null ? "" : ((CoreLabel) grandma.label()).value();
                if (nePattern.matcher(grandmaValue).find()) {
                    // grup.nom.x -- the prefix is 9 characters long, so the type
                    // character exists only when the label is longer than that.
                    if (grandmaValue.length() > 9) {
                        type = grandmaValue.charAt(9);
                    }
                } else {
                    // else check the pos for np0000x or not
                    String pos = ((CoreLabel) t.label()).value();
                    if (npPattern.matcher(pos).find()) {
                        type = pos.charAt(6);
                    }
                }
                Tree wordNode = t.firstChild();
                String word = ((CoreLabel) wordNode.label()).value();
                sb.append(word).append("\t");
                switch (type) {
                    case 'p':
                        sb.append("PERS");
                        break;
                    case 'l':
                        sb.append("LUG");
                        break;
                    case 'o':
                        sb.append("ORG");
                        break;
                    case '0':
                        sb.append("OTROS");
                        break;
                    default:
                        sb.append("O");
                }
                sb.append(nl);
            }
            sb.append(nl);
        }
        System.out.print(sb.toString());
    } catch (IOException e) {
        // UnsupportedEncodingException and FileNotFoundException are IOException
        // subclasses, so this single handler covers all three original cases.
        e.printStackTrace();
    }
}
Also used : Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) FileNotFoundException(java.io.FileNotFoundException) TreeReader(edu.stanford.nlp.trees.TreeReader) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) CoreLabel(edu.stanford.nlp.ling.CoreLabel) BufferedReader(java.io.BufferedReader) Tree(edu.stanford.nlp.trees.Tree) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Aggregations

TreeReader (edu.stanford.nlp.trees.TreeReader)20 Tree (edu.stanford.nlp.trees.Tree)19 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)17 IOException (java.io.IOException)7 CoreLabel (edu.stanford.nlp.ling.CoreLabel)6 FrenchTreeReaderFactory (edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory)6 FileInputStream (java.io.FileInputStream)6 InputStreamReader (java.io.InputStreamReader)6 BufferedReader (java.io.BufferedReader)5 FileNotFoundException (java.io.FileNotFoundException)5 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter)3 PennTreeReader (edu.stanford.nlp.trees.PennTreeReader)3 TreebankLanguagePack (edu.stanford.nlp.trees.TreebankLanguagePack)3 SpanishTreeReaderFactory (edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory)3 Pattern (java.util.regex.Pattern)3 Label (edu.stanford.nlp.ling.Label)2 LabeledScoredTreeReaderFactory (edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory)2 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)2 ArabicTreeReaderFactory (edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory)2