Search in sources :

Example 1 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

the class Tdiff method main.

/**
 * Reads the first tree from each of two tree files, passes both to
 * {@code markDiff}, then prints the second tree's {@code pennString()},
 * a blank line, and every constituent returned by {@code markDiff}.
 *
 * @param args exactly two arguments: the paths of the two tree files
 */
public static void main(String[] args) {
    if (args.length != 2) {
        System.out.println("Usage: java Tdiff tree1 tree2");
        return;
    }
    File tree1Path = new File(args[0]);
    File tree2Path = new File(args[1]);
    TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
    // try-with-resources: both readers are closed on every exit path,
    // including when opening the second file or reading a tree fails.
    try (TreeReader tR1 = trf.newTreeReader(new BufferedReader(new FileReader(tree1Path)));
            TreeReader tR2 = trf.newTreeReader(new BufferedReader(new FileReader(tree2Path)))) {
        Tree t1 = tR1.readTree();
        Tree t2 = tR2.readTree();
        // readTree() yields null once no tree is left (e.g. an empty file);
        // bail out instead of failing with a NullPointerException below.
        if (t1 == null || t2 == null) {
            log.info("Unable to read a tree from both files!");
            return;
        }
        Set<Constituent> t1Diff = markDiff(t1, t2);
        System.out.println(t2.pennString());
        System.out.println();
        for (Constituent c : t1Diff) {
            System.out.println(c);
        }
    } catch (FileNotFoundException e) {
        log.info("File not found!");
    } catch (IOException e) {
        log.info("Unable to read file!");
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) LabeledScoredTreeReaderFactory(edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory) Constituent(edu.stanford.nlp.trees.Constituent) LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent)

Example 2 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

the class FrenchXMLTreeReader method main.

/**
 * For debugging. Reads every tree from each file named on the command line,
 * prints each tree to stdout prefixed by the file's base name and the tree's
 * sentence ID, and reports per-file and total tree counts on stderr.
 *
 * @param args one or more paths of tree files; with no arguments a usage
 *             message is printed and the JVM exits with status -1
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.printf("Usage: java %s tree_file(s)%n%n", FrenchXMLTreeReader.class.getName());
        System.exit(-1);
    }
    List<File> fileList = new ArrayList<>();
    for (String arg : args) {
        fileList.add(new File(arg));
    }
    TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
    int totalTrees = 0;
    // Collected but only consumed by the optional debug dump noted below.
    Set<String> morphAnalyses = Generics.newHashSet();
    try {
        for (File file : fileList) {
            // Strip the extension if there is one; a name without a '.' is
            // used as-is (substring(0, -1) would throw otherwise).
            String fileName = file.getName();
            int extensionStart = fileName.lastIndexOf('.');
            String canonicalFileName = extensionStart < 0 ? fileName : fileName.substring(0, extensionStart);
            int numTrees = 0;
            // try-with-resources: the reader is closed even if a tree fails to parse.
            try (TreeReader tr = trf.newTreeReader(
                    new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")))) {
                for (Tree t; (t = tr.readTree()) != null; numTrees++) {
                    String ftbID = ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
                    System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, t.toString());
                    List<Label> leaves = t.yield();
                    for (Label label : leaves) {
                        if (label instanceof CoreLabel) {
                            morphAnalyses.add(((CoreLabel) label).originalText());
                        }
                    }
                }
            }
            System.err.printf("%s: %d trees%n", file.getName(), numTrees);
            totalTrees += numTrees;
        }
        // wsg2011: to print the observed morphological analyses, iterate
        // over morphAnalyses here and log each entry.
        System.err.printf("%nRead %d trees%n", totalTrees);
    } catch (IOException e) {
        // FileNotFoundException is an IOException; both were handled identically.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 3 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

the class SplitMaker method main.

/**
 * Splits a tree file into three files by tree position: the first 483 trees
 * are written to {@code <input>.clean.dev}, trees 483 through 5723 to
 * {@code <input>.clean.train}, and all remaining trees to
 * {@code <input>.clean.test}. Input and output use the encoding reported by
 * the Hebrew treebank language pack. The number of trees processed is
 * printed to stderr.
 *
 * @param args exactly one argument: the path of the tree file to split;
 *             otherwise a usage message is printed and the JVM exits with -1
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s tree_file%n", SplitMaker.class.getName());
        System.exit(-1);
    }
    // Index of the first tree that belongs to the train and test portions.
    final int trainStart = 483;
    final int testStart = 5724;
    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    String inputFile = args[0];
    File treeFile = new File(inputFile);
    TreeReaderFactory trf = new HebrewTreeReaderFactory();
    // try-with-resources: the reader and all three writers are closed (and
    // the writers flushed) even when reading a tree fails part-way through.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.getEncoding()));
            TreeReader tr = trf.newTreeReader(br);
            PrintWriter pwDev = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.dev"), false, tlp.getEncoding()));
            PrintWriter pwTrain = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.train"), false, tlp.getEncoding()));
            PrintWriter pwTest = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.test"), false, tlp.getEncoding()))) {
        int numTrees = 0;
        for (Tree t; (t = tr.readTree()) != null; numTrees++) {
            if (numTrees < trainStart) {
                pwDev.println(t.toString());
            } else if (numTrees < testStart) {
                pwTrain.println(t.toString());
            } else {
                pwTest.println(t.toString());
            }
        }
        System.err.printf("Processed %d trees.%n", numTrees);
    } catch (IOException e) {
        // UnsupportedEncodingException and FileNotFoundException are both
        // IOExceptions and were handled identically.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 4 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

the class SpanishXMLTreeReader method main.

/**
 * Parses the command-line options, then hands every remaining (positional)
 * argument to a fixed-size thread pool as a tree file: each task opens the
 * file as ISO-8859-1, builds a {@code TreeReader} for it and calls
 * {@code process}. Blocks until all tasks have finished.
 *
 * @param args option flags followed by tree file paths; the usage text is
 *             logged when no arguments, the {@code help} option, or no file
 *             paths are given
 */
public static void main(String[] args) {
    final Properties options = StringUtils.argsToProperties(args, argOptionDefs());
    if (args.length < 1 || options.containsKey("help")) {
        log.info(usage());
        return;
    }
    final Pattern posPattern = options.containsKey("searchPos") ? Pattern.compile(options.getProperty("searchPos")) : null;
    final Pattern wordPattern = options.containsKey("searchWord") ? Pattern.compile(options.getProperty("searchWord")) : null;
    final boolean plainPrint = PropertiesUtils.getBool(options, "plain", false);
    final boolean ner = PropertiesUtils.getBool(options, "ner", false);
    final boolean detailedAnnotations = PropertiesUtils.getBool(options, "detailedAnnotations", false);
    // Properties.getProperty returns null for an absent key, so the ""
    // entry must be checked before splitting it.
    // NOTE(review): presumably "" is unset when only flags were passed —
    // confirm against StringUtils.argsToProperties.
    String remaining = options.getProperty("");
    if (remaining == null || remaining.isEmpty()) {
        log.info(usage());
        return;
    }
    String[] remainingArgs = remaining.split(" ");
    List<File> fileList = new ArrayList<>();
    for (String remainingArg : remainingArgs) {
        fileList.add(new File(remainingArg));
    }
    final SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory(true, true, ner, detailedAnnotations);
    ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
    for (final File file : fileList) {
        pool.execute(new Runnable() {

            @Override
            public void run() {
                // try-with-resources: the file is closed even if creating
                // the tree reader or process(...) throws.
                try (Reader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO-8859-1"));
                        TreeReader tr = trf.newTreeReader(file.getPath(), in)) {
                    process(file, tr, posPattern, wordPattern, plainPrint);
                } catch (IOException e) {
                    // FileNotFoundException is an IOException; both were
                    // handled identically.
                    e.printStackTrace();
                }
            }
        });
    }
    pool.shutdown();
    try {
        // Effectively wait forever for the submitted tasks to complete.
        pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    } catch (InterruptedException e) {
        throw new RuntimeInterruptedException(e);
    }
}
Also used : Pattern(java.util.regex.Pattern) TreeReader(edu.stanford.nlp.trees.TreeReader) ExecutorService(java.util.concurrent.ExecutorService)

Example 5 with TreeReader

use of edu.stanford.nlp.trees.TreeReader in project CoreNLP by stanfordnlp.

the class ThreadedParserSlowITest method readTrees.

/**
 * Reads every tree from a file and returns them in file order, printing a
 * one-line summary of how many trees were read to stdout.
 *
 * @param filename path of the tree file to read
 * @param encoding name of the character encoding used to decode the file
 * @return all trees read from the file; empty if the file holds no trees
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         opening, reading or closing the file
 */
public static List<Tree> readTrees(String filename, String encoding) {
    List<Tree> trees = new ArrayList<>();
    TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
    // try-with-resources: the reader (and the file stream under it) is
    // closed on both the normal and the exceptional path.
    try (TreeReader tr = trf.newTreeReader(new InputStreamReader(new FileInputStream(filename), encoding))) {
        Tree next;
        while ((next = tr.readTree()) != null) {
            trees.add(next);
        }
        System.out.println("Read " + trees.size() + " trees from " + filename);
        return trees;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) TreeReader(edu.stanford.nlp.trees.TreeReader) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) LabeledScoredTreeReaderFactory(edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream)

Aggregations

TreeReader (edu.stanford.nlp.trees.TreeReader)20 Tree (edu.stanford.nlp.trees.Tree)19 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)17 IOException (java.io.IOException)7 CoreLabel (edu.stanford.nlp.ling.CoreLabel)6 FrenchTreeReaderFactory (edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory)6 FileInputStream (java.io.FileInputStream)6 InputStreamReader (java.io.InputStreamReader)6 BufferedReader (java.io.BufferedReader)5 FileNotFoundException (java.io.FileNotFoundException)5 UnsupportedEncodingException (java.io.UnsupportedEncodingException)4 TwoDimensionalCounter (edu.stanford.nlp.stats.TwoDimensionalCounter)3 PennTreeReader (edu.stanford.nlp.trees.PennTreeReader)3 TreebankLanguagePack (edu.stanford.nlp.trees.TreebankLanguagePack)3 SpanishTreeReaderFactory (edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory)3 Pattern (java.util.regex.Pattern)3 Label (edu.stanford.nlp.ling.Label)2 LabeledScoredTreeReaderFactory (edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory)2 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)2 ArabicTreeReaderFactory (edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory)2