Search in sources :

Example 46 with FileNotFoundException

use of java.io.FileNotFoundException in project CoreNLP by stanfordnlp.

the class ATBArabicDataset method build.

/**
 * Loads every entry of {@code pathsToData} into {@code treebank}, then applies an
 * {@code ArabicRawTreeNormalizer} that writes to {@code outFileName} and, when
 * {@code makeFlatFile} is set, also to {@code flatFileName}.
 *
 * <p>Successfully written file names are appended to {@code outputFileList}.
 * I/O set-up failures are reported on {@code System.err} and are not rethrown.
 */
public void build() {
    // Load the raw trees, honouring the split filter when one is configured.
    for (File path : pathsToData) {
        if (splitFilter == null) {
            treebank.loadPath(path, treeFileExtension, false);
        } else {
            treebank.loadPath(path, splitFilter);
        }
    }
    PrintWriter outfile = null;
    PrintWriter flatFile = null;
    try {
        outfile = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFileName), "UTF-8")));
        // The flat file is optional; the normalizer receives null when it is disabled.
        flatFile = (makeFlatFile) ? new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(flatFileName), "UTF-8"))) : null;
        treebank.apply(new ArabicRawTreeNormalizer(outfile, flatFile));
        outputFileList.add(outFileName);
        if (makeFlatFile) {
            outputFileList.add(flatFileName);
            toStringBuffer.append(" Made flat files\n");
        }
    } catch (UnsupportedEncodingException e) {
        System.err.printf("%s: Filesystem does not support UTF-8 output\n", this.getClass().getName());
        e.printStackTrace();
    } catch (FileNotFoundException e) {
        // Either output file may be the one that failed to open, so report the
        // exception's own message instead of assuming it was outFileName.
        System.err.printf("%s: Could not open output file for writing: %s\n", this.getClass().getName(), e.getMessage());
    } finally {
        // Close whichever writers were opened, even after a partial failure.
        if (outfile != null) {
            outfile.close();
        }
        if (flatFile != null) {
            flatFile.close();
        }
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) FileNotFoundException(java.io.FileNotFoundException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) OutputStreamWriter(java.io.OutputStreamWriter) File(java.io.File) PrintWriter(java.io.PrintWriter) BufferedWriter(java.io.BufferedWriter)

Example 47 with FileNotFoundException

use of java.io.FileNotFoundException in project CoreNLP by stanfordnlp.

the class ArabicSegmenter method evaluate.

/**
   * Evaluate accuracy when the input is gold segmented text *with* segmentation
   * markers and morphological analyses. In other words, the evaluation file has the
   * same format as the training data.
   *
   * <p>When {@code tedEvalPrefix} is set, four TEDEval files are also written
   * ({@code _gold.ftree}, {@code _gold.segmentation}, {@code _parse.ftree},
   * {@code _parse.segmentation}). If any of them cannot be opened, the error is
   * reported on {@code System.err} and TEDEval output is skipped entirely.
   *
   * @param pwOut writer that receives the overall and per-label accuracy report
   */
private void evaluate(PrintWriter pwOut) {
    log.info("Starting evaluation...");
    boolean hasSegmentationMarkers = true;
    boolean hasTags = true;
    DocumentReaderAndWriter<CoreLabel> docReader = new ArabicDocumentReaderAndWriter(hasSegmentationMarkers, hasTags, hasDomainLabels, domain, tf);
    ObjectBank<List<CoreLabel>> lines = classifier.makeObjectBankFromFile(flags.testFile, docReader);
    PrintWriter tedEvalGoldTree = null, tedEvalParseTree = null;
    PrintWriter tedEvalGoldSeg = null, tedEvalParseSeg = null;
    if (tedEvalPrefix != null) {
        try {
            tedEvalGoldTree = new PrintWriter(tedEvalPrefix + "_gold.ftree");
            tedEvalGoldSeg = new PrintWriter(tedEvalPrefix + "_gold.segmentation");
            tedEvalParseTree = new PrintWriter(tedEvalPrefix + "_parse.ftree");
            tedEvalParseSeg = new PrintWriter(tedEvalPrefix + "_parse.segmentation");
        } catch (FileNotFoundException e) {
            System.err.printf("%s: %s%n", ArabicSegmenter.class.getName(), e.getMessage());
            // Some writers may already be open. Close them and disable TEDEval output
            // as a whole; otherwise they would leak, because the guards below only
            // look at tedEvalParseSeg.
            if (tedEvalGoldTree != null) {
                tedEvalGoldTree.close();
            }
            if (tedEvalGoldSeg != null) {
                tedEvalGoldSeg.close();
            }
            if (tedEvalParseTree != null) {
                tedEvalParseTree.close();
            }
            if (tedEvalParseSeg != null) {
                tedEvalParseSeg.close();
            }
            tedEvalGoldTree = null;
            tedEvalGoldSeg = null;
            tedEvalParseTree = null;
            tedEvalParseSeg = null;
        }
    }
    Counter<String> labelTotal = new ClassicCounter<>();
    Counter<String> labelCorrect = new ClassicCounter<>();
    int total = 0;
    int correct = 0;
    for (List<CoreLabel> line : lines) {
        // Capture the input and gold renderings before classify() replaces the line.
        final String[] inputTokens = tedEvalSanitize(IOBUtils.IOBToString(line).replaceAll(":", "#pm#")).split(" ");
        final String[] goldTokens = tedEvalSanitize(IOBUtils.IOBToString(line, ":")).split(" ");
        line = classifier.classify(line);
        final String[] parseTokens = tedEvalSanitize(IOBUtils.IOBToString(line, ":")).split(" ");
        for (CoreLabel label : line) {
            // Do not evaluate labeling of whitespace
            String observation = label.get(CoreAnnotations.CharAnnotation.class);
            if (!observation.equals(IOBUtils.getBoundaryCharacter())) {
                total++;
                String hypothesis = label.get(CoreAnnotations.AnswerAnnotation.class);
                String reference = label.get(CoreAnnotations.GoldAnswerAnnotation.class);
                labelTotal.incrementCount(reference);
                if (hypothesis.equals(reference)) {
                    correct++;
                    labelCorrect.incrementCount(reference);
                }
            }
        }
        if (tedEvalParseSeg != null) {
            tedEvalGoldTree.printf("(root");
            tedEvalParseTree.printf("(root");
            int safeLength = inputTokens.length;
            if (inputTokens.length != goldTokens.length) {
                log.info("In generating TEDEval files: Input and gold do not have the same number of tokens");
                log.info("    (ignoring any extras)");
                log.info("  input: " + Arrays.toString(inputTokens));
                log.info("  gold: " + Arrays.toString(goldTokens));
                safeLength = Math.min(safeLength, goldTokens.length);
            }
            if (inputTokens.length != parseTokens.length) {
                log.info("In generating TEDEval files: Input and parse do not have the same number of tokens");
                log.info("    (ignoring any extras)");
                log.info("  input: " + Arrays.toString(inputTokens));
                log.info("  parse: " + Arrays.toString(parseTokens));
                // Narrow the bound rather than overwrite it: safeLength must stay within
                // all three arrays, or goldTokens[i] below can run out of bounds.
                safeLength = Math.min(safeLength, parseTokens.length);
            }
            for (int i = 0; i < safeLength; i++) {
                for (String segment : goldTokens[i].split(":")) {
                    tedEvalGoldTree.printf(" (seg %s)", segment);
                }
                tedEvalGoldSeg.printf("%s\t%s%n", inputTokens[i], goldTokens[i]);
                for (String segment : parseTokens[i].split(":")) {
                    tedEvalParseTree.printf(" (seg %s)", segment);
                }
                tedEvalParseSeg.printf("%s\t%s%n", inputTokens[i], parseTokens[i]);
            }
            tedEvalGoldTree.printf(")%n");
            tedEvalGoldSeg.println();
            tedEvalParseTree.printf(")%n");
            tedEvalParseSeg.println();
        }
    }
    // NOTE(review): with zero evaluated datums this is 0.0/0.0 and prints NaN.
    double accuracy = ((double) correct) / ((double) total);
    accuracy *= 100.0;
    pwOut.println("EVALUATION RESULTS");
    pwOut.printf("#datums:\t%d%n", total);
    pwOut.printf("#correct:\t%d%n", correct);
    pwOut.printf("accuracy:\t%.2f%n", accuracy);
    pwOut.println("==================");
    // Output the per label accuracies
    pwOut.println("PER LABEL ACCURACIES");
    for (String refLabel : labelTotal.keySet()) {
        double nTotal = labelTotal.getCount(refLabel);
        double nCorrect = labelCorrect.getCount(refLabel);
        double acc = (nCorrect / nTotal) * 100.0;
        pwOut.printf(" %s\t%.2f%n", refLabel, acc);
    }
    // The four TEDEval writers are either all open or all null at this point.
    if (tedEvalParseSeg != null) {
        tedEvalGoldTree.close();
        tedEvalGoldSeg.close();
        tedEvalParseTree.close();
        tedEvalParseSeg.close();
    }
}
Also used : FileNotFoundException(java.io.FileNotFoundException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) List(java.util.List) PrintWriter(java.io.PrintWriter)

Example 48 with FileNotFoundException

use of java.io.FileNotFoundException in project CoreNLP by stanfordnlp.

the class FTBDataset method makeSplitSet.

/**
 * Reads the file named by {@code splitFileName} (after conversion through
 * {@code DataFilePaths.convert}) and collects each line, trimmed, into a set.
 *
 * <p>Read errors are reported on {@code System.err} and are not rethrown; the
 * lines collected before the failure are still returned.
 *
 * @param splitFileName name of the split file, one entry per line
 * @return the set of trimmed lines; empty when the file cannot be opened
 */
private Set<String> makeSplitSet(String splitFileName) {
    splitFileName = DataFilePaths.convert(splitFileName);
    Set<String> splitSet = Generics.newHashSet();
    LineNumberReader reader = null;
    try {
        reader = new LineNumberReader(new FileReader(splitFileName));
        for (String line; (line = reader.readLine()) != null; ) {
            splitSet.add(line.trim());
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        // Only readLine() can raise this, so reader is non-null here.
        System.err.printf("%s: Error reading %s (line %d)%n", this.getClass().getName(), splitFileName, reader.getLineNumber());
        e.printStackTrace();
    } finally {
        // Close on every path; previously a read failure left the file open.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                System.err.printf("%s: Error closing %s%n", this.getClass().getName(), splitFileName);
                e.printStackTrace();
            }
        }
    }
    return splitSet;
}
Also used : FileNotFoundException(java.io.FileNotFoundException) FileReader(java.io.FileReader) IOException(java.io.IOException) LineNumberReader(java.io.LineNumberReader)

Example 49 with FileNotFoundException

use of java.io.FileNotFoundException in project CoreNLP by stanfordnlp.

the class ConfusionMatrixTSV method main.

/**
 * Reads a UTF-8 answers file, feeds each well-formed line into a
 * {@code ConfusionMatrix} and prints the matrix to standard output.
 *
 * <p>A line is used only if splitting it on single whitespace characters yields
 * exactly three fields; the third and second fields are passed to
 * {@code cm.add} in that order. Other lines are reported on
 * {@code System.err} and skipped.
 *
 * @param args {@code args[0]} is the path of the answers file
 */
public static void main(String[] args) {
    if (args.length < 1) {
        // NOTE(review): the usage text prints ConfusionMatrix's class name, not this tool's - confirm intended.
        System.err.printf("Usage: java %s answers_file%n", ConfusionMatrix.class.getName());
        System.exit(-1);
    }
    ConfusionMatrix<String> cm = new ConfusionMatrix<>();
    String answersFile = args[0];
    // try-with-resources closes the reader on every path; it was previously never closed.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(answersFile), "UTF-8"))) {
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            String[] tokens = line.split("\\s");
            if (tokens.length != 3) {
                // Terminate the diagnostic so consecutive messages do not run together.
                System.err.printf("ignoring bad line%n");
                continue;
            }
            cm.add(tokens[2], tokens[1]);
        }
        System.out.println(cm.toString());
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException and FileNotFoundException,
        // both IOException subclasses that were handled identically before.
        e.printStackTrace();
    }
}
Also used : ConfusionMatrix(edu.stanford.nlp.util.ConfusionMatrix) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileNotFoundException(java.io.FileNotFoundException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream)

Example 50 with FileNotFoundException

use of java.io.FileNotFoundException in project CoreNLP by stanfordnlp.

the class TreeToTSV method main.

/**
 * Reads trees from a UTF-8 tree file with a {@code SpanishTreeReaderFactory} reader
 * and prints one {@code word<TAB>label} line per preterminal, with a blank line
 * after each tree.
 *
 * <p>The label is chosen from a single type character: the character following the
 * {@code grup.nom.} prefix of the preterminal's ancestor label when that prefix is
 * present, otherwise the last character of a {@code np0000x} preterminal label.
 * {@code p}, {@code l}, {@code o} and {@code 0} map to PERS, LUG, ORG and OTROS;
 * anything else yields O.
 *
 * @param args {@code args[0]} is the path of the tree file
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.printf("Usage: java %s tree_file%n", TreeToTSV.class.getName());
        System.exit(-1);
    }
    String treeFile = args[0];
    // try-with-resources guarantees the underlying stream is released even when
    // reading a tree throws; previously close() ran only on the success path.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"))) {
        TreeReaderFactory trf = new SpanishTreeReaderFactory();
        TreeReader tr = trf.newTreeReader(br);
        StringBuilder sb = new StringBuilder();
        String nl = System.getProperty("line.separator");
        Pattern nePattern = Pattern.compile("^grup\\.nom\\.");
        Pattern npPattern = Pattern.compile("^np0000.$");
        for (Tree tree; (tree = tr.readTree()) != null; ) {
            for (Tree t : tree) {
                if (!t.isPreTerminal()) {
                    continue;
                }
                char type = 'O';
                // NOTE(review): ancestor(1, tree) is assumed non-null for every preterminal - confirm for single-node trees.
                Tree grandma = t.ancestor(1, tree);
                String grandmaValue = ((CoreLabel) grandma.label()).value();
                if (nePattern.matcher(grandmaValue).find()) {
                    // grup.nom.x - the prefix is 9 characters long, so the type character
                    // sits at index 9. A bare "grup.nom." has no such character; keep the
                    // default instead of throwing StringIndexOutOfBoundsException.
                    if (grandmaValue.length() > 9) {
                        type = grandmaValue.charAt(9);
                    }
                } else {
                    // else check the pos for np0000x or not
                    String pos = ((CoreLabel) t.label()).value();
                    if (npPattern.matcher(pos).find()) {
                        type = pos.charAt(6);
                    }
                }
                Tree wordNode = t.firstChild();
                String word = ((CoreLabel) wordNode.label()).value();
                sb.append(word).append("\t");
                switch(type) {
                    case 'p':
                        sb.append("PERS");
                        break;
                    case 'l':
                        sb.append("LUG");
                        break;
                    case 'o':
                        sb.append("ORG");
                        break;
                    case '0':
                        sb.append("OTROS");
                        break;
                    default:
                        sb.append("O");
                }
                sb.append(nl);
            }
            sb.append(nl);
        }
        System.out.print(sb.toString());
        tr.close();
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException and FileNotFoundException,
        // both IOException subclasses that were handled identically before.
        e.printStackTrace();
    }
}
Also used : Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) FileNotFoundException(java.io.FileNotFoundException) TreeReader(edu.stanford.nlp.trees.TreeReader) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) CoreLabel(edu.stanford.nlp.ling.CoreLabel) BufferedReader(java.io.BufferedReader) Tree(edu.stanford.nlp.trees.Tree) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Aggregations

FileNotFoundException (java.io.FileNotFoundException)3572 IOException (java.io.IOException)2027 File (java.io.File)1415 FileInputStream (java.io.FileInputStream)906 InputStream (java.io.InputStream)535 FileOutputStream (java.io.FileOutputStream)522 BufferedReader (java.io.BufferedReader)301 FileReader (java.io.FileReader)267 ArrayList (java.util.ArrayList)232 Path (org.apache.hadoop.fs.Path)224 Test (org.junit.Test)212 InputStreamReader (java.io.InputStreamReader)193 XmlPullParserException (org.xmlpull.v1.XmlPullParserException)189 XmlPullParser (org.xmlpull.v1.XmlPullParser)166 BufferedInputStream (java.io.BufferedInputStream)154 URL (java.net.URL)139 ParcelFileDescriptor (android.os.ParcelFileDescriptor)131 FileStatus (org.apache.hadoop.fs.FileStatus)131 Properties (java.util.Properties)129 HashMap (java.util.HashMap)120