Search in sources :

Example 1 with OutFile

use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.

the class BuildEvaluationFiles method buildEvaluationFile.

/**
 * Builds the phrase-level and token-level evaluation files for a set of gold/tagged file
 * pairs.
 *
 * <p>Two outputs are opened, {@code outFile + ".phraseLevel"} and
 * {@code outFile + ".tokenLevel"}, and every pair {@code (goldFiles[i], taggedFiles[i])} is
 * handed to {@code appendToEvaluationFile} together with both outputs. Both outputs are
 * closed even if processing one of the pairs fails.
 *
 * @param goldFiles paths of the gold files; drives the iteration
 * @param taggedFiles paths of the tagged files, indexed in parallel with {@code goldFiles}
 *        (must have at least as many entries)
 * @param outFile common prefix of the two output file names
 */
public static void buildEvaluationFile(String[] goldFiles, String[] taggedFiles, String outFile) {
    OutFile outPhrase = new OutFile(outFile + ".phraseLevel");
    try {
        OutFile outToken = new OutFile(outFile + ".tokenLevel");
        try {
            for (int i = 0; i < goldFiles.length; i++) {
                appendToEvaluationFile(goldFiles[i], taggedFiles[i], outPhrase, outToken);
            }
        } finally {
            // Release the token-level file even when a pair fails part-way through.
            outToken.close();
        }
    } finally {
        outPhrase.close();
    }
}
Also used : OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile)

Example 2 with OutFile

use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.

the class NETagPlain method tagData.

/**
 * Tags plain-text input and writes the tagged text to the corresponding output path.
 *
 * <p>If {@code inputPath} is a directory, every entry whose name does not start with
 * {@code "."} is tagged and written to a file of the same name under {@code outputPath};
 * otherwise {@code inputPath} is tagged as a single file and written to {@code outputPath}.
 *
 * <p>NOTE(review): this method does not load any models itself; it uses {@code tagger1}
 * and {@code tagger2} as they are. Presumably {@link #init()} must have been called
 * beforehand -- confirm.
 *
 * @param inputPath a plain-text file, or a directory of plain-text files
 * @param outputPath the output file, or the output directory when {@code inputPath} is a
 *        directory
 * @throws Exception if the directory cannot be listed, or if reading, annotating or
 *         tagging fails
 */
public static void tagData(String inputPath, String outputPath) throws Exception {
    File f = new File(inputPath);
    ArrayList<String> inFiles = new ArrayList<>();
    ArrayList<String> outFiles = new ArrayList<>();
    if (f.isDirectory()) {
        String[] files = f.list();
        if (files == null) {
            // File.list() returns null on an I/O error; fail with a clear message
            // instead of a NullPointerException in the loop below.
            throw new java.io.IOException("Unable to list directory: " + inputPath);
        }
        for (String file : files) {
            if (!file.startsWith(".")) {
                inFiles.add(inputPath + File.separator + file);
                outFiles.add(outputPath + File.separator + file);
            }
        }
    } else {
        inFiles.add(inputPath);
        outFiles.add(outputPath);
    }
    for (int fileId = 0; fileId < inFiles.size(); fileId++) {
        String inFile = inFiles.get(fileId);
        logger.debug("Tagging file: " + inFile);
        ArrayList<LinkedVector> sentences = PlainTextReader.parsePlainTextFile(inFile);
        NERDocument doc = new NERDocument(sentences, "consoleInput");
        Data data = new Data(doc);
        ExpressiveFeaturesAnnotator.annotate(data);
        // formerly there was code to load models here. Check that NETagPlain.init() is
        // happening.
        String tagged = tagData(data, tagger1, tagger2);
        OutFile out = new OutFile(outFiles.get(fileId));
        try {
            out.println(tagged);
        } finally {
            out.close();
        }
    }
}
Also used : LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) File(java.io.File) OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) Vector(java.util.Vector) OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile)

Example 3 with OutFile

use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.

the class MemoryEfficientNB method save.

/**
 * Writes this model to disk, one value per line.
 *
 * <p>The feature map is saved separately to {@code file + ".nb.featuremap"}. The main file
 * then contains, in order: the sample size, the number of weights, each weight, each class
 * count, the first {@code map.dim} word counts, {@code fidCount}, the number of per-class
 * feature tables, and for every table its size followed by alternating feature-id and
 * value lines.
 *
 * @param file path of the main model file; also the prefix of the feature-map file
 */
public void save(String file) {
    map.save(file + ".nb.featuremap");
    OutFile out = new OutFile(file);
    try {
        out.println(String.valueOf(sampleSize));
        out.println(String.valueOf(weights.length));
        for (double weight : weights) {
            out.println(String.valueOf(weight));
        }
        for (double classCount : classCounts) {
            out.println(String.valueOf(classCount));
        }
        for (int i = 0; i < map.dim; i++) {
            out.println(String.valueOf(wordCounts[i]));
        }
        out.println(String.valueOf(fidCount));
        out.println(String.valueOf(fidCounts.size()));
        for (int i = 0; i < fidCounts.size(); i++) {
            Hashtable<Integer, Double> h = fidCounts.elementAt(i);
            out.println(String.valueOf(h.size()));
            // Walk the entries directly rather than looking each key up again.
            for (java.util.Map.Entry<Integer, Double> entry : h.entrySet()) {
                double val = entry.getValue();
                out.println(String.valueOf(entry.getKey()));
                out.println(String.valueOf(val));
            }
        }
    } finally {
        // Close the file even if a write or an index access above fails.
        out.close();
    }
}
Also used : OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile)

Example 4 with OutFile

use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.

the class NETesterMultiDataset method dumpFeaturesLabeledData.

/**
 * Annotates labeled test data and dumps, for every word, its label, its form and the values
 * of its level-1 and level-2 features.
 *
 * <p>NB: assuming column format (the data is read with the {@code "-c"} option).
 *
 * <p>If {@code testDatapath} is a directory, every entry whose name does not start with
 * {@code "."} is processed and written to a file of the same name under
 * {@code outDatapath}; otherwise the single file is written to {@code outDatapath}. Each
 * output line is {@code label<TAB>form<TAB>} followed by the space-prefixed feature values
 * (the text after the first {@code ':'} of each feature's string form); an empty line
 * follows every sentence. The taggers are loaded from
 * {@code ParametersForLbjCode.currentParameters.pathToModelFile} with the suffixes
 * {@code .level1}/{@code .level1.lex} and {@code .level2}/{@code .level2.lex}.
 *
 * @param testDatapath a labeled data file, or a directory of such files
 * @param outDatapath the output file, or the output directory when {@code testDatapath} is
 *        a directory
 * @throws Exception if the directory cannot be listed, or if loading, annotating or
 *         decoding fails
 */
public static void dumpFeaturesLabeledData(String testDatapath, String outDatapath) throws Exception {
    FeaturesLevel1SharedWithLevel2 features1 = new FeaturesLevel1SharedWithLevel2();
    FeaturesLevel2 features2 = new FeaturesLevel2();
    NETaggerLevel1 taggerLevel1 = new NETaggerLevel1(ParametersForLbjCode.currentParameters.pathToModelFile + ".level1", ParametersForLbjCode.currentParameters.pathToModelFile + ".level1.lex");
    NETaggerLevel2 taggerLevel2 = new NETaggerLevel2(ParametersForLbjCode.currentParameters.pathToModelFile + ".level2", ParametersForLbjCode.currentParameters.pathToModelFile + ".level2.lex");
    File f = new File(testDatapath);
    Vector<String> inFiles = new Vector<>();
    Vector<String> outFiles = new Vector<>();
    if (f.isDirectory()) {
        String[] files = f.list();
        if (files == null) {
            // File.list() returns null on an I/O error; fail with a clear message
            // instead of a NullPointerException in the loop below.
            throw new java.io.IOException("Unable to list directory: " + testDatapath);
        }
        for (String file : files) {
            if (!file.startsWith(".")) {
                inFiles.addElement(testDatapath + "/" + file);
                outFiles.addElement(outDatapath + "/" + file);
            }
        }
    } else {
        inFiles.addElement(testDatapath);
        outFiles.addElement(outDatapath);
    }
    for (int fileId = 0; fileId < inFiles.size(); fileId++) {
        Data testData = new Data(inFiles.elementAt(fileId), inFiles.elementAt(fileId), "-c", new String[] {}, new String[] {});
        ExpressiveFeaturesAnnotator.annotate(testData);
        Decoder.annotateDataBIO(testData, taggerLevel1, taggerLevel2);
        OutFile out = new OutFile(outFiles.elementAt(fileId));
        try {
            for (int docid = 0; docid < testData.documents.size(); docid++) {
                ArrayList<LinkedVector> sentences = testData.documents.get(docid).sentences;
                for (LinkedVector sentence : sentences) {
                    for (int j = 0; j < sentence.size(); j++) {
                        NEWord w = (NEWord) sentence.get(j);
                        out.print(w.neLabel + "\t" + w.form + "\t");
                        // Both vectors are computed before anything is printed, level 1 first.
                        FeatureVector fv1 = features1.classify(w);
                        FeatureVector fv2 = features2.classify(w);
                        for (FeatureVector fv : new FeatureVector[] {fv1, fv2}) {
                            for (int k = 0; k < fv.size(); k++) {
                                String s = fv.getFeature(k).toString();
                                // Keep only the part after the first ':'.
                                out.print(" " + s.substring(s.indexOf(':') + 1));
                            }
                        }
                        out.println("");
                    }
                    // Blank line marks the end of a sentence.
                    out.println("");
                }
            }
        } finally {
            // Close the output even if feature extraction fails for some word.
            out.close();
        }
    }
}
Also used : FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) File(java.io.File) OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile) LinkedVector(edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector) Vector(java.util.Vector) FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile)

Example 5 with OutFile

use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.

the class TaggedDataWriter method writeToFile.

/**
 * Writes {@code data} to {@code outputFile} in the requested format.
 *
 * <p>{@code "-r"} selects the brackets format and {@code "-c"} the columns format (both
 * matched case-insensitively). The format is validated before the output file is opened,
 * so an unsupported format neither creates the file nor leaves it open.
 *
 * @param outputFile path of the file to write
 * @param data the data to serialize
 * @param fileFormat either {@code "-r"} or {@code "-c"}
 * @param labelType which label of each word is written
 * @throws IOException if {@code fileFormat} is neither {@code "-r"} nor {@code "-c"}
 */
public static void writeToFile(String outputFile, Data data, String fileFormat, NEWord.LabelToLookAt labelType) throws IOException {
    boolean bracketsFormat = fileFormat.equalsIgnoreCase("-r");
    if (!bracketsFormat && !fileFormat.equalsIgnoreCase("-c")) {
        throw new IOException("Unknown file format (only options -r and -c are supported): " + fileFormat);
    }
    OutFile out = new OutFile(outputFile);
    try {
        if (bracketsFormat) {
            out.println(toBracketsFormat(data, labelType));
        } else {
            out.println(toColumnsFormat(data, labelType));
        }
    } finally {
        // Close the file even if formatting the data fails.
        out.close();
    }
}
Also used : IOException(java.io.IOException) OutFile(edu.illinois.cs.cogcomp.ner.IO.OutFile)

Aggregations

OutFile (edu.illinois.cs.cogcomp.ner.IO.OutFile): 9, LinkedVector (edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector): 2, File (java.io.File): 2, Vector (java.util.Vector): 2, FeatureVector (edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector): 1, IOException (java.io.IOException): 1