use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.
the class BuildEvaluationFiles method buildEvaluationFile.
/**
 * Writes the phrase-level and token-level evaluation files for a set of file pairs.
 *
 * <p>Two outputs are created: {@code outFile + ".phraseLevel"} and
 * {@code outFile + ".tokenLevel"}. Each pair {@code (goldFiles[i], taggedFiles[i])} is
 * handed to {@code appendToEvaluationFile} together with both writers. Both writers are
 * closed even if appending fails part-way through.
 *
 * @param goldFiles first element of each pair; its length drives the iteration
 * @param taggedFiles second element of each pair, index-aligned with {@code goldFiles};
 *        must contain at least {@code goldFiles.length} entries
 * @param outFile path prefix shared by the two output files
 */
public static void buildEvaluationFile(String[] goldFiles, String[] taggedFiles, String outFile) {
    OutFile outPhrase = new OutFile(outFile + ".phraseLevel");
    try {
        OutFile outToken = new OutFile(outFile + ".tokenLevel");
        try {
            for (int i = 0; i < goldFiles.length; i++) {
                appendToEvaluationFile(goldFiles[i], taggedFiles[i], outPhrase, outToken);
            }
        } finally {
            // Release the token-level writer even when a pair fails to process.
            outToken.close();
        }
    } finally {
        outPhrase.close();
    }
}
use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.
the class NETagPlain method tagData.
/**
 * Tags a plain-text file, or every non-hidden entry of a directory, and writes the
 * tagged text to the corresponding output path.
 *
 * <p>If {@code inputPath} is a directory, every entry whose name does not start with
 * {@code "."} is read from {@code inputPath} and written to a same-named file under
 * {@code outputPath}. Otherwise {@code inputPath} is read and the result is written to
 * {@code outputPath} directly.
 *
 * <p>The class-level {@code tagger1} and {@code tagger2} are used as-is; no models are
 * loaded here. NOTE(review): this presumably requires {@link #init()} to have been
 * called beforehand - confirm.
 *
 * @param inputPath a plain-text file, or a directory of plain-text files
 * @param outputPath the output file, or the output directory when {@code inputPath} is
 *        a directory
 * @throws java.io.IOException if {@code inputPath} is a directory whose contents cannot
 *         be listed
 * @throws Exception if reading, annotating or tagging a file fails
 */
public static void tagData(String inputPath, String outputPath) throws Exception {
    File f = new File(inputPath);
    Vector<String> inFiles = new Vector<>();
    Vector<String> outFiles = new Vector<>();
    if (f.isDirectory()) {
        String[] files = f.list();
        // File.list() returns null on an I/O error; fail with a clear message
        // instead of a NullPointerException in the loop below.
        if (files == null) {
            throw new java.io.IOException("Unable to list directory: " + inputPath);
        }
        for (String file : files) {
            if (!file.startsWith(".")) {
                inFiles.addElement(inputPath + File.separator + file);
                outFiles.addElement(outputPath + File.separator + file);
            }
        }
    } else {
        inFiles.addElement(inputPath);
        outFiles.addElement(outputPath);
    }
    for (int fileId = 0; fileId < inFiles.size(); fileId++) {
        logger.debug("Tagging file: " + inFiles.elementAt(fileId));
        ArrayList<LinkedVector> sentences = PlainTextReader.parsePlainTextFile(inFiles.elementAt(fileId));
        NERDocument doc = new NERDocument(sentences, "consoleInput");
        Data data = new Data(doc);
        ExpressiveFeaturesAnnotator.annotate(data);
        // formerly there was code to load models here. Check that NETagPlain.init() is
        // happening.
        String tagged = tagData(data, tagger1, tagger2);
        OutFile out = new OutFile(outFiles.elementAt(fileId));
        try {
            out.println(tagged);
        } finally {
            // Always release the output handle, even if the write fails.
            out.close();
        }
    }
}
use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.
the class MemoryEfficientNB method save.
/**
 * Saves this model to disk.
 *
 * <p>The feature map is saved to {@code file + ".nb.featuremap"}; everything else goes
 * to {@code file}, one value per line, in this order:
 * <ol>
 * <li>{@code sampleSize}</li>
 * <li>{@code weights.length}, followed by every weight</li>
 * <li>every entry of {@code classCounts}</li>
 * <li>the first {@code map.dim} entries of {@code wordCounts}</li>
 * <li>{@code fidCount}</li>
 * <li>{@code fidCounts.size()}, then for each table its size followed by alternating
 * key and value lines</li>
 * </ol>
 *
 * @param file path of the main model file; also the prefix of the feature-map file
 */
public void save(String file) {
    map.save(file + ".nb.featuremap");
    OutFile out = new OutFile(file);
    try {
        out.println(String.valueOf(sampleSize));
        out.println(String.valueOf(weights.length));
        for (double weight : weights) {
            out.println(String.valueOf(weight));
        }
        for (double classCount : classCounts) {
            out.println(String.valueOf(classCount));
        }
        for (int i = 0; i < map.dim; i++) {
            out.println(String.valueOf(wordCounts[i]));
        }
        out.println(String.valueOf(fidCount));
        out.println(String.valueOf(fidCounts.size()));
        for (int i = 0; i < fidCounts.size(); i++) {
            Hashtable<Integer, Double> h = fidCounts.elementAt(i);
            out.println(String.valueOf(h.size()));
            // Iterate entries directly rather than looking each key up again.
            for (java.util.Map.Entry<Integer, Double> entry : h.entrySet()) {
                double val = entry.getValue();
                out.println(String.valueOf(entry.getKey()));
                out.println(String.valueOf(val));
            }
        }
    } finally {
        // Close the writer even if serialization fails part-way through.
        out.close();
    }
}
use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.
the class NETesterMultiDataset method dumpFeaturesLabeledData.
/**
 * Annotates labeled data and dumps, for every word, its label, form and feature values.
 *
 * <p>NB: assuming column format (the data is loaded with the {@code "-c"} format flag).
 *
 * <p>If {@code testDatapath} is a directory, every entry whose name does not start with
 * {@code "."} is processed and written to a same-named file under {@code outDatapath};
 * otherwise the single file is processed and written to {@code outDatapath}. The level-1
 * and level-2 taggers are loaded from
 * {@code ParametersForLbjCode.currentParameters.pathToModelFile} with the suffixes
 * {@code .level1}/{@code .level1.lex} and {@code .level2}/{@code .level2.lex}.
 *
 * <p>Each output line has the form {@code label<TAB>form<TAB>} followed by the
 * space-prefixed feature values (the text after the first {@code ':'} of each feature's
 * string form). An empty line follows every sentence.
 *
 * @param testDatapath an input file, or a directory of input files
 * @param outDatapath the output file, or the output directory when {@code testDatapath}
 *        is a directory
 * @throws java.io.IOException if {@code testDatapath} is a directory whose contents
 *         cannot be listed
 * @throws Exception if loading, annotating or decoding the data fails
 */
public static void dumpFeaturesLabeledData(String testDatapath, String outDatapath) throws Exception {
    FeaturesLevel1SharedWithLevel2 features1 = new FeaturesLevel1SharedWithLevel2();
    FeaturesLevel2 features2 = new FeaturesLevel2();
    NETaggerLevel1 taggerLevel1 = new NETaggerLevel1(ParametersForLbjCode.currentParameters.pathToModelFile + ".level1", ParametersForLbjCode.currentParameters.pathToModelFile + ".level1.lex");
    NETaggerLevel2 taggerLevel2 = new NETaggerLevel2(ParametersForLbjCode.currentParameters.pathToModelFile + ".level2", ParametersForLbjCode.currentParameters.pathToModelFile + ".level2.lex");
    File f = new File(testDatapath);
    Vector<String> inFiles = new Vector<>();
    Vector<String> outFiles = new Vector<>();
    if (f.isDirectory()) {
        String[] files = f.list();
        // File.list() returns null on an I/O error; report it explicitly rather
        // than failing with a NullPointerException in the loop below.
        if (files == null) {
            throw new java.io.IOException("Unable to list directory: " + testDatapath);
        }
        for (String file : files) {
            if (!file.startsWith(".")) {
                inFiles.addElement(testDatapath + "/" + file);
                outFiles.addElement(outDatapath + "/" + file);
            }
        }
    } else {
        inFiles.addElement(testDatapath);
        outFiles.addElement(outDatapath);
    }
    for (int fileId = 0; fileId < inFiles.size(); fileId++) {
        Data testData = new Data(inFiles.elementAt(fileId), inFiles.elementAt(fileId), "-c", new String[] {}, new String[] {});
        ExpressiveFeaturesAnnotator.annotate(testData);
        Decoder.annotateDataBIO(testData, taggerLevel1, taggerLevel2);
        OutFile out = new OutFile(outFiles.elementAt(fileId));
        try {
            for (int docid = 0; docid < testData.documents.size(); docid++) {
                ArrayList<LinkedVector> sentences = testData.documents.get(docid).sentences;
                for (LinkedVector sentence : sentences) {
                    for (int j = 0; j < sentence.size(); j++) {
                        NEWord w = (NEWord) sentence.get(j);
                        out.print(w.neLabel + "\t" + w.form + "\t");
                        printFeatureValues(out, features1.classify(w));
                        printFeatureValues(out, features2.classify(w));
                        out.println("");
                    }
                    // Blank line marks the end of the sentence.
                    out.println("");
                }
            }
        } finally {
            // Release the output handle even if feature extraction fails.
            out.close();
        }
    }
}

/** Prints each feature of {@code fv} as a space followed by the text after its first ':'. */
private static void printFeatureValues(OutFile out, FeatureVector fv) {
    for (int k = 0; k < fv.size(); k++) {
        String s = fv.getFeature(k).toString();
        // indexOf returns -1 when there is no ':', so the whole string is printed.
        out.print(" " + s.substring(s.indexOf(':') + 1));
    }
}
use of edu.illinois.cs.cogcomp.ner.IO.OutFile in project cogcomp-nlp by CogComp.
the class TaggedDataWriter method writeToFile.
/**
 * Writes {@code data} to {@code outputFile} in the requested format.
 *
 * <p>The format flag is validated before the output file is opened, so an unsupported
 * flag neither creates the file nor leaks a file handle.
 *
 * @param outputFile path of the file to write
 * @param data the data to serialize
 * @param fileFormat {@code "-r"} for brackets format or {@code "-c"} for columns format
 *        (case-insensitive)
 * @param labelType which label of each word to write
 * @throws IOException if {@code fileFormat} is neither {@code "-r"} nor {@code "-c"}
 */
public static void writeToFile(String outputFile, Data data, String fileFormat, NEWord.LabelToLookAt labelType) throws IOException {
    boolean bracketsFormat = fileFormat.equalsIgnoreCase("-r");
    if (!bracketsFormat && !fileFormat.equalsIgnoreCase("-c")) {
        throw new IOException("Unknown file format (only options -r and -c are supported): " + fileFormat);
    }
    OutFile out = new OutFile(outputFile);
    try {
        if (bracketsFormat) {
            out.println(toBracketsFormat(data, labelType));
        } else {
            out.println(toColumnsFormat(data, labelType));
        }
    } finally {
        // Close the writer even if formatting or writing fails.
        out.close();
    }
}
Aggregations