use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.
the class DocumentCollection method addFolder.
/**
 * Adds every file found in a folder to this collection, one document per file.
 *
 * <p>The folder is expected to hold a flat set of files, each of which is a single document.
 * Every file is read line by line with {@code InFile.readLineTokens}; the tokens of all lines
 * are concatenated into one word list, which is wrapped in a {@code Document} labelled with
 * {@code classID} and appended to {@code docs}.
 *
 * @param path folder to read; every name returned by {@link File#list()} is opened as
 *        {@code path + "/" + name}
 * @param classID label given to every document created from this folder
 * @param stops stop-word filter applied to each line's tokens, or {@code null} to skip filtering
 * @param discardFirstToken when {@code true}, the first token of every non-empty line is
 *        dropped before stop-word filtering
 * @param tokenizationDelimiters delimiter characters handed to {@code InFile.readLineTokens}
 * @throws IllegalArgumentException if {@code path} cannot be listed as a directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    if (files == null) {
        // File.list() yields null when the path is not a directory or an I/O error occurs;
        // fail with a descriptive message instead of an anonymous NullPointerException.
        throw new IllegalArgumentException("Cannot list files in folder: " + path);
    }
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        Vector<String> allWords = new Vector<>();
        try {
            Vector<String> words = in.readLineTokens(tokenizationDelimiters);
            // A null token vector marks the end of the file.
            while (words != null) {
                if (discardFirstToken && !words.isEmpty()) {
                    words.removeElementAt(0);
                }
                if (stops != null) {
                    words = stops.filterStopWords(words);
                    if (words == null) {
                        // Same as before: a null result from the filter ends this document.
                        break;
                    }
                }
                allWords.addAll(words);
                words = in.readLineTokens(tokenizationDelimiters);
            }
        } finally {
            // Release the file handle even when reading or filtering fails.
            in.close();
        }
        docs.addElement(new Document(allWords, classID));
    }
}
use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.
the class FeatureMap method readFromFile.
/**
 * Populates the word/feature-id maps from a count file.
 *
 * <p>Each line is split on spaces, tabs and newlines. The first token is parsed as an integer
 * count and the second token is the word. Words whose count is at least {@code thres} are
 * assigned the next feature id ({@code dim}), recorded in both {@code wordToFid} and
 * {@code fidToWord}, and {@code dim} is then incremented.
 *
 * @param countFiles path of the count file to read
 * @param thres minimum count a word needs in order to receive a feature id
 * @throws NumberFormatException if the first token of a non-empty line is not an integer
 * @throws ArrayIndexOutOfBoundsException if a line that passes the threshold has no second token
 */
public void readFromFile(String countFiles, int thres) {
    // One delimiter set for every read; the original spelled the same characters two ways.
    final String delimiters = " \n\t";
    InFile in = new InFile(countFiles);
    try {
        Vector<String> tokens = in.readLineTokens(delimiters);
        // A null token vector marks the end of the file.
        while (tokens != null) {
            // Skip lines without tokens rather than failing on elementAt(0).
            // NOTE(review): assumes readLineTokens returns an empty vector for a blank line - confirm.
            if (!tokens.isEmpty()) {
                int count = Integer.parseInt(tokens.elementAt(0));
                if (count >= thres) {
                    String word = tokens.elementAt(1);
                    wordToFid.put(word, dim);
                    fidToWord.put(dim, word);
                    dim++;
                }
            }
            tokens = in.readLineTokens(delimiters);
        }
    } finally {
        // Release the file handle even when a line fails to parse.
        in.close();
    }
}
use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.
the class BracketFileReader method getFileText.
// func -parseBracketsAnnotatedText
/**
 * Reads a whole file into one string, terminating every line with {@code "\n"}.
 *
 * @param file path of the file to read
 * @return the lines of the file in order, each followed by a newline character
 */
private static String getFileText(String file) {
    StringBuilder text = new StringBuilder(200000);
    InFile reader = new InFile(file);
    // readLine() returning null signals the end of the file.
    for (String current = reader.readLine(); current != null; current = reader.readLine()) {
        text.append(current);
        text.append("\n");
    }
    reader.close();
    return text.toString();
}
use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.
the class Main method execute.
/**
 * Execute NER on the selected input file or directory, produce output to standard out or a file
 * by the same name as the input.
 *
 * <p>When the input is a single file it is processed, the result processor is finalised and
 * "Completed" is printed. When the input is a directory every entry is processed in turn; if an
 * output directory is configured, a 50-character progress bar is drawn on standard out while
 * the files are processed.
 *
 * @throws Exception if anything goes wrong, including failure to list the input directory.
 */
private void execute() throws Exception {
    if (!indirectory.isDirectory()) {
        processInputFile(indirectory);
        this.getResultProcessor().done();
        System.out.println("Completed");
        return;
    }

    File[] files = indirectory.listFiles();
    if (files == null) {
        // listFiles() yields null on an I/O error; report it instead of hitting an NPE below.
        throw new java.io.IOException("Unable to list files in directory: " + indirectory);
    }

    if (outdirectory == null) {
        for (File infile : files) {
            processInputFile(infile);
        }
        this.getResultProcessor().done();
        return;
    }

    // Width of the progress bar; matches the number of bullets in the header line below.
    final int barWidth = 50;
    System.out.println("Total Files : ••••••••••••••••••••••••••••••••••••••••••••••••••");
    // print (not println) so that the dots accumulate on one line beneath the header.
    System.out.print("Completed : ");
    double ratio = (double) barWidth / (double) files.length;
    int completed = 0;
    for (int i = 0; i < files.length; i++) {
        processInputFile(files[i]);
        // present completion.
        while ((i * ratio) > completed) {
            System.out.print("•");
            completed++;
        }
    }
    this.getResultProcessor().done();
    // Fill whatever is left of the bar. The bound is fixed: the previous version incremented
    // the file index inside this loop, so the target kept moving away from the dot count.
    while (completed < barWidth) {
        System.out.print("•");
        completed++;
    }
    System.out.println("");
}
use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.
the class PlainTextReader method parsePlainTextFile.
/**
 * Loads a plain-text file and parses its contents.
 *
 * <p>The file is read line by line; each line is followed by {@code " \n"} in the assembled
 * text and a single trailing space is appended after the last line. The assembled text is
 * passed through {@code normalizeText} and then {@code parseText}.
 *
 * @param file path of the plain-text file to read
 * @return the result of {@code parseText} on the normalised file contents
 */
public static ArrayList<LinkedVector> parsePlainTextFile(String file) {
    InFile source = new InFile(file);
    StringBuilder contents = new StringBuilder(100000);
    String current;
    // readLine() returning null signals the end of the file.
    while ((current = source.readLine()) != null) {
        contents.append(current);
        contents.append(" \n");
    }
    contents.append(" ");
    source.close();
    String normalized = normalizeText(contents.toString());
    return parseText(normalized);
}
Aggregations