Search in sources :

Example 1 with InFile

use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.

the class DocumentCollection method addFolder.

/**
 * Adds every entry of a folder to this collection, treating each file as a single document.
 *
 * <p>Each file is read with {@code InFile.readLineTokens}; the tokens of all lines are
 * concatenated into one word list, which is wrapped in a {@code Document} labelled with
 * {@code classID} and appended to {@code docs}.
 *
 * @param path folder to read; each entry is opened as {@code path + "/" + entryName}
 * @param classID class label given to every {@code Document} created
 * @param stops stop-word filter applied to each line's tokens, or {@code null} to skip filtering
 * @param discardFirstToken if {@code true}, the first token of every non-empty line is dropped
 * @param tokenizationDelimiters delimiter characters passed to {@code readLineTokens}
 * @throws IllegalArgumentException if {@code path} cannot be listed as a directory
 */
public void addFolder(String path, int classID, StopWords stops, boolean discardFirstToken, String tokenizationDelimiters) {
    String[] files = (new File(path)).list();
    if (files == null) {
        // File.list() returns null when the path is not a directory or an I/O error occurs.
        throw new IllegalArgumentException("Not a readable directory: " + path);
    }
    for (String file : files) {
        InFile in = new InFile(path + "/" + file);
        Vector<String> allWords = new Vector<>();
        try {
            Vector<String> words = in.readLineTokens(tokenizationDelimiters);
            while (words != null) {
                if (discardFirstToken && !words.isEmpty()) {
                    words.removeElementAt(0);
                }
                if (stops != null) {
                    // The filter is only ever handed a real token list, never the
                    // end-of-file null.
                    words = stops.filterStopWords(words);
                    if (words == null) {
                        // Same as before: a null filter result ends reading of this file.
                        break;
                    }
                }
                allWords.addAll(words);
                words = in.readLineTokens(tokenizationDelimiters);
            }
        } finally {
            // Release the underlying file even if tokenizing or filtering fails.
            in.close();
        }
        docs.addElement(new Document(allWords, classID));
    }
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile) InFile(edu.illinois.cs.cogcomp.ner.IO.InFile) File(java.io.File) Vector(java.util.Vector)

Example 2 with InFile

use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.

the class FeatureMap method readFromFile.

/**
 * Loads word/feature-id mappings from a counts file.
 *
 * <p>Every line is split on space, newline and tab. Token 0 is parsed as an integer count
 * and token 1 is the word. A word whose count is at least {@code thres} is assigned the
 * current value of {@code dim} as its feature id, recorded in both {@code wordToFid} and
 * {@code fidToWord}, after which {@code dim} is incremented.
 *
 * @param countFiles path of the counts file to read
 * @param thres minimum count (inclusive) a word needs to receive a feature id
 * @throws NumberFormatException if the first token of a line is not an integer
 */
public void readFromFile(String countFiles, int thres) {
    // One delimiter set for every read (the original spelled the same set two ways).
    final String delimiters = " \n\t";
    InFile in = new InFile(countFiles);
    try {
        Vector<String> tokens = in.readLineTokens(delimiters);
        while (tokens != null) {
            // NOTE(review): presumably a blank line yields an empty token list; skip it
            // rather than fail on elementAt(0) — confirm against InFile.readLineTokens.
            if (!tokens.isEmpty()) {
                int count = Integer.parseInt(tokens.elementAt(0));
                if (count >= thres) {
                    String word = tokens.elementAt(1);
                    wordToFid.put(word, dim);
                    fidToWord.put(dim, word);
                    dim++;
                }
            }
            tokens = in.readLineTokens(delimiters);
        }
    } finally {
        // Release the underlying file even if a line is malformed.
        in.close();
    }
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 3 with InFile

use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.

the class BracketFileReader method getFileText.

// func -parseBracketsAnnotatedText
/**
 * Reads a file line by line and returns its text, with a {@code "\n"} appended after
 * every line read.
 *
 * @param file path of the file to read
 * @return the concatenated lines, each followed by a newline
 */
private static String getFileText(String file) {
    StringBuilder text = new StringBuilder(200000);
    InFile reader = new InFile(file);
    for (String current = reader.readLine(); current != null; current = reader.readLine()) {
        text.append(current);
        text.append("\n");
    }
    reader.close();
    return text.toString();
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 4 with InFile

use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.

the class Main method execute.

/**
 * Executes NER on the selected input file or directory, producing output to standard out or
 * to a file with the same name as the input.
 *
 * <p>When the input is a directory and an output directory is set, a 50-character progress
 * bar is drawn on standard out beneath a reference bar. Otherwise the files are processed
 * without progress output.
 *
 * @throws Exception if anything goes wrong, including failure to list the input directory.
 */
private void execute() throws Exception {
    if (!indirectory.isDirectory()) {
        processInputFile(indirectory);
        this.getResultProcessor().done();
        System.out.println("Completed");
        return;
    }

    File[] files = indirectory.listFiles();
    if (files == null) {
        // listFiles() returns null on an I/O error even for a directory.
        throw new java.io.IOException("Unable to list files in directory: " + indirectory);
    }

    if (outdirectory == null) {
        for (File infile : files) {
            processInputFile(infile);
        }
        this.getResultProcessor().done();
        return;
    }

    // Width of the reference bar printed on the "Total Files" line.
    final int barWidth = 50;
    System.out.println("Total Files : ••••••••••••••••••••••••••••••••••••••••••••••••••");
    // print, not println: the bullets must stay on this line to line up with the bar above.
    System.out.print("Completed   : ");
    double ratio = (double) barWidth / (double) files.length;
    int completed = 0;
    for (int i = 0; i < files.length; i++) {
        processInputFile(files[i]);
        // present completion.
        while (completed < barWidth && (i * ratio) > completed) {
            System.out.print("•");
            completed++;
        }
    }
    this.getResultProcessor().done();
    // Fill whatever is left of the bar. The earlier version advanced the file index inside
    // this loop, which never terminated for directories of 50 files or fewer.
    while (completed < barWidth) {
        System.out.print("•");
        completed++;
    }
    System.out.println("");
}
Also used : File(java.io.File) InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Example 5 with InFile

use of edu.illinois.cs.cogcomp.ner.IO.InFile in project cogcomp-nlp by CogComp.

the class PlainTextReader method parsePlainTextFile.

/**
 * Reads a plain-text file and parses it.
 *
 * <p>Each line read is followed by {@code " \n"} in the buffer, and one extra space is
 * appended after the last line. The buffered text is then passed through
 * {@code normalizeText} and {@code parseText}.
 *
 * @param file path of the text file to read
 * @return the result of {@code parseText} on the normalized file contents
 */
public static ArrayList<LinkedVector> parsePlainTextFile(String file) {
    InFile reader = new InFile(file);
    String current = reader.readLine();
    StringBuilder contents = new StringBuilder(100000);
    for (; current != null; current = reader.readLine()) {
        contents.append(current);
        contents.append(" \n");
    }
    contents.append(" ");
    reader.close();
    String normalized = normalizeText(contents.toString());
    return parseText(normalized);
}
Also used : InFile(edu.illinois.cs.cogcomp.ner.IO.InFile)

Aggregations

InFile (edu.illinois.cs.cogcomp.ner.IO.InFile): 9, File (java.io.File): 3, StringTokenizer (java.util.StringTokenizer): 3, HashMap (java.util.HashMap): 2, Vector (java.util.Vector): 2, ResourceConfigurator (edu.illinois.cs.cogcomp.core.resources.ResourceConfigurator): 1, THashMap (gnu.trove.map.hash.THashMap): 1, InvalidEndpointException (io.minio.errors.InvalidEndpointException): 1, InvalidPortException (io.minio.errors.InvalidPortException): 1, FileInputStream (java.io.FileInputStream): 1, FileNotFoundException (java.io.FileNotFoundException): 1, InputStream (java.io.InputStream): 1, Datastore (org.cogcomp.Datastore): 1, DatastoreException (org.cogcomp.DatastoreException): 1