Search in sources :

Example 6 with ParsedFile

use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.

the class SparseGraphPlugin method loadArgs.

/**
 * Subroutine: populates an array of strings from a file.
 *
 * <p>Every line produced by iterating the {@code ParsedFile} is trimmed and stored in
 * {@code args} at the index returned by {@code parsed.getLineNumber()} for that line.
 * The file is closed whether or not reading completes normally.
 *
 * @param args destination array; must be long enough for every index the file reports
 * @param file the file whose lines are loaded
 */
private void loadArgs(String[] args, File file) {
    log.debug("Loading args file " + file.getName() + " in ConstantArgument...");
    ParsedFile parsed = new ParsedFile(file);
    try {
        for (String line : parsed) {
            args[parsed.getLineNumber()] = line.trim();
        }
    } finally {
        // Release the file handle even if a line index falls outside args.
        parsed.close();
    }
}
Also used : ParsedFile(edu.cmu.ml.proppr.util.ParsedFile)

Example 7 with ParsedFile

use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.

the class SparseMatrixIndex method load.

/**
 * Loads one sparse matrix from three sibling files in {@code dir}, all named after
 * {@code functor_arg1type_arg2type}:
 * <ul>
 *   <li>{@code .rce} - three lines giving #rows, #cols and #entries;</li>
 *   <li>{@code .rowOffset} - one integer per line, collected into {@code rowOffsets};</li>
 *   <li>{@code .colIndex} - one entry per line: a column index optionally followed by
 *       {@code WEIGHT_DELIMITER} and a float value (the value defaults to 1.0).</li>
 * </ul>
 * Each file is closed when its read phase ends, including when that phase fails.
 *
 * @param dir directory containing the three matrix files
 * @param functor_arg1type_arg2type base name shared by the three files
 * @throws IOException declared by the signature; the call that raises it is not visible in this method
 * @throws IllegalArgumentException if one of the first three {@code .rce} lines is missing, or
 *         if a column index is not smaller than {@code arg2.length}
 */
public void load(File dir, String functor_arg1type_arg2type) throws IOException {
    log.info("Loading matrix " + functor_arg1type_arg2type + " from " + dir.getName() + "...");
    this.name = dir + ":" + functor_arg1type_arg2type;
    StatusLogger status = new StatusLogger(LOGUPDATE_MS);
    /* Read the number of rows, columns, and entries - entry is a triple (i,j,m[i,j]) */
    ParsedFile file = new ParsedFile(new File(dir, functor_arg1type_arg2type + ".rce"));
    try {
        // NOTE(review): the null checks assume ParsedFile's iterator yields null once the
        // file is exhausted rather than throwing - confirm against ParsedFile.
        Iterator<String> it = file.iterator();
        String line = it.next();
        if (line == null) {
            throw new IllegalArgumentException("Bad format for " + functor_arg1type_arg2type + ".rce: line 1 must list #rows");
        }
        this.rows = Integer.parseInt(line.trim());
        line = it.next();
        if (line == null) {
            throw new IllegalArgumentException("Bad format for " + functor_arg1type_arg2type + ".rce: line 2 must list #cols");
        }
        this.cols = Integer.parseInt(line.trim());
        line = it.next();
        if (line == null) {
            throw new IllegalArgumentException("Bad format for " + functor_arg1type_arg2type + ".rce: line 3 must list #entries");
        }
        this.entries = Integer.parseInt(line.trim());
    } finally {
        // Do not leak the handle when the header is malformed.
        file.close();
    }
    /*
     * Data is stored like this: colIndices[] is one long
     * array, and values is a parallel array.  rowsOffsets is another array so that
     * rowOffsets[i] is where the column indices for row i start. Thus
     *
     * for (k=rowOffsets[i]; k<rowOffsets[i+1]; k++) {
     *   j = colIndices[k];
     *   m_ij = values[k];
     *   // this would retrieve i,j and the corresponding value in the sparse matrix m[i,j]
     *   doSomethingWith(i,j,m_ij);
     * }
     */
    ArrayList<Integer> rowsOffsets = new ArrayList<Integer>();
    this.colIndices = new int[entries];
    this.values = new float[entries];
    long start = status.tick();
    file = new ParsedFile(new File(dir, functor_arg1type_arg2type + ".rowOffset"));
    try {
        for (String line : file) {
            rowsOffsets.add(Integer.parseInt(line));
            if (log.isInfoEnabled()) {
                if (status.due()) {
                    log.info("rowOffset: " + file.getLineNumber() + " lines (" + (file.getLineNumber() / status.since(start)) + " klps)");
                }
            }
        }
    } finally {
        file.close();
    }
    start = status.tick();
    file = new ParsedFile(new File(dir, functor_arg1type_arg2type + ".colIndex"));
    try {
        for (String line : file) {
            int ln = file.getLineNumber();
            String[] parts = line.split(WEIGHT_DELIMITER);
            colIndices[ln] = Integer.parseInt(parts[0]);
            // A missing weight field means an implicit weight of 1.0.
            values[ln] = (float) (parts.length > 1 ? Float.parseFloat(parts[1]) : 1.0);
            if (colIndices[ln] >= arg2.length) {
                throw new IllegalArgumentException("Malformed sparsegraph! For index " + this.name + ", colIndices[" + ln + "]=" + colIndices[ln] + "; arg2.length is only " + arg2.length);
            }
            if (log.isInfoEnabled()) {
                if (status.due()) {
                    log.info("colIndex: " + file.getLineNumber() + " lines (" + (file.getLineNumber() / status.since(start)) + " klps)");
                }
            }
        }
    } finally {
        file.close();
    }
    // One extra trailing slot holds the total entry count, so rowOffsets[i+1] is valid for the last row.
    this.rowOffsets = new int[rowsOffsets.size() + 1];
    for (int i = 0; i < rowsOffsets.size(); i++) {
        rowOffsets[i] = rowsOffsets.get(i);
    }
    rowOffsets[rowsOffsets.size()] = entries;
    long del = status.sinceStart();
    if (del > LOGUPDATE_MS) {
        log.info("Finished loading sparse graph matrix " + functor_arg1type_arg2type + " (" + (del / 1000.) + " sec)");
    }
}
Also used : StatusLogger(edu.cmu.ml.proppr.util.StatusLogger) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) ParsedFile(edu.cmu.ml.proppr.util.ParsedFile) ParsedFile(edu.cmu.ml.proppr.util.ParsedFile) File(java.io.File)

Example 8 with ParsedFile

use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.

the class FactsPlugin method load.

/**
 * Loads tab-delimited facts from {@code f}, passing each one to {@code addFact}.
 *
 * <p>Each line is split at its first tab into a head field and a remainder; the remainder
 * is split on tabs again to form the second argument of {@code addFact}. When
 * {@code duplicates > 0}, a Bloom filter built with that value is used to skip lines that
 * have already been seen, and a single warning is logged once the file's line number
 * exceeds {@code duplicates}. The file is closed when loading ends, normally or not.
 *
 * @param f the facts file to read
 * @param duplicates value used to build the duplicate-detection filter; values
 *        {@code <= 0} disable duplicate detection
 */
public void load(File f, int duplicates) {
    ParsedFile parsed = new ParsedFile(f);
    try {
        BloomFilter<String> lines = null;
        if (duplicates > 0) {
            lines = new BloomFilter<String>(1e-5, duplicates);
        }
        boolean exceeds = false;
        for (String line : parsed) {
            // Limit 2: parts[0] is the first field, parts[1] is everything after the first tab.
            String[] parts = line.split("\t", 2);
            if (parts.length != 2) {
                // NOTE(review): presumably parseError throws; otherwise parts[1] below would fail - confirm.
                parsed.parseError("expected at least 2 tab-delimited fields");
            }
            if (duplicates > 0) {
                if (lines.contains(line)) {
                    log.warn("Skipping duplicate fact at " + f.getName() + ":" + parsed.getAbsoluteLineNumber() + ": " + line);
                    continue;
                } else {
                    lines.add(line);
                }
                // Warn only once, the first time the line count passes the configured value.
                if (!exceeds && parsed.getLineNumber() > duplicates) {
                    exceeds = true;
                    log.warn("Number of facts exceeds " + duplicates + "; duplicate detection may encounter false positives. We should add a command line option to fix this.");
                }
            }
            addFact(parts[0], parts[1].split("\t"));
        }
    } finally {
        // The original never released this handle.
        parsed.close();
    }
}
Also used : ParsedFile(edu.cmu.ml.proppr.util.ParsedFile) BloomFilter(com.skjegstad.utils.BloomFilter)

Example 9 with ParsedFile

use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.

the class Trainer method main.

/**
 * Command-line entry point: trains model parameters on an already-grounded query file.
 *
 * <p>Builds a {@code ModuleConfiguration} from {@code args}, requires the query file name to
 * end with {@code Grounder.GROUNDED_SUFFIX}, optionally seeds the feature table from the
 * feature index file next to the grounded file, runs {@code c.trainer.train(...)}, prints the
 * elapsed training time, and saves the parameters when a params file is configured.
 * Any {@code Throwable} is printed and the JVM exits with status -1.
 *
 * @param args command-line arguments handed to {@code ModuleConfiguration}
 */
public static void main(String[] args) {
    try {
        int inputFiles = Configuration.USE_TRAIN | Configuration.USE_INIT_PARAMS;
        int outputFiles = Configuration.USE_PARAMS;
        int constants = Configuration.USE_EPOCHS | Configuration.USE_FORCE | Configuration.USE_THREADS | Configuration.USE_FIXEDWEIGHTS;
        int modules = Configuration.USE_TRAINER | Configuration.USE_SRW | Configuration.USE_SQUASHFUNCTION;
        ModuleConfiguration c = new ModuleConfiguration(args, inputFiles, outputFiles, constants, modules);
        log.info(c.toString());
        String groundedFile = c.queryFile.getPath();
        if (!c.queryFile.getName().endsWith(Grounder.GROUNDED_SUFFIX)) {
            throw new IllegalStateException("Run Grounder on " + c.queryFile.getName() + " first. Ground+Train in one go is not supported yet.");
        }
        SymbolTable<String> masterFeatures = new SimpleSymbolTable<String>();
        File featureIndex = new File(groundedFile + Grounder.FEATURE_INDEX_EXTENSION);
        if (featureIndex.exists()) {
            log.info("Reading feature index from " + featureIndex.getName() + "...");
            // Keep a reference so the file can be closed; an anonymous ParsedFile in the
            // for-each header could never be released.
            ParsedFile featureLines = new ParsedFile(featureIndex);
            try {
                for (String line : featureLines) {
                    masterFeatures.insert(line.trim());
                }
            } finally {
                featureLines.close();
            }
        }
        log.info("Training model parameters on " + groundedFile + "...");
        long start = System.currentTimeMillis();
        // NOTE(review): ownership of this ParsedFile is not visible here - confirm that train() closes it.
        ParamVector<String, ?> params = c.trainer.train(masterFeatures, new ParsedFile(groundedFile), new ArrayLearningGraphBuilder(), c.initParamsFile, c.epochs);
        System.out.println("Training time: " + (System.currentTimeMillis() - start));
        if (c.paramsFile != null) {
            log.info("Saving parameters to " + c.paramsFile + "...");
            ParamsFile.save(params, c.paramsFile, c);
        }
    } catch (Throwable t) {
        // Top-level boundary: report the failure and signal it through the exit status.
        t.printStackTrace();
        System.exit(-1);
    }
}
Also used : ModuleConfiguration(edu.cmu.ml.proppr.util.ModuleConfiguration) SimpleSymbolTable(edu.cmu.ml.proppr.util.SimpleSymbolTable) ParamsFile(edu.cmu.ml.proppr.util.ParamsFile) ParsedFile(edu.cmu.ml.proppr.util.ParsedFile) File(java.io.File) ParsedFile(edu.cmu.ml.proppr.util.ParsedFile) ArrayLearningGraphBuilder(edu.cmu.ml.proppr.graph.ArrayLearningGraphBuilder)

Aggregations

ParsedFile (edu.cmu.ml.proppr.util.ParsedFile)9 ArrayLearningGraphBuilder (edu.cmu.ml.proppr.graph.ArrayLearningGraphBuilder)3 File (java.io.File)3 BloomFilter (com.skjegstad.utils.BloomFilter)2 ModuleConfiguration (edu.cmu.ml.proppr.util.ModuleConfiguration)2 ParamsFile (edu.cmu.ml.proppr.util.ParamsFile)2 SimpleSymbolTable (edu.cmu.ml.proppr.util.SimpleSymbolTable)2 StatusLogger (edu.cmu.ml.proppr.util.StatusLogger)2 ArrayList (java.util.ArrayList)2 PosNegRWExample (edu.cmu.ml.proppr.examples.PosNegRWExample)1 SRW (edu.cmu.ml.proppr.learn.SRW)1 FixedWeightRules (edu.cmu.ml.proppr.learn.tools.FixedWeightRules)1 RWExampleParser (edu.cmu.ml.proppr.learn.tools.RWExampleParser)1 CustomConfiguration (edu.cmu.ml.proppr.util.CustomConfiguration)1 SimpleParamVector (edu.cmu.ml.proppr.util.math.SimpleParamVector)1 Iterator (java.util.Iterator)1 Callable (java.util.concurrent.Callable)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ExecutionException (java.util.concurrent.ExecutionException)1 ExecutorService (java.util.concurrent.ExecutorService)1