use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.
the class SparseGraphPlugin method loadArgs.
/**
 * Subroutine: populates an array of strings from a file.
 *
 * <p>Every line yielded by the {@code ParsedFile} is trimmed and stored in
 * {@code args} at the index reported by {@code parsed.getLineNumber()}.
 *
 * @param args destination array, written in place; it must be large enough
 *             for every index the file produces (assumes getLineNumber() is
 *             a 0-based data-line index -- TODO confirm against ParsedFile)
 * @param file the file whose lines are loaded
 */
private void loadArgs(String[] args, File file) {
    log.debug("Loading args file " + file.getName() + " in ConstantArgument...");
    ParsedFile parsed = new ParsedFile(file);
    try {
        for (String line : parsed) {
            args[parsed.getLineNumber()] = line.trim();
        }
    } finally {
        // Release the file even when the loop fails (e.g. an index outside args).
        parsed.close();
    }
}
use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.
the class SparseMatrixIndex method load.
/**
 * Loads one sparse matrix from three sibling files in {@code dir}, all named
 * after {@code functor_arg1type_arg2type}:
 * <ul>
 *   <li>{@code .rce} -- line 1 is #rows, line 2 is #cols, line 3 is #entries;</li>
 *   <li>{@code .rowOffset} -- one integer offset per line;</li>
 *   <li>{@code .colIndex} -- one column index per line, optionally followed by
 *       {@code WEIGHT_DELIMITER} and a float weight (weight defaults to 1.0).</li>
 * </ul>
 *
 * <p>Populates {@code name}, {@code rows}, {@code cols}, {@code entries},
 * {@code colIndices}, {@code values} and {@code rowOffsets}. Each file is closed
 * even when parsing fails part-way through.
 *
 * @param dir                       directory containing the three matrix files
 * @param functor_arg1type_arg2type base name (without extension) of the matrix files
 * @throws IOException              declared by the signature; no visible statement throws it directly
 * @throws IllegalArgumentException if a header line of the .rce file is missing, or a
 *                                  column index is not smaller than {@code arg2.length}
 * @throws NumberFormatException    if a numeric field cannot be parsed
 */
public void load(File dir, String functor_arg1type_arg2type) throws IOException {
    log.info("Loading matrix " + functor_arg1type_arg2type + " from " + dir.getName() + "...");
    this.name = dir + ":" + functor_arg1type_arg2type;
    StatusLogger status = new StatusLogger(LOGUPDATE_MS);

    /* Read the number of rows, columns, and entries - entry is a triple (i,j,m[i,j]) */
    ParsedFile file = new ParsedFile(new File(dir, functor_arg1type_arg2type + ".rce"));
    try {
        Iterator<String> it = file.iterator();
        String line = it.next();
        if (line == null)
            throw new IllegalArgumentException("Bad format for " + functor_arg1type_arg2type + ".rce: line 1 must list #rows");
        this.rows = Integer.parseInt(line.trim());
        line = it.next();
        if (line == null)
            throw new IllegalArgumentException("Bad format for " + functor_arg1type_arg2type + ".rce: line 2 must list #cols");
        this.cols = Integer.parseInt(line.trim());
        line = it.next();
        if (line == null)
            throw new IllegalArgumentException("Bad format for " + functor_arg1type_arg2type + ".rce: line 3 must list #entries");
        this.entries = Integer.parseInt(line.trim());
    } finally {
        // Close on the error paths too, not only after a well-formed header.
        file.close();
    }

    /* Data is stored like this: colIndices[] is one long
     * array, and values is a parallel array. rowsOffsets is another array so that
     * rowOffsets[i] is where the column indices for row i start. Thus
     *
     * for (k=rowOffsets[i]; k<rowOffsets[i+1]; k++) {
     *     j = colIndices[k];
     *     m_ij = values[k];
     *     // this would retrieve i,j and the corresponding value in the sparse matrix m[i,j]
     *     doSomethingWith(i,j,m_ij);
     * }
     *
     */
    ArrayList<Integer> rowsOffsets = new ArrayList<Integer>();
    this.colIndices = new int[entries];
    this.values = new float[entries];

    long start = status.tick();
    file = new ParsedFile(new File(dir, functor_arg1type_arg2type + ".rowOffset"));
    try {
        for (String line : file) {
            rowsOffsets.add(Integer.parseInt(line));
            if (log.isInfoEnabled()) {
                if (status.due()) {
                    log.info("rowOffset: " + file.getLineNumber() + " lines (" + (file.getLineNumber() / status.since(start)) + " klps)");
                }
            }
        }
    } finally {
        file.close();
    }

    start = status.tick();
    file = new ParsedFile(new File(dir, functor_arg1type_arg2type + ".colIndex"));
    try {
        for (String line : file) {
            // The reported line number doubles as the index into the parallel arrays.
            int ln = file.getLineNumber();
            String[] parts = line.split(WEIGHT_DELIMITER);
            colIndices[ln] = Integer.parseInt(parts[0]);
            // A missing weight field means an unweighted edge.
            values[ln] = (float) (parts.length > 1 ? Float.parseFloat(parts[1]) : 1.0);
            if (colIndices[ln] >= arg2.length) {
                throw new IllegalArgumentException("Malformed sparsegraph! For index " + this.name + ", colIndices[" + ln + "]=" + colIndices[ln] + "; arg2.length is only " + arg2.length);
            }
            if (log.isInfoEnabled()) {
                if (status.due()) {
                    log.info("colIndex: " + file.getLineNumber() + " lines (" + (file.getLineNumber() / status.since(start)) + " klps)");
                }
            }
        }
    } finally {
        file.close();
    }

    // Copy the offsets into a primitive array with one extra sentinel slot holding
    // `entries`, so that rowOffsets[i+1] is valid for the last row as well.
    this.rowOffsets = new int[rowsOffsets.size() + 1];
    for (int i = 0; i < rowsOffsets.size(); i++) {
        rowOffsets[i] = rowsOffsets.get(i);
    }
    rowOffsets[rowsOffsets.size()] = entries;

    long del = status.sinceStart();
    if (del > LOGUPDATE_MS)
        log.info("Finished loading sparse graph matrix " + functor_arg1type_arg2type + " (" + (del / 1000.) + " sec)");
}
use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.
the class FactsPlugin method load.
/**
 * Loads facts from a file, one fact per line, and registers each with
 * {@code addFact}.
 *
 * <p>Each line is split at the first tab: the text before it is passed as the
 * first argument to {@code addFact}, and the remainder, split on tabs, as the
 * second. When {@code duplicates > 0}, lines already seen (according to a Bloom
 * filter sized by {@code duplicates}) are skipped with a warning; once the
 * reported line number exceeds {@code duplicates}, a single warning notes that
 * false positives become possible.
 *
 * @param f          the facts file to read
 * @param duplicates expected number of facts used to size the duplicate filter;
 *                   a value {@code <= 0} disables duplicate detection
 */
public void load(File f, int duplicates) {
    ParsedFile parsed = new ParsedFile(f);
    try {
        BloomFilter<String> lines = null;
        if (duplicates > 0) {
            lines = new BloomFilter<String>(1e-5, duplicates);
        }
        boolean exceeds = false;
        for (String line : parsed) {
            String[] parts = line.split("\t", 2);
            if (parts.length != 2) {
                // NOTE(review): presumably parseError throws; otherwise parts[1] below would fail -- confirm.
                parsed.parseError("expected at least 2 tab-delimited fields");
            }
            if (duplicates > 0) {
                if (lines.contains(line)) {
                    log.warn("Skipping duplicate fact at " + f.getName() + ":" + parsed.getAbsoluteLineNumber() + ": " + line);
                    continue;
                }
                lines.add(line);
                // Warn only once, the first time the filter's planned capacity is exceeded.
                if (!exceeds && parsed.getLineNumber() > duplicates) {
                    exceeds = true;
                    log.warn("Number of facts exceeds " + duplicates + "; duplicate detection may encounter false positives. We should add a command line option to fix this.");
                }
            }
            addFact(parts[0], parts[1].split("\t"));
        }
    } finally {
        // The file was previously never closed; release it on every exit path.
        parsed.close();
    }
}
use of edu.cmu.ml.proppr.util.ParsedFile in project ProPPR by TeamCohen.
the class Trainer method main.
/**
 * Command-line entry point: trains model parameters from an already-grounded
 * query file and optionally saves them.
 *
 * <p>Steps: build a {@code ModuleConfiguration} from {@code args}; reject a query
 * file whose name lacks {@code Grounder.GROUNDED_SUFFIX}; if a feature-index
 * file exists next to the grounded file, preload its lines into the feature
 * symbol table; run {@code c.trainer.train(...)}; print the elapsed training
 * time; save the parameters when {@code c.paramsFile} is set. Any
 * {@code Throwable} is printed and the JVM exits with status -1.
 *
 * @param args command-line arguments forwarded to {@code ModuleConfiguration}
 */
public static void main(String[] args) {
    try {
        int inputFiles = Configuration.USE_TRAIN | Configuration.USE_INIT_PARAMS;
        int outputFiles = Configuration.USE_PARAMS;
        int constants = Configuration.USE_EPOCHS | Configuration.USE_FORCE | Configuration.USE_THREADS | Configuration.USE_FIXEDWEIGHTS;
        int modules = Configuration.USE_TRAINER | Configuration.USE_SRW | Configuration.USE_SQUASHFUNCTION;
        ModuleConfiguration c = new ModuleConfiguration(args, inputFiles, outputFiles, constants, modules);
        log.info(c.toString());

        String groundedFile = c.queryFile.getPath();
        if (!c.queryFile.getName().endsWith(Grounder.GROUNDED_SUFFIX)) {
            throw new IllegalStateException("Run Grounder on " + c.queryFile.getName() + " first. Ground+Train in one go is not supported yet.");
        }

        SymbolTable<String> masterFeatures = new SimpleSymbolTable<String>();
        File featureIndex = new File(groundedFile + Grounder.FEATURE_INDEX_EXTENSION);
        if (featureIndex.exists()) {
            log.info("Reading feature index from " + featureIndex.getName() + "...");
            ParsedFile featureLines = new ParsedFile(featureIndex);
            try {
                for (String line : featureLines) {
                    masterFeatures.insert(line.trim());
                }
            } finally {
                // Previously this reader was anonymous and never closed.
                featureLines.close();
            }
        }

        log.info("Training model parameters on " + groundedFile + "...");
        long start = System.currentTimeMillis();
        // NOTE(review): the ParsedFile handed to train() is not closed here; whether the
        // trainer takes ownership of it is not visible from this method -- confirm.
        ParamVector<String, ?> params = c.trainer.train(masterFeatures, new ParsedFile(groundedFile), new ArrayLearningGraphBuilder(), c.initParamsFile, c.epochs);
        System.out.println("Training time: " + (System.currentTimeMillis() - start));

        if (c.paramsFile != null) {
            log.info("Saving parameters to " + c.paramsFile + "...");
            ParamsFile.save(params, c.paramsFile, c);
        }
    } catch (Throwable t) {
        t.printStackTrace();
        System.exit(-1);
    }
}
Aggregations