Example use of com.skjegstad.utils.BloomFilter in the ProPPR project by TeamCohen.
Class LightweightGraphPlugin, method load.
/**
 * Builds a {@code LightweightGraphPlugin} from a tab-separated edge file.
 *
 * <p>Each line is {@code edgelabel<TAB>sourceNode<TAB>destNode}, optionally
 * followed by a fourth field that is parsed with {@link Double#parseDouble}
 * and passed to {@code addEdge} together with the edge. Every field is
 * trimmed before use. Lines with fewer than three fields are reported through
 * {@code parsed.parseError}; lines with more than four fields add no edge and
 * are logged as ignored.
 *
 * <p>When {@code duplicates > 0}, a Bloom filter constructed with
 * {@code (1e-5, duplicates)} is used to skip lines that were already seen.
 * Because a Bloom filter is probabilistic, a warning is logged once when the
 * line number passes {@code duplicates}, since false positives (distinct
 * lines being skipped) become more likely from then on.
 *
 * @param apr        options handed to the {@code LightweightGraphPlugin} constructor
 * @param f          edge file to read; its name is used as the plugin name and in log messages
 * @param duplicates sizing value for the duplicate-detection Bloom filter;
 *                   a value {@code <= 0} disables duplicate detection
 * @return the plugin holding every edge that was read
 * @throws NumberFormatException if a fourth field is present but is not a valid double
 */
public static WamPlugin load(APROptions apr, File f, int duplicates) {
    GraphlikePlugin p = new LightweightGraphPlugin(apr, f.getName());
    ParsedFile parsed = new ParsedFile(f);
    BloomFilter<String> lines = null;
    if (duplicates > 0) {
        lines = new BloomFilter<String>(1e-5, duplicates);
    }
    boolean exceeds = false;
    for (String line : parsed) {
        String[] parts = line.split("\t");
        if (parts.length < 3) {
            // NOTE(review): parseError presumably throws; confirm against ParsedFile.
            parsed.parseError("expected 3 tab-delimited fields; got " + parts.length);
        }
        if (duplicates > 0) {
            if (lines.contains(line)) {
                log.warn("Skipping duplicate fact at " + f.getName() + ":" + parsed.getAbsoluteLineNumber() + ": " + line);
                continue;
            }
            lines.add(line);
            // Short-circuit: once the warning has fired there is no need to
            // query the line number again.
            if (!exceeds && parsed.getLineNumber() > duplicates) {
                exceeds = true;
                log.warn("Number of graph edges exceeds " + duplicates + "; duplicate detection may encounter false positives. We should add a command line option to fix this.");
            }
        }
        if (parts.length == 3) {
            p.addEdge(parts[0].trim(), parts[1].trim(), parts[2].trim());
        } else if (parts.length == 4) {
            // Fourth field is the numeric value passed along with the edge.
            p.addEdge(parts[0].trim(), parts[1].trim(), parts[2].trim(), Double.parseDouble(parts[3].trim()));
        } else {
            // Previously such lines vanished without a trace; surface them so
            // missing edges can be diagnosed.
            log.warn("Ignoring line with " + parts.length + " tab-delimited fields (expected 3 or 4) at " + f.getName() + ":" + parsed.getAbsoluteLineNumber() + ": " + line);
        }
    }
    return p;
}
Example use of com.skjegstad.utils.BloomFilter in the ProPPR project by TeamCohen.
Class FactsPlugin, method load.
/**
 * Reads facts from a tab-separated file and registers each one via {@code addFact}.
 *
 * <p>Each line is first split into two pieces at the first tab: the leading
 * field and the remainder. The remainder is then split on every tab and the
 * resulting array is passed to {@code addFact} together with the leading
 * field. A line without any tab is reported through {@code parsed.parseError}.
 *
 * <p>When {@code duplicates > 0}, a Bloom filter constructed with
 * {@code (1e-5, duplicates)} is used to skip lines that were already seen.
 * Because a Bloom filter is probabilistic, a warning is logged once when the
 * line number passes {@code duplicates}, since false positives (distinct
 * lines being skipped) become more likely from then on.
 *
 * @param f          facts file to read; its name appears in log messages
 * @param duplicates sizing value for the duplicate-detection Bloom filter;
 *                   a value {@code <= 0} disables duplicate detection
 */
public void load(File f, int duplicates) {
    ParsedFile parsed = new ParsedFile(f);
    BloomFilter<String> lines = null;
    if (duplicates > 0) {
        lines = new BloomFilter<String>(1e-5, duplicates);
    }
    boolean exceeds = false;
    for (String line : parsed) {
        // Limit 2: keep everything after the first tab together for now.
        String[] parts = line.split("\t", 2);
        if (parts.length != 2) {
            // NOTE(review): parseError presumably throws; confirm against ParsedFile.
            parsed.parseError("expected at least 2 tab-delimited fields");
        }
        if (duplicates > 0) {
            if (lines.contains(line)) {
                log.warn("Skipping duplicate fact at " + f.getName() + ":" + parsed.getAbsoluteLineNumber() + ": " + line);
                continue;
            }
            lines.add(line);
            // Short-circuit: once the warning has fired there is no need to
            // query the line number again.
            if (!exceeds && parsed.getLineNumber() > duplicates) {
                exceeds = true;
                log.warn("Number of facts exceeds " + duplicates + "; duplicate detection may encounter false positives. We should add a command line option to fix this.");
            }
        }
        // Remaining fields become the array passed alongside the leading field.
        addFact(parts[0], parts[1].split("\t"));
    }
}
Aggregations