use of de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader in project elki by elki-project.
the class AbstractDistributionEstimatorTest method load.
protected void load(String name) {
  data = new HashMap<>();
  try (//
      InputStream in = new GZIPInputStream(AbstractDistributionTest.class.getResourceAsStream(name));
      TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
    Tokenizer t = reader.getTokenizer();
    DoubleArray buf = new DoubleArray();
    reader.reset(in);
    while (reader.nextLineExceptComments()) {
      assertTrue(t.valid());
      String key = t.getStrippedSubstring();
      buf.clear();
      for (t.advance(); t.valid(); t.advance()) {
        buf.add(t.getDouble());
      }
      data.put(key, buf.toArray());
    }
  } catch (IOException e) {
    fail("Cannot load data.");
  }
}
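As a complementary illustration, here is a minimal, self-contained sketch that drives the same TokenizedReader configuration over an in-memory string instead of a gzipped test resource. The sample lines, key names, and class name are made up for illustration; only API calls that already appear in the snippets on this page are used.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

import de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader;
import de.lmu.ifi.dbs.elki.utilities.io.Tokenizer;

public class TokenizedReaderSketch {
  public static void main(String[] args) throws IOException {
    // Made-up sample: a comment line, then "key value value ..." lines.
    String sample = "# comment line, skipped by nextLineExceptComments()\n" //
        + "normal 0.1 0.2 0.3\n" //
        + "uniform 1.0 2.0 3.0\n";
    try (InputStream in = new ByteArrayInputStream(sample.getBytes(StandardCharsets.UTF_8));
        TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
      Tokenizer t = reader.getTokenizer();
      reader.reset(in);
      while (reader.nextLineExceptComments()) {
        String key = t.getStrippedSubstring(); // first token of the line
        StringBuilder values = new StringBuilder();
        for (t.advance(); t.valid(); t.advance()) {
          values.append(t.getDouble()).append(' ');
        }
        System.out.println(key + " -> " + values);
      }
    }
  }
}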
use of de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader in project elki by elki-project.
the class ExternalDoubleOutlierScore method run.
/**
* Run the algorithm.
*
* @param database Database to use
* @param relation Relation to use
* @return Result
*/
public OutlierResult run(Database database, Relation<?> relation) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(relation.getDBIDs(), DataStoreFactory.HINT_STATIC);
  DoubleMinMax minmax = new DoubleMinMax();
  try (//
      InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
      TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
    Tokenizer tokenizer = reader.getTokenizer();
    CharSequence buf = reader.getBuffer();
    Matcher mi = idpattern.matcher(buf), ms = scorepattern.matcher(buf);
    reader.reset(in);
    while (reader.nextLineExceptComments()) {
      Integer id = null;
      double score = Double.NaN;
      for (; /* initialized by nextLineExceptComments */
          tokenizer.valid(); tokenizer.advance()) {
        mi.region(tokenizer.getStart(), tokenizer.getEnd());
        ms.region(tokenizer.getStart(), tokenizer.getEnd());
        final boolean mif = mi.find();
        final boolean msf = ms.find();
        if (mif && msf) {
          throw new AbortException("ID pattern and score pattern both match value: " + tokenizer.getSubstring());
        }
        if (mif) {
          if (id != null) {
            throw new AbortException("ID pattern matched twice: previous value " + id + " second value: " + tokenizer.getSubstring());
          }
          id = ParseUtil.parseIntBase10(buf, mi.end(), tokenizer.getEnd());
        }
        if (msf) {
          if (!Double.isNaN(score)) {
            throw new AbortException("Score pattern matched twice: previous value " + score + " second value: " + tokenizer.getSubstring());
          }
          score = ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd());
        }
      }
      if (id != null && !Double.isNaN(score)) {
        scores.putDouble(DBIDUtil.importInteger(id), score);
        minmax.put(score);
      } else if (id == null && Double.isNaN(score)) {
        LOG.warning("Line did not match either ID nor score nor comment: " + reader.getLineNumber());
      } else {
        throw new AbortException("Line matched only ID or only SCORE patterns: " + reader.getLineNumber());
      }
    }
  } catch (IOException e) {
    throw new AbortException("Could not load outlier scores: " + e.getMessage() + " when loading " + file, e);
  }
  OutlierScoreMeta meta;
  if (inverted) {
    meta = new InvertedOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  } else {
    meta = new BasicOutlierScoreMeta(minmax.getMin(), minmax.getMax());
  }
  DoubleRelation scoresult = new MaterializedDoubleRelation("External Outlier", "external-outlier", scores, relation.getDBIDs());
  OutlierResult or = new OutlierResult(meta, scoresult);
  // Apply scaling
  if (scaling instanceof OutlierScalingFunction) {
    ((OutlierScalingFunction) scaling).prepare(or);
  }
  DoubleMinMax mm = new DoubleMinMax();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double val = scoresult.doubleValue(iditer);
    val = scaling.getScaled(val);
    scores.putDouble(iditer, val);
    mm.put(val);
  }
  meta = new BasicOutlierScoreMeta(mm.getMin(), mm.getMax());
  or = new OutlierResult(meta, scoresult);
  return or;
}
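The inner loop above dispatches each token by testing a regular-expression region and then parsing everything after the matched prefix. Below is a standalone sketch of that prefix-stripping idea using only the JDK; the patterns and the sample token are hypothetical (the actual defaults of ExternalDoubleOutlierScore are configured elsewhere and not shown on this page), and Double.parseDouble / Integer.parseInt stand in for the ParseUtil calls used above.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PrefixParseSketch {
  public static void main(String[] args) {
    // Hypothetical patterns and token, for illustration only.
    Pattern idpattern = Pattern.compile("^ID=");
    Pattern scorepattern = Pattern.compile("^score=");
    String token = "score=0.75";

    Matcher mi = idpattern.matcher(token), ms = scorepattern.matcher(token);
    if (ms.find()) {
      // Parse the remainder after the matched prefix, analogous to
      // ParseUtil.parseDouble(buf, ms.end(), tokenizer.getEnd()).
      double score = Double.parseDouble(token.substring(ms.end()));
      System.out.println("score = " + score); // prints: score = 0.75
    } else if (mi.find()) {
      int id = Integer.parseInt(token.substring(mi.end()));
      System.out.println("id = " + id);
    }
  }
}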
use of de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader in project elki by elki-project.
the class KernelDensityFittingTest method testFitDoubleArray.
/**
 * The test will load the given data set and perform a Levenberg-Marquardt
 * fitting on a kernelized density estimation. The test evaluates the fitting
 * quality to ensure that the results remain stable and significantly better
 * than traditional estimation.
 */
@Test
public final void testFitDoubleArray() throws IOException {
  DoubleArray data = new DoubleArray();
  try (InputStream in = new GZIPInputStream(getClass().getResourceAsStream(dataset));
      TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
    Tokenizer t = reader.getTokenizer();
    reader.reset(in);
    while (reader.nextLineExceptComments() && t.valid()) {
      // Read first column only
      data.add(t.getDouble());
    }
  }
  // verify data set size.
  assertEquals("Data set size doesn't match parameters.", realsize, data.size());
  double splitval = 0.5;
  double[] fulldata = data.toArray();
  Arrays.sort(fulldata);
  // Check that the initial parameters match what we were expecting from the
  // data.
  double[] fullparams = estimateInitialParameters(fulldata);
  assertEquals("Full Mean before fitting", 0.4446105, fullparams[0], 0.0001);
  assertEquals("Full Stddev before fitting", 1.4012001, fullparams[1], 0.0001);
  // Do a fit using the full data and check the results are right.
  double[] fullfit = run(fulldata, fullparams);
  assertEquals("Full Mean after fitting", 0.64505, fullfit[0], 0.01);
  assertEquals("Full Stddev after fitting", 1.5227889, fullfit[1], 0.01);
  int splitpoint = 0;
  // Check the bound before accessing the array, so we cannot run past the end.
  while (splitpoint < fulldata.length && fulldata[splitpoint] < splitval) {
    splitpoint++;
  }
  double[] halfdata = Arrays.copyOf(fulldata, splitpoint);
  // Check that the initial parameters match what we were expecting from the
  // data.
  double[] params = estimateInitialParameters(halfdata);
  assertEquals("Mean before fitting", -0.65723044, params[0], 0.0001);
  assertEquals("Stddev before fitting", 1.0112391, params[1], 0.0001);
  // Do a fit using only part of the data and check the results are right.
  double[] ps = run(halfdata, params);
  assertEquals("Mean after fitting", 0.45980, ps[0], 0.01);
  assertEquals("Stddev after fitting", 1.320427, ps[1], 0.01);
}
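The split step above keeps only the sorted values below splitval. Here is a tiny standalone illustration of that selection, using the bounds-checked loop order from the test; the sample numbers are made up.

import java.util.Arrays;

public class SplitSketch {
  public static void main(String[] args) {
    double[] fulldata = { 1.2, -0.3, 0.7, 0.1, -1.5 };
    Arrays.sort(fulldata); // -1.5, -0.3, 0.1, 0.7, 1.2
    double splitval = 0.5;
    int splitpoint = 0;
    // Check the bound before the array access.
    while (splitpoint < fulldata.length && fulldata[splitpoint] < splitval) {
      splitpoint++;
    }
    double[] halfdata = Arrays.copyOf(fulldata, splitpoint);
    System.out.println(Arrays.toString(halfdata)); // [-1.5, -0.3, 0.1]
  }
}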
use of de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader in project elki by elki-project.
the class AbstractDistributionTest method load.
protected void load(String name) {
  data = new HashMap<>();
  try (//
      InputStream in = new GZIPInputStream(getClass().getResourceAsStream(name));
      TokenizedReader reader = new TokenizedReader(Pattern.compile(" "), "\"", Pattern.compile("^\\s*#.*"))) {
    Tokenizer t = reader.getTokenizer();
    DoubleArray buf = new DoubleArray();
    reader.reset(in);
    while (reader.nextLineExceptComments()) {
      assertTrue(t.valid());
      String key = t.getStrippedSubstring();
      buf.clear();
      for (t.advance(); t.valid(); t.advance()) {
        buf.add(t.getDouble());
      }
      data.put(key, buf.toArray());
    }
  } catch (IOException e) {
    fail("Cannot load data.");
  }
}
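For context, a hypothetical subclass test consuming the map built by load(). The resource name and subclass name are made up, and it is assumed that data is an accessible Map<String, double[]> field of AbstractDistributionTest, as suggested by the code above.

import static org.junit.Assert.assertTrue;

import java.util.Map;

import org.junit.Test;

public class HypotheticalDistributionDataTest extends AbstractDistributionTest {
  @Test
  public void testLoadedData() {
    load("beta.ascii.gz"); // assumed resource name, for illustration only
    for (Map.Entry<String, double[]> e : data.entrySet()) {
      assertTrue("Empty value array for key: " + e.getKey(), e.getValue().length > 0);
    }
  }
}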
use of de.lmu.ifi.dbs.elki.utilities.io.TokenizedReader in project elki by elki-project.
the class ExternalClustering method run.
/**
* Run the algorithm.
*
* @param database Database to use
* @return Result
*/
@Override
public Clustering<? extends Model> run(Database database) {
  Clustering<? extends Model> m = null;
  try (//
      InputStream in = FileUtil.tryGzipInput(new FileInputStream(file));
      TokenizedReader reader = CSVReaderFormat.DEFAULT_FORMAT.makeReader()) {
    Tokenizer tokenizer = reader.getTokenizer();
    reader.reset(in);
    IntArrayList assignment = new IntArrayList(database.getRelation(TypeUtil.DBID).size());
    ArrayList<String> name = new ArrayList<>();
    line: while (reader.nextLineExceptComments()) {
      for (; /* initialized by nextLineExceptComments */
          tokenizer.valid(); tokenizer.advance()) {
        try {
          assignment.add(tokenizer.getIntBase10());
        } catch (NumberFormatException e) {
          name.add(tokenizer.getSubstring());
        }
      }
      if (LOG.isDebuggingFinest()) {
        LOG.debugFinest("Read " + assignment.size() + " assignments and " + name.size() + " labels.");
      }
      for (Relation<?> r : database.getRelations()) {
        if (r.size() == assignment.size()) {
          attachToRelation(database, r, assignment, name);
          assignment.clear();
          name.clear();
          continue line;
        }
      }
      throw new AbortException("No relation found to match with clustering of size " + assignment.size());
    }
  } catch (IOException e) {
    throw new AbortException("Could not load the external clustering: " + e.getMessage() + " when loading " + file, e);
  }
  return m;
}
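The per-token try/catch above classifies tokens purely by whether they parse as base-10 integers: integers become cluster assignments, anything else becomes a label. Below is a standalone sketch of that decision on a made-up input line, with Integer.parseInt standing in for tokenizer.getIntBase10().

import java.util.ArrayList;
import java.util.List;

public class AssignmentLineSketch {
  public static void main(String[] args) {
    // Hypothetical line: a clustering label followed by one cluster id per object.
    String[] tokens = "my-clustering 0 0 1 1 2".split(" ");
    List<Integer> assignment = new ArrayList<>();
    List<String> name = new ArrayList<>();
    for (String tok : tokens) {
      try {
        assignment.add(Integer.parseInt(tok)); // integer token: cluster assignment
      } catch (NumberFormatException e) {
        name.add(tok); // any other token is kept as a label
      }
    }
    System.out.println(name + " " + assignment); // [my-clustering] [0, 0, 1, 1, 2]
  }
}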