Use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.
Class EvaluateDBCV, method processNewResult:
/**
 * Evaluate every clustering contained in a newly added result.
 *
 * @param hier Result hierarchy, used to locate the database
 * @param newResult Newly added result, scanned for clusterings
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
List<Clustering<?>> crs = Clustering.getClusteringResults(newResult);
// Nothing to do when the result carries no clusterings.
if (crs.isEmpty()) {
return;
}
Database db = ResultUtil.findDatabase(hier);
// Require both the distance function's input type and a number vector field.
TypeInformation typ = new CombinedTypeInformation(this.distanceFunction.getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD);
Relation<O> rel = db.getRelation(typ);
// Evaluate only if a compatible relation exists; otherwise skip silently.
if (rel != null) {
for (Clustering<?> cl : crs) {
evaluateClustering(db, rel, cl);
}
}
}
Use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.
Class ArffParser, method parse:
/**
 * Parse the ARFF data section into a bundle of objects.
 *
 * Supports both dense instances (plain values) and sparse instances
 * (brace-enclosed index/value pairs), but refuses to mix the two formats
 * within one file.
 *
 * @param instream Input stream with ARFF data
 * @return Bundle containing the parsed columns and rows
 * @throws AbortException on I/O errors, or when dense and sparse rows are mixed
 */
@Override
public MultipleObjectsBundle parse(InputStream instream) {
try (InputStreamReader ir = new InputStreamReader(instream);
BufferedReader br = new BufferedReader(ir)) {
// Column names and their declared ARFF types, filled from the @attribute header.
ArrayList<String> names = new ArrayList<>(), types = new ArrayList<>();
readHeader(br);
parseAttributeStatements(br, names, types);
// Convert into column mapping. Prepare arrays to fill
int[] targ = new int[names.size()];
TypeInformation[] elkitypes = new TypeInformation[names.size()];
int[] dimsize = new int[names.size()];
processColumnTypes(names, types, targ, elkitypes, dimsize);
// Prepare bundle:
// This is a bit complicated to produce vector fields.
MultipleObjectsBundle bundle = new MultipleObjectsBundle();
StreamTokenizer tokenizer = makeArffTokenizer(br);
// Format state: 0 = no data row seen yet, 1 = dense rows, 2 = sparse rows.
int state = 0;
nextToken(tokenizer);
while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
// Parse instance
if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
// ignore empty lines
} else if (tokenizer.ttype != '{') {
if (state == 0) {
// First data row decides the format: set up headers for dense vectors.
setupBundleHeaders(names, targ, elkitypes, dimsize, bundle, false);
// dense
state = 1;
} else if (state != 1) {
throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
}
// Load a dense instance
bundle.appendSimple(loadDenseInstance(tokenizer, dimsize, elkitypes, bundle.metaLength()));
} else {
if (state == 0) {
// A leading '{' marks the sparse format: set up headers for sparse vectors.
setupBundleHeaders(names, targ, elkitypes, dimsize, bundle, true);
// sparse
state = 2;
} else if (state != 2) {
throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
}
bundle.appendSimple(loadSparseInstance(tokenizer, targ, dimsize, elkitypes, bundle.metaLength()));
}
nextToken(tokenizer);
}
return bundle;
} catch (IOException e) {
throw new AbortException("IO error in parser", e);
}
}
Use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.
Class SilhouetteOutlierDetection, method getInputTypeRestriction:
/**
 * Combined input type restriction: the clusterer's requirements, extended
 * with the distance function's input type if none of the clusterer's
 * requested types already satisfies it.
 *
 * @return Input type restrictions
 */
@Override
public TypeInformation[] getInputTypeRestriction() {
final TypeInformation distType = getDistanceFunction().getInputTypeRestriction();
final TypeInformation[] clusterTypes = clusterer.getInputTypeRestriction();
// If any type the clusterer requests is acceptable to the distance
// function, the clusterer's restrictions suffice as-is.
boolean covered = false;
for (int i = 0; i < clusterTypes.length && !covered; i++) {
covered = distType.isAssignableFromType(clusterTypes[i]);
}
if (covered) {
return clusterTypes;
}
// Otherwise, prepend the distance function's type to the requirements.
final TypeInformation[] combined = new TypeInformation[clusterTypes.length + 1];
combined[0] = distType;
System.arraycopy(clusterTypes, 0, combined, 1, clusterTypes.length);
return combined;
}
Use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.
Class KNNBenchmarkAlgorithm, method run:
/**
 * Run the algorithm.
 *
 * Benchmarks kNN queries either on a sample of the database itself
 * (when no query set is configured) or on a separate query set.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 */
public Result run(Database database, Relation<O> relation) {
// Get a distance and kNN query instance.
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
if (queries == null) {
// No query set - use a sample of the original database.
final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
KNNList knns = knnQuery.getKNNForDBID(iditer, k);
hash = Util.mixHashCodes(hash, checksum(knns));
mv.put(knns.size());
mvdist.put(knns.getKNNDistance());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
logStatistics(hash, mv, mvdist);
} else {
// Separate query set: find a column compatible with the distance function.
TypeInformation res = getDistanceFunction().getInputTypeRestriction();
MultipleObjectsBundle bundle = queries.loadData();
int col = findCompatibleColumn(bundle, res);
// Random sampling is a bit of hack, sorry.
// But currently, we don't (yet) have an "integer random sample" function.
DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
// Map the synthetic DBID back to its bundle row offset.
int off = sids.binarySearch(iditer);
assert (off >= 0);
@SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
KNNList knns = knnQuery.getKNNForObject(o, k);
hash = Util.mixHashCodes(hash, checksum(knns));
mv.put(knns.size());
mvdist.put(knns.getKNNDistance());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
logStatistics(hash, mv, mvdist);
}
return null;
}

/**
 * Find the first bundle column whose type is acceptable to {@code res}.
 *
 * @param bundle Loaded query data
 * @param res Required type restriction
 * @return Column index
 * @throws IncompatibleDataException if no compatible column exists
 */
private int findCompatibleColumn(MultipleObjectsBundle bundle, TypeInformation res) {
for (int i = 0; i < bundle.metaLength(); i++) {
if (res.isAssignableFromType(bundle.meta(i))) {
return i;
}
}
throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
}

/**
 * Order-insensitive checksum over the neighbor DBIDs of one result.
 *
 * @param knns kNN result list
 * @return Sum of the neighbor DBID integers
 */
private int checksum(KNNList knns) {
int ichecksum = 0;
for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
ichecksum += DBIDUtil.asInteger(it);
}
return ichecksum;
}

/**
 * Log the result hash and the mean statistics gathered during the benchmark.
 *
 * @param hash Combined result hashcode
 * @param mv Result-size statistics
 * @param mvdist k-distance statistics (may be empty)
 */
private void logStatistics(int hash, MeanVariance mv, MeanVariance mvdist) {
if (LOG.isStatistics()) {
LOG.statistics("Result hashcode: " + hash);
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
if (mvdist.getCount() > 0) {
LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
}
}
}
Use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.
Class RangeQueryBenchmarkAlgorithm, method run:
/**
 * Run the algorithm, with a separate query set.
 *
 * The query relation must have dimensionality {@code dim + 1}: the first
 * {@code dim} values form the query vector, the last value is the query
 * radius.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no query set was configured
 * @throws IncompatibleDataException if the query data has no compatible column
 */
public Result run(Database database, Relation<O> relation) {
if (queries == null) {
throw new AbortException("A query set is required for this 'run' method.");
}
// Get a distance and range query instance.
DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
int dim = RelationUtil.dimensionality(relation);
// Separate query set: needs exactly dim+1 values (vector plus radius).
TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
MultipleObjectsBundle bundle = queries.loadData();
int col = -1;
for (int i = 0; i < bundle.metaLength(); i++) {
if (res.isAssignableFromType(bundle.meta(i))) {
col = i;
break;
}
}
if (col < 0) {
// Build a detailed error message listing the types that were available.
StringBuilder buf = new StringBuilder();
buf.append("No compatible data type in query input was found. Expected: ");
buf.append(res.toString());
buf.append(" have: ");
for (int i = 0; i < bundle.metaLength(); i++) {
if (i > 0) {
buf.append(' ');
}
buf.append(bundle.meta(i).toString());
}
throw new IncompatibleDataException(buf.toString());
}
// Random sampling is a bit of hack, sorry.
// But currently, we don't (yet) have an "integer random sample" function.
DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
// Fixed copy-paste label: this benchmarks range queries, not kNN queries.
FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
int hash = 0;
MeanVariance mv = new MeanVariance();
double[] buf = new double[dim];
for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
// Map the synthetic DBID back to its bundle row offset.
int off = sids.binarySearch(iditer);
assert (off >= 0);
NumberVector o = (NumberVector) bundle.data(off, col);
// Split the row into query vector (first dim values) and radius (last value).
for (int i = 0; i < dim; i++) {
buf[i] = o.doubleValue(i);
}
O v = ofactory.newNumberVector(buf);
double r = o.doubleValue(dim);
DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
// Order-insensitive checksum over the neighbor DBIDs.
int ichecksum = 0;
for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
ichecksum += DBIDUtil.asInteger(it);
}
hash = Util.mixHashCodes(hash, ichecksum);
mv.put(rres.size());
LOG.incrementProcessed(prog);
}
LOG.ensureCompleted(prog);
if (LOG.isStatistics()) {
LOG.statistics("Result hashcode: " + hash);
LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
}
return null;
}
Aggregations