Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class GeneratorMain, method generate.
/**
 * Main loop to generate the data set.
 *
 * @return Generated data set
 */
public MultipleObjectsBundle generate() {
  // We need at least one cluster.
  if (generators.isEmpty()) {
    throw new AbortException("No clusters specified.");
  }
  // Assert that cluster dimensionalities agree.
  final int dim = generators.get(0).getDim();
  for (GeneratorInterface c : generators) {
    if (c.getDim() != dim) {
      throw new AbortException("Cluster dimensions do not agree.");
    }
  }
  // Prepare the result bundle: vectors, class labels, and generator models.
  MultipleObjectsBundle bundle = new MultipleObjectsBundle();
  VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
  bundle.appendColumn(type, new ArrayList<>());
  bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
  bundle.appendColumn(Model.TYPE, new ArrayList<Model>());
  // Generate clusters.
  ClassLabel[] labels = new ClassLabel[generators.size()];
  Model[] models = new Model[generators.size()];
  initLabelsAndModels(generators, labels, models, relabelClusters);
  final AssignPoint assignment;
  if (!testAgainstModel) {
    assignment = new AssignPoint();
  } else if (relabelClusters == null) {
    assignment = new TestModel();
  } else if (!relabelDistance) {
    assignment = new AssignLabelsByDensity(labels);
  } else {
    assignment = new AssignLabelsByDistance(labels);
  }
  for (int i = 0; i < labels.length; i++) {
    final GeneratorInterface curclus = generators.get(i);
    assignment.newCluster(i, curclus);
    // Only dynamic generators allow rejection / model testing, so whenever a
    // point is rejected below (bestc < 0), cursclus is non-null.
    GeneratorInterfaceDynamic cursclus = (curclus instanceof GeneratorInterfaceDynamic) ? (GeneratorInterfaceDynamic) curclus : null;
    int kept = 0;
    while (kept < curclus.getSize()) {
      // Generate the "missing" number of points.
      List<double[]> newp = curclus.generate(curclus.getSize() - kept);
      for (double[] p : newp) {
        int bestc = assignment.getAssignment(i, p);
        if (bestc < 0) {
          cursclus.incrementDiscarded();
          continue;
        }
        bundle.appendSimple(DoubleVector.wrap(p), labels[bestc], models[bestc]);
        ++kept;
      }
    }
  }
  return bundle;
}
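For context, here is a minimal sketch of driving this generator by hand. The GeneratorSingleCluster and NormalDistribution constructor signatures are assumptions based on typical ELKI 0.7.x usage, not taken from the snippet above:

import java.util.Random;

import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorMain;
import de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;
import de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution;

public class GenerateExample {
  public static void main(String[] args) {
    GeneratorMain gen = new GeneratorMain();
    // Assumed constructor: name, size, density correction, random source.
    GeneratorSingleCluster c = new GeneratorSingleCluster("cluster1", 100, 1.0, new Random(0L));
    // One distribution per dimension; here a 2d standard normal cluster.
    c.addGenerator(new NormalDistribution(0., 1., new Random(1L)));
    c.addGenerator(new NormalDistribution(0., 1., new Random(2L)));
    gen.addCluster(c);
    MultipleObjectsBundle bundle = gen.generate();
    System.out.println("Generated " + bundle.dataLength() + " points.");
  }
}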
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class ByModelClustering, method run.
/**
 * Run the actual clustering algorithm.
 *
 * @param relation The data input to use
 * @return Clustering result, one cluster per distinct model
 */
public Clustering<Model> run(Relation<Model> relation) {
  // Build a mapping from each model to the objects it generated.
  HashMap<Model, ModifiableDBIDs> modelMap = new HashMap<>();
  for (DBIDIter iditer = relation.iterDBIDs(); iditer.valid(); iditer.advance()) {
    Model model = relation.get(iditer);
    ModifiableDBIDs modelids = modelMap.get(model);
    if (modelids == null) {
      modelids = DBIDUtil.newHashSet();
      modelMap.put(model, modelids);
    }
    modelids.add(iditer);
  }
  Clustering<Model> result = new Clustering<>("By Model Clustering", "bymodel-clustering");
  for (Entry<Model, ModifiableDBIDs> entry : modelMap.entrySet()) {
    final Model model = entry.getKey();
    final ModifiableDBIDs ids = entry.getValue();
    final String name = (model instanceof GeneratorInterface) ? ((GeneratorInterface) model).getName() : model.toString();
    Cluster<Model> c = new Cluster<>(name, ids, model);
    if (noisepattern != null && noisepattern.matcher(name).find()) {
      c.setNoise(true);
    }
    result.addToplevelCluster(c);
  }
  return result;
}
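A small hedged sketch of how this result might be consumed. The no-argument ByModelClustering constructor and the Cluster accessors used here are assumptions about the surrounding API, not shown in the snippet above:

import de.lmu.ifi.dbs.elki.algorithm.clustering.trivial.ByModelClustering;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.model.Model;
import de.lmu.ifi.dbs.elki.database.relation.Relation;

class GroundTruthExample {
  // Recover the ground-truth clustering from a relation of generator models.
  static void printGroundTruth(Relation<Model> modelRelation) {
    Clustering<Model> clustering = new ByModelClustering().run(modelRelation);
    for (Cluster<Model> cluster : clustering.getAllClusters()) {
      System.out.println(cluster.getName() + ": " + cluster.size() + " objects, noise=" + cluster.isNoise());
    }
  }
}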
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class GeneratorXMLSpec, method writeClusters.
/**
 * Write the resulting clusters to an output stream.
 *
 * @param outStream output stream
 * @param data Generated data
 * @throws IOException thrown on write errors
 */
public void writeClusters(OutputStreamWriter outStream, MultipleObjectsBundle data) throws IOException {
  int modelcol = -1;
  {
    // Find model column
    for (int i = 0; i < data.metaLength(); i++) {
      if (Model.TYPE.isAssignableFromType(data.meta(i))) {
        modelcol = i;
        break;
      }
    }
  }
  if (modelcol < 0) {
    throw new AbortException("No model column found in bundle.");
  }
  ArrayList<Model> models = new ArrayList<>();
  Map<Model, IntArrayList> modelMap = new HashMap<>();
  {
    // Build a map from model to the actual objects
    for (int i = 0; i < data.dataLength(); i++) {
      Model model = (Model) data.data(i, modelcol);
      IntArrayList modelids = modelMap.get(model);
      if (modelids == null) {
        models.add(model);
        modelids = new IntArrayList();
        modelMap.put(model, modelids);
      }
      modelids.add(i);
    }
  }
  // Compute global discard values
  int totalsize = 0, totaldisc = 0;
  for (Entry<Model, IntArrayList> ent : modelMap.entrySet()) {
    totalsize += ent.getValue().size();
    if (ent.getKey() instanceof GeneratorSingleCluster) {
      totaldisc += ((GeneratorSingleCluster) ent.getKey()).getDiscarded();
    }
  }
  double globdens = (double) (totalsize + totaldisc) / totalsize;
  outStream.append("########################################################").append(LINE_SEPARATOR);
  outStream.append("## Number of clusters: " + models.size()).append(LINE_SEPARATOR);
  for (Model model : models) {
    IntArrayList ids = modelMap.get(model);
    outStream.append("########################################################").append(LINE_SEPARATOR);
    outStream.append("## Size: " + ids.size()).append(LINE_SEPARATOR);
    if (model instanceof GeneratorSingleCluster) {
      GeneratorSingleCluster cursclus = (GeneratorSingleCluster) model;
      outStream.append("########################################################").append(LINE_SEPARATOR);
      outStream.append("## Cluster: ").append(cursclus.getName()).append(LINE_SEPARATOR);
      double[] cmin = cursclus.getClipmin();
      double[] cmax = cursclus.getClipmax();
      if (cmin != null && cmax != null) {
        outStream.append("## Clipping: ").append(FormatUtil.format(cmin)).append(" - ").append(FormatUtil.format(cmax)).append(LINE_SEPARATOR);
      }
      outStream.append("## Density correction factor: " + cursclus.getDensityCorrection()).append(LINE_SEPARATOR);
      outStream.append("## Generators:").append(LINE_SEPARATOR);
      for (int i = 0; i < cursclus.getDim(); i++) {
        Distribution gen = cursclus.getDistribution(i);
        outStream.append("## ").append(gen.toString()).append(LINE_SEPARATOR);
      }
      if (cursclus.getTransformation() != null && cursclus.getTransformation().getTransformation() != null) {
        outStream.append("## Affine transformation matrix:").append(LINE_SEPARATOR);
        outStream.append(FormatUtil.format(cursclus.getTransformation().getTransformation(), "## ")).append(LINE_SEPARATOR);
      }
      outStream.append("## Discards: " + cursclus.getDiscarded() + " Retries left: " + cursclus.getRetries()).append(LINE_SEPARATOR);
      double corf = /* cursclus.overweight */ (double) (cursclus.getSize() + cursclus.getDiscarded()) / cursclus.getSize() / globdens;
      outStream.append("## Density correction factor estimation: " + corf).append(LINE_SEPARATOR);
    }
    outStream.append("########################################################").append(LINE_SEPARATOR);
    for (IntIterator iter = ids.iterator(); iter.hasNext();) {
      int num = iter.nextInt();
      for (int c = 0; c < data.metaLength(); c++) {
        if (c != modelcol) {
          if (c > 0) {
            outStream.append(' ');
          }
          outStream.append(data.data(num, c).toString());
        }
      }
      outStream.append(LINE_SEPARATOR);
    }
  }
}
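A hedged usage sketch for this writer. How the GeneratorXMLSpec instance and the bundle are obtained, and the package location of GeneratorXMLSpec, are assumptions, not shown above:

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import de.lmu.ifi.dbs.elki.application.GeneratorXMLSpec;
import de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle;

class WriteClustersExample {
  // Serialize a generated bundle to a text file, one object per line,
  // with the "##" metadata header blocks produced by writeClusters.
  static void writeToFile(GeneratorXMLSpec spec, MultipleObjectsBundle bundle, String path) throws IOException {
    try (OutputStreamWriter w = new OutputStreamWriter(new FileOutputStream(path), StandardCharsets.UTF_8)) {
      spec.writeClusters(w, bundle);
    }
  }
}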
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class TrivialGeneratedOutlier, method run.
/**
 * Run the algorithm.
 *
 * @param models Model relation
 * @param vecs Vector relation
 * @param labels Label relation
 * @return Outlier result
 */
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
  WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);
  HashSet<GeneratorSingleCluster> generators = new HashSet<>();
  for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
    Model model = models.get(iditer);
    if (model instanceof GeneratorSingleCluster) {
      generators.add((GeneratorSingleCluster) model);
    }
  }
  if (generators.isEmpty()) {
    LOG.warning("No generator models found for dataset - all points will be considered outliers.");
  }
  for (GeneratorSingleCluster gen : generators) {
    for (int i = 0; i < gen.getDim(); i++) {
      Distribution dist = gen.getDistribution(i);
      if (!(dist instanceof NormalDistribution)) {
        throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
      }
    }
  }
  for (DBIDIter iditer = models.iterDBIDs(); iditer.valid(); iditer.advance()) {
    double score = 1.;
    double[] v = vecs.get(iditer).toArray();
    for (GeneratorSingleCluster gen : generators) {
      double[] tv = v;
      // Transform backwards into the cluster's coordinate system.
      if (gen.getTransformation() != null) {
        tv = gen.getTransformation().applyInverse(v);
      }
      final int dim = tv.length;
      double lensq = 0.0;
      int norm = 0;
      for (int i = 0; i < dim; i++) {
        Distribution dist = gen.getDistribution(i);
        if (dist instanceof NormalDistribution) {
          NormalDistribution d = (NormalDistribution) dist;
          double delta = (tv[i] - d.getMean()) / d.getStddev();
          lensq += delta * delta;
          norm += 1;
        } else {
          throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
        }
      }
      if (norm > 0) {
        // The squared distances are chi-squared distributed.
        score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
      } else {
        score = 0.;
      }
    }
    if (expect < 1) {
      // Rescale using the expected share of outliers.
      score = expect * score / (1 - score + expect);
    }
    scores.putDouble(iditer, score);
  }
  DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
  OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
  return new OutlierResult(meta, scoreres);
}
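The per-cluster score above standardizes each coordinate, sums the squared deviations, and maps the sum through the chi-squared CDF with one degree of freedom per dimension. A minimal self-contained restatement; the means and stddevs arrays are hypothetical inputs standing in for the per-dimension generator distributions:

import de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution;

class ScoreSketch {
  // Probability mass inside the ellipsoid through the point: values near 0
  // mean the point is close to the cluster center, values near 1 mean it
  // lies far out in the tails (the algorithm then takes the minimum over
  // all clusters, so a point is an inlier if any cluster explains it well).
  static double clusterScore(double[] tv, double[] means, double[] stddevs) {
    double lensq = 0.;
    for (int i = 0; i < tv.length; i++) {
      double delta = (tv[i] - means[i]) / stddevs[i];
      lensq += delta * delta;
    }
    return ChiSquaredDistribution.cdf(lensq, tv.length);
  }
}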
Use of de.lmu.ifi.dbs.elki.data.model.Model in project elki by elki-project.
The class TextWriter, method output.
/**
 * Stream output.
 *
 * @param db Database object
 * @param r Result class
 * @param streamOpener output stream manager
 * @param filter Filter pattern
 * @throws IOException on IO error
 */
@SuppressWarnings("unchecked")
public void output(Database db, Result r, StreamFactory streamOpener, Pattern filter) throws IOException {
  List<Relation<?>> ra = new LinkedList<>();
  List<OrderingResult> ro = new LinkedList<>();
  List<Clustering<?>> rc = new LinkedList<>();
  List<IterableResult<?>> ri = new LinkedList<>();
  List<SettingsResult> rs = new LinkedList<>();
  List<Result> otherres = new LinkedList<>();
  // Split result objects into the different known types:
  {
    List<Result> results = ResultUtil.filterResults(db.getHierarchy(), r, Result.class);
    for (Result res : results) {
      if (filter != null) {
        final String nam = res.getShortName();
        if (nam == null || !filter.matcher(nam).find()) {
          continue;
        }
      }
      if (res instanceof Database) {
        continue;
      }
      if (res instanceof Relation) {
        ra.add((Relation<?>) res);
        continue;
      }
      if (res instanceof OrderingResult) {
        ro.add((OrderingResult) res);
        continue;
      }
      if (res instanceof Clustering) {
        rc.add((Clustering<?>) res);
        continue;
      }
      if (res instanceof IterableResult) {
        ri.add((IterableResult<?>) res);
        continue;
      }
      if (res instanceof SettingsResult) {
        rs.add((SettingsResult) res);
        continue;
      }
      otherres.add(res);
    }
  }
  writeSettingsResult(streamOpener, rs);
  for (IterableResult<?> rii : ri) {
    writeIterableResult(streamOpener, rii);
  }
  for (Clustering<?> c : rc) {
    NamingScheme naming = new SimpleEnumeratingScheme(c);
    for (Cluster<?> clus : c.getAllClusters()) {
      writeClusterResult(db, streamOpener, (Clustering<Model>) c, (Cluster<Model>) clus, ra, naming);
    }
  }
  for (OrderingResult ror : ro) {
    writeOrderingResult(db, streamOpener, ror, ra);
  }
  for (Result otherr : otherres) {
    writeOtherResult(streamOpener, otherr);
  }
}
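A hedged sketch of invoking this writer. The SingleStreamOutput no-argument constructor (writing to standard output) and its package location are assumptions about the available StreamFactory implementations, not something shown above:

import java.io.IOException;

import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.result.Result;
import de.lmu.ifi.dbs.elki.result.textwriter.SingleStreamOutput;
import de.lmu.ifi.dbs.elki.result.textwriter.StreamFactory;
import de.lmu.ifi.dbs.elki.result.textwriter.TextWriter;

class TextOutputExample {
  // Write every result under r to standard output; pass a Pattern instead
  // of null to restrict output to results whose short name matches.
  static void dumpAll(Database db, Result r) throws IOException {
    StreamFactory out = new SingleStreamOutput();
    new TextWriter().output(db, r, out, null);
  }
}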