Search in sources :

Example 31 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorXMLDatabaseConnection method processElementRotate.

/**
 * Handle a 'rotate' element of the XML specification by registering the
 * described rotation on the given cluster.
 * <p>
 * Axis numbers in the file are 1-based: each must lie in
 * {@code 1..cluster.getDim()} and the two must differ. A missing axis
 * attribute leaves the axis at 0 and is therefore rejected. The angle is read
 * in degrees (it is converted with {@link Math#toRadians(double)}) and
 * defaults to 0 when the attribute is absent.
 *
 * @param cluster Cluster the rotation is added to
 * @param cur Current document node; it is cast to {@link Element}
 * @throws AbortException if an axis is out of range or both axes are equal
 */
private void processElementRotate(GeneratorSingleCluster cluster, Node cur) {
    final Element elem = (Element) cur;

    // Read the three attributes in document order: axis1, axis2, angle.
    final String axis1Attr = elem.getAttribute(ATTR_AXIS1);
    final int axis1 = (axis1Attr == null || axis1Attr.isEmpty()) ? 0 : ParseUtil.parseIntBase10(axis1Attr);

    final String axis2Attr = elem.getAttribute(ATTR_AXIS2);
    final int axis2 = (axis2Attr == null || axis2Attr.isEmpty()) ? 0 : ParseUtil.parseIntBase10(axis2Attr);

    final String angleAttr = elem.getAttribute(ATTR_ANGLE);
    final double angle = (angleAttr == null || angleAttr.isEmpty()) ? 0.0 : ParseUtil.parseDouble(angleAttr);

    // Validate only after everything was parsed, so parse errors take precedence.
    if (!(axis1 > 0 && axis1 <= cluster.getDim())) {
        throw new AbortException("Invalid axis1 number given in specification file.");
    }
    if (!(axis2 > 0 && axis2 <= cluster.getDim())) {
        throw new AbortException("Invalid axis2 number given in specification file.");
    }
    if (axis1 == axis2) {
        throw new AbortException("Invalid axis numbers given in specification file.");
    }

    // The cluster API takes 0-based axes and radians.
    cluster.addRotation(axis1 - 1, axis2 - 1, Math.toRadians(angle));

    // TODO: check for unknown attributes.
    // A rotate element has no known children; warn about any nested element.
    for (XMLNodeIterator children = new XMLNodeIterator(cur.getFirstChild()); children.hasNext();) {
        final Node nested = children.next();
        if (nested.getNodeType() == Node.ELEMENT_NODE) {
            LOG.warning("Unknown element in XML specification file: " + nested.getNodeName());
        }
    }
}
Also used : Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) XMLNodeIterator(de.lmu.ifi.dbs.elki.utilities.xml.XMLNodeIterator)

Example 32 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorXMLDatabaseConnection method processElementCluster.

/**
 * Handle a 'cluster' element of the XML specification: build a new
 * {@link GeneratorSingleCluster} from its attributes, let the child elements
 * configure it, and register it with the generator.
 * <p>
 * The size attribute is multiplied by {@code sizescale} and truncated to an
 * int. The density attribute is optional and defaults to 1.0.
 *
 * @param gen Generator the finished cluster is added to
 * @param cur Current document node; it is cast to {@link Element}
 * @throws AbortException if no valid (non-negative) size or no name is given
 */
private void processElementCluster(GeneratorMain gen, Node cur) {
    final Element elem = (Element) cur;

    // Parse all attributes first; validation happens afterwards.
    final String sizeAttr = elem.getAttribute(ATTR_SIZE);
    final int size = (sizeAttr != null && !sizeAttr.isEmpty()) //
        ? (int) (ParseUtil.parseIntBase10(sizeAttr) * sizescale) //
        : -1;

    final String name = elem.getAttribute(ATTR_NAME);

    final String densityAttr = elem.getAttribute(ATTR_DENSITY);
    final double overweight = (densityAttr != null && !densityAttr.isEmpty()) //
        ? ParseUtil.parseDouble(densityAttr) //
        : 1.0;

    if (size < 0) {
        throw new AbortException("No valid cluster size given in specification file.");
    }
    if (name == null || name.isEmpty()) {
        throw new AbortException("No cluster name given in specification file.");
    }

    // *** add new cluster object
    final Random newRand = clusterRandom.getSingleThreadedRandom();
    final GeneratorSingleCluster cluster = new GeneratorSingleCluster(name, size, overweight, newRand);

    // TODO: check for unknown attributes.
    // Dispatch every child node to the handler matching its tag name.
    for (XMLNodeIterator children = new XMLNodeIterator(cur.getFirstChild()); children.hasNext();) {
        final Node child = children.next();
        final String tag = child.getNodeName();
        if (TAG_UNIFORM.equals(tag)) {
            processElementUniform(cluster, child);
        } else if (TAG_NORMAL.equals(tag)) {
            processElementNormal(cluster, child);
        } else if (TAG_GAMMA.equals(tag)) {
            processElementGamma(cluster, child);
        } else if (TAG_HALTON.equals(tag)) {
            processElementHalton(cluster, child);
        } else if (TAG_ROTATE.equals(tag)) {
            processElementRotate(cluster, child);
        } else if (TAG_TRANSLATE.equals(tag)) {
            processElementTranslate(cluster, child);
        } else if (TAG_CLIP.equals(tag)) {
            processElementClipping(cluster, child);
        } else if (child.getNodeType() == Node.ELEMENT_NODE) {
            // Non-element nodes are skipped silently.
            LOG.warning("Unknown element in XML specification file: " + tag);
        }
    }
    gen.addCluster(cluster);
}
Also used : Random(java.util.Random) Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException) XMLNodeIterator(de.lmu.ifi.dbs.elki.utilities.xml.XMLNodeIterator)

Example 33 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorMain method generate.

/**
 * Generate the data set by sampling every registered cluster generator.
 * <p>
 * The result bundle has three columns: the vectors, the class labels and the
 * generating models. For each generator, points are requested until as many
 * points as the generator's size have been kept; points for which the
 * assignment strategy returns a negative index are counted as discarded and
 * regenerated.
 *
 * @return Generated data set
 * @throws AbortException if there are no clusters, or their dimensionalities
 *         do not agree
 */
public MultipleObjectsBundle generate() {
    // we actually need some clusters.
    if (generators.isEmpty()) {
        throw new AbortException("No clusters specified.");
    }
    // All clusters must share the dimensionality of the first one.
    final int dim = generators.get(0).getDim();
    for (GeneratorInterface gen : generators) {
        if (gen.getDim() != dim) {
            throw new AbortException("Cluster dimensions do not agree.");
        }
    }

    // Prepare result bundle: vector, label and model columns.
    MultipleObjectsBundle bundle = new MultipleObjectsBundle();
    VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dim);
    bundle.appendColumn(type, new ArrayList<>());
    bundle.appendColumn(TypeUtil.CLASSLABEL, new ArrayList<>());
    bundle.appendColumn(Model.TYPE, new ArrayList<Model>());

    // One label and one model per generator.
    final int numGen = generators.size();
    ClassLabel[] labels = new ClassLabel[numGen];
    Model[] models = new Model[numGen];
    initLabelsAndModels(generators, labels, models, relabelClusters);

    // Choose how generated points are assigned to (or rejected from) clusters.
    final AssignPoint assignment;
    if (testAgainstModel) {
        if (relabelClusters != null) {
            if (relabelDistance) {
                assignment = new AssignLabelsByDistance(labels);
            } else {
                assignment = new AssignLabelsByDensity(labels);
            }
        } else {
            assignment = new TestModel();
        }
    } else {
        assignment = new AssignPoint();
    }

    for (int i = 0; i < labels.length; i++) {
        final GeneratorInterface curclus = generators.get(i);
        assignment.newCluster(i, curclus);
        // Only dynamic generators allow rejection / model testing:
        GeneratorInterfaceDynamic cursclus = null;
        if (curclus instanceof GeneratorInterfaceDynamic) {
            cursclus = (GeneratorInterfaceDynamic) curclus;
        }
        for (int kept = 0; kept < curclus.getSize();) {
            // generate the "missing" number of points
            final List<double[]> batch = curclus.generate(curclus.getSize() - kept);
            for (double[] point : batch) {
                final int bestc = assignment.getAssignment(i, point);
                if (bestc >= 0) {
                    bundle.appendSimple(DoubleVector.wrap(point), labels[bestc], models[bestc]);
                    ++kept;
                } else {
                    // NOTE(review): cursclus is null for non-dynamic generators;
                    // presumably the assignment never rejects their points - confirm.
                    cursclus.incrementDiscarded();
                }
            }
        }
    }
    return bundle;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleClassLabel(de.lmu.ifi.dbs.elki.data.SimpleClassLabel) ClassLabel(de.lmu.ifi.dbs.elki.data.ClassLabel) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) Model(de.lmu.ifi.dbs.elki.data.model.Model) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 34 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class GeneratorXMLSpec method writeClusters.

/**
 * Write the resulting clusters to an output stream.
 * <p>
 * Rows are grouped by the model found in the bundle's model column, in the
 * order in which each model first occurs. Every group is preceded by a
 * {@code #}-prefixed header; for {@link GeneratorSingleCluster} models the
 * header also lists name, clipping, generators, transformation and discard
 * statistics. Data rows contain all columns except the model column,
 * separated by single spaces.
 *
 * @param outStream output stream
 * @param data Generated data
 * @throws IOException thrown on write errors
 * @throws AbortException if the bundle contains no model column
 */
public void writeClusters(OutputStreamWriter outStream, MultipleObjectsBundle data) throws IOException {
    // Find model column
    int modelcol = -1;
    for (int i = 0; i < data.metaLength(); i++) {
        if (Model.TYPE.isAssignableFromType(data.meta(i))) {
            modelcol = i;
            break;
        }
    }
    if (modelcol < 0) {
        throw new AbortException("No model column found in bundle.");
    }
    // Build a map from model to the row numbers of its objects; the list keeps
    // the models in first-seen order for a stable output order.
    ArrayList<Model> models = new ArrayList<>();
    Map<Model, IntArrayList> modelMap = new HashMap<>();
    for (int i = 0; i < data.dataLength(); i++) {
        Model model = (Model) data.data(i, modelcol);
        IntArrayList modelids = modelMap.get(model);
        if (modelids == null) {
            models.add(model);
            modelids = new IntArrayList();
            modelMap.put(model, modelids);
        }
        modelids.add(i);
    }
    // compute global discard values
    int totalsize = 0, totaldisc = 0;
    for (Entry<Model, IntArrayList> ent : modelMap.entrySet()) {
        totalsize += ent.getValue().size();
        if (ent.getKey() instanceof GeneratorSingleCluster) {
            totaldisc += ((GeneratorSingleCluster) ent.getKey()).getDiscarded();
        }
    }
    // Ratio of generated (kept + discarded) to kept points over all clusters.
    double globdens = (double) (totalsize + totaldisc) / totalsize;
    outStream.append("########################################################").append(LINE_SEPARATOR);
    outStream.append("## Number of clusters: " + models.size()).append(LINE_SEPARATOR);
    for (Model model : models) {
        IntArrayList ids = modelMap.get(model);
        outStream.append("########################################################").append(LINE_SEPARATOR);
        outStream.append("## Size: " + ids.size()).append(LINE_SEPARATOR);
        if (model instanceof GeneratorSingleCluster) {
            GeneratorSingleCluster cursclus = (GeneratorSingleCluster) model;
            outStream.append("########################################################").append(LINE_SEPARATOR);
            outStream.append("## Cluster: ").append(cursclus.getName()).append(LINE_SEPARATOR);
            double[] cmin = cursclus.getClipmin();
            double[] cmax = cursclus.getClipmax();
            if (cmin != null && cmax != null) {
                outStream.append("## Clipping: ").append(FormatUtil.format(cmin)).append(" - ").append(FormatUtil.format(cmax)).append(LINE_SEPARATOR);
            }
            outStream.append("## Density correction factor: " + cursclus.getDensityCorrection()).append(LINE_SEPARATOR);
            outStream.append("## Generators:").append(LINE_SEPARATOR);
            for (int i = 0; i < cursclus.getDim(); i++) {
                Distribution gen = cursclus.getDistribution(i);
                outStream.append("##   ").append(gen.toString()).append(LINE_SEPARATOR);
            }
            if (cursclus.getTransformation() != null && cursclus.getTransformation().getTransformation() != null) {
                outStream.append("## Affine transformation matrix:").append(LINE_SEPARATOR);
                outStream.append(FormatUtil.format(cursclus.getTransformation().getTransformation(), "## ")).append(LINE_SEPARATOR);
            }
            outStream.append("## Discards: " + cursclus.getDiscarded() + " Retries left: " + cursclus.getRetries()).append(LINE_SEPARATOR);
            // This cluster's generated/kept ratio, relative to the global ratio.
            double corf = (double) (cursclus.getSize() + cursclus.getDiscarded()) / cursclus.getSize() / globdens;
            outStream.append("## Density correction factor estimation: " + corf).append(LINE_SEPARATOR);
        }
        outStream.append("########################################################").append(LINE_SEPARATOR);
        for (IntIterator iter = ids.iterator(); iter.hasNext(); ) {
            int num = iter.nextInt();
            // Track whether a column was already written on this row, so the
            // separator is correct even when the model column is column 0.
            boolean first = true;
            for (int c = 0; c < data.metaLength(); c++) {
                if (c == modelcol) {
                    continue;
                }
                if (!first) {
                    outStream.append(' ');
                }
                first = false;
                outStream.append(data.data(num, c).toString());
            }
            outStream.append(LINE_SEPARATOR);
        }
    }
}
Also used : IntIterator(it.unimi.dsi.fastutil.ints.IntIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) Model(de.lmu.ifi.dbs.elki.data.model.Model) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 35 with AbortException

use of de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException in project elki by elki-project.

the class TrivialGeneratedOutlier method run.

/**
 * Score every object against the generator models found in the model
 * relation.
 * <p>
 * For each object and each generator, the vector is mapped back through the
 * generator's transformation (if any), standardized per dimension with the
 * normal distribution's mean and standard deviation, and the squared length
 * is evaluated with the chi-squared CDF. The object's score is the minimum
 * over all generators (starting from 1), optionally rescaled when
 * {@code expect < 1}.
 *
 * @param models Model relation
 * @param vecs Vector relation
 * @param labels Label relation (not read by this method)
 * @return Outlier result
 * @throws AbortException if a generator uses a non-normal distribution
 */
public OutlierResult run(Relation<Model> models, Relation<NumberVector> vecs, Relation<?> labels) {
    WritableDoubleDataStore scores = DataStoreUtil.makeDoubleStorage(models.getDBIDs(), DataStoreFactory.HINT_HOT);

    // Collect the distinct single-cluster generators referenced by the data.
    HashSet<GeneratorSingleCluster> generators = new HashSet<>();
    for (DBIDIter it = models.iterDBIDs(); it.valid(); it.advance()) {
        final Model m = models.get(it);
        if (m instanceof GeneratorSingleCluster) {
            generators.add((GeneratorSingleCluster) m);
        }
    }
    if (generators.isEmpty()) {
        LOG.warning("No generator models found for dataset - all points will be considered outliers.");
    }

    // Fail early if any generator dimension is not normally distributed.
    for (GeneratorSingleCluster gen : generators) {
        final int gdim = gen.getDim();
        for (int d = 0; d < gdim; d++) {
            final Distribution dist = gen.getDistribution(d);
            if (!(dist instanceof NormalDistribution)) {
                throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
            }
        }
    }

    for (DBIDIter it = models.iterDBIDs(); it.valid(); it.advance()) {
        final double[] vec = vecs.get(it).toArray();
        double score = 1.;
        for (GeneratorSingleCluster gen : generators) {
            // Transform backwards
            final double[] tv = gen.getTransformation() != null ? gen.getTransformation().applyInverse(vec) : vec;
            double lensq = 0.0;
            int norm = 0;
            for (int d = 0; d < tv.length; d++) {
                final Distribution dist = gen.getDistribution(d);
                if (!(dist instanceof NormalDistribution)) {
                    throw new AbortException("TrivialGeneratedOutlier currently only supports normal distributions, got: " + dist);
                }
                final NormalDistribution normal = (NormalDistribution) dist;
                final double delta = (tv[d] - normal.getMean()) / normal.getStddev();
                lensq += delta * delta;
                norm++;
            }
            if (norm > 0) {
                // The squared distances are ChiSquared distributed
                score = Math.min(score, ChiSquaredDistribution.cdf(lensq, norm));
            } else {
                score = 0.;
            }
        }
        if (expect < 1) {
            score = expect * score / (1 - score + expect);
        }
        scores.putDouble(it, score);
    }

    OutlierScoreMeta meta = new ProbabilisticOutlierScore(0., 1.);
    DoubleRelation scoreres = new MaterializedDoubleRelation("Model outlier scores", "model-outlier", scores, models.getDBIDs());
    return new OutlierResult(meta, scoreres);
}
Also used : WritableDoubleDataStore(de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore) GeneratorSingleCluster(de.lmu.ifi.dbs.elki.data.synthetic.bymodel.GeneratorSingleCluster) OutlierResult(de.lmu.ifi.dbs.elki.result.outlier.OutlierResult) ProbabilisticOutlierScore(de.lmu.ifi.dbs.elki.result.outlier.ProbabilisticOutlierScore) DoubleRelation(de.lmu.ifi.dbs.elki.database.relation.DoubleRelation) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) OutlierScoreMeta(de.lmu.ifi.dbs.elki.result.outlier.OutlierScoreMeta) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) NormalDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.NormalDistribution) ChiSquaredDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.ChiSquaredDistribution) Model(de.lmu.ifi.dbs.elki.data.model.Model) MaterializedDoubleRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation) HashSet(java.util.HashSet) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)99 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)25 IOException (java.io.IOException)24 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)22 ArrayList (java.util.ArrayList)16 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)13 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)13 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)10 DBIDArrayIter (de.lmu.ifi.dbs.elki.database.ids.DBIDArrayIter)9 DoubleRelation (de.lmu.ifi.dbs.elki.database.relation.DoubleRelation)9 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)8 Model (de.lmu.ifi.dbs.elki.data.model.Model)8 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)8 Database (de.lmu.ifi.dbs.elki.database.Database)8 WritableDoubleDataStore (de.lmu.ifi.dbs.elki.database.datastore.WritableDoubleDataStore)8 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)8 OutlierResult (de.lmu.ifi.dbs.elki.result.outlier.OutlierResult)8 MaterializedDoubleRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedDoubleRelation)6 ClassLabel (de.lmu.ifi.dbs.elki.data.ClassLabel)5 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)5