Search in sources :

Example 1 with TypeInformation

use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.

the class EvaluateDBCV method processNewResult.

/**
 * Evaluate every clustering contained in a newly added result.
 *
 * Does nothing when the result holds no clusterings, or when the database
 * lookup yields no relation for the requested type.
 *
 * @param hier Result hierarchy, used to locate the database
 * @param newResult Newly added result that is searched for clusterings
 */
@Override
public void processNewResult(ResultHierarchy hier, Result newResult) {
    final List<Clustering<?>> clusterings = Clustering.getClusteringResults(newResult);
    if (clusterings.isEmpty()) {
        // Nothing to evaluate.
        return;
    }
    final Database database = ResultUtil.findDatabase(hier);
    // Request a relation matching both the distance function's input
    // restriction and TypeUtil.NUMBER_VECTOR_FIELD.
    final TypeInformation required = new CombinedTypeInformation(this.distanceFunction.getInputTypeRestriction(), TypeUtil.NUMBER_VECTOR_FIELD);
    final Relation<O> relation = database.getRelation(required);
    if (relation == null) {
        return;
    }
    for (Clustering<?> clustering : clusterings) {
        evaluateClustering(database, relation, clustering);
    }
}
Also used : CombinedTypeInformation(de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation) Database(de.lmu.ifi.dbs.elki.database.Database) Clustering(de.lmu.ifi.dbs.elki.data.Clustering) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation)

Example 2 with TypeInformation

use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.

the class ArffParser method parse.

/**
 * Parse an ARFF input stream into a bundle of objects.
 *
 * The header and attribute statements are read first; every following
 * non-empty line is loaded as either a dense or a sparse instance (sparse
 * instances start with '{'). A single input must not mix both formats.
 *
 * @param instream Input stream to read; closed once parsing ends
 * @return Bundle holding all parsed instances
 * @throws AbortException on I/O errors, or when dense and sparse instances
 *         are mixed
 */
@Override
public MultipleObjectsBundle parse(InputStream instream) {
    try (InputStreamReader reader = new InputStreamReader(instream);
        BufferedReader input = new BufferedReader(reader)) {
        ArrayList<String> names = new ArrayList<>();
        ArrayList<String> types = new ArrayList<>();
        readHeader(input);
        parseAttributeStatements(input, names, types);
        // Column mapping arrays, one slot per attribute name.
        final int numattr = names.size();
        int[] targ = new int[numattr];
        TypeInformation[] elkitypes = new TypeInformation[numattr];
        int[] dimsize = new int[numattr];
        processColumnTypes(names, types, targ, elkitypes, dimsize);
        // The bundle headers are set up lazily, once the first instance
        // reveals whether the data is dense or sparse.
        MultipleObjectsBundle bundle = new MultipleObjectsBundle();
        StreamTokenizer tokenizer = makeArffTokenizer(input);
        // Instance format seen so far: 0 = none yet, 1 = dense, 2 = sparse.
        int format = 0;
        for (nextToken(tokenizer); tokenizer.ttype != StreamTokenizer.TT_EOF; nextToken(tokenizer)) {
            if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
                // Skip empty lines.
                continue;
            }
            final boolean sparse = tokenizer.ttype == '{';
            final int expected = sparse ? 2 : 1;
            if (format == 0) {
                setupBundleHeaders(names, targ, elkitypes, dimsize, bundle, sparse);
                format = expected;
            } else if (format != expected) {
                throw new AbortException("Mixing dense and sparse vectors is currently not allowed.");
            }
            if (sparse) {
                bundle.appendSimple(loadSparseInstance(tokenizer, targ, dimsize, elkitypes, bundle.metaLength()));
            } else {
                bundle.appendSimple(loadDenseInstance(tokenizer, dimsize, elkitypes, bundle.metaLength()));
            }
        }
        return bundle;
    } catch (IOException e) {
        throw new AbortException("IO error in parser", e);
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) IOException(java.io.IOException) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) BufferedReader(java.io.BufferedReader) StreamTokenizer(java.io.StreamTokenizer) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 3 with TypeInformation

use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.

the class SilhouetteOutlierDetection method getInputTypeRestriction.

/**
 * Input type restriction of this algorithm.
 *
 * Returns the clusterer's restrictions unchanged if the distance function's
 * restriction is assignable from at least one of them; otherwise the
 * distance function's restriction is prepended.
 *
 * @return Type restrictions
 */
@Override
public TypeInformation[] getInputTypeRestriction() {
    final TypeInformation distanceType = getDistanceFunction().getInputTypeRestriction();
    final TypeInformation[] clusterTypes = clusterer.getInputTypeRestriction();
    // Stop at the first clusterer type the distance function accepts.
    boolean covered = false;
    for (int i = 0; i < clusterTypes.length && !covered; i++) {
        covered = distanceType.isAssignableFromType(clusterTypes[i]);
    }
    if (covered) {
        return clusterTypes;
    }
    // Prepend distance type:
    final TypeInformation[] combined = new TypeInformation[clusterTypes.length + 1];
    combined[0] = distanceType;
    System.arraycopy(clusterTypes, 0, combined, 1, clusterTypes.length);
    return combined;
}
Also used : TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation)

Example 4 with TypeInformation

use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.

the class KNNBenchmarkAlgorithm method run.

/**
 * Run the algorithm.
 *
 * Performs kNN queries for a random sample of query points and logs a
 * checksum of the results together with the mean result size and mean
 * k-distance. Without a separate query set, the sample is drawn from the
 * relation itself; otherwise it is drawn from the first column of the query
 * data that is compatible with the distance function.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws IncompatibleDataException if the query set has no column
 *         compatible with the distance function
 */
public Result run(Database database, Relation<O> relation) {
    // Get a distance and kNN query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    KNNQuery<O> knnQuery = database.getKNNQuery(distQuery, k);
    // Accumulators shared by both query modes.
    int hash = 0;
    MeanVariance mv = new MeanVariance(), mvdist = new MeanVariance();
    if (queries == null) {
        // No query set - use original database.
        final DBIDs sample = DBIDUtil.randomSample(relation.getDBIDs(), sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            hash = recordKNNResult(hash, knnQuery.getKNNForDBID(iditer, k), mv, mvdist);
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    } else {
        // Separate query set.
        TypeInformation res = getDistanceFunction().getInputTypeRestriction();
        MultipleObjectsBundle bundle = queries.loadData();
        // Use the first column whose type the distance function accepts.
        int col = -1;
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (res.isAssignableFromType(bundle.meta(i))) {
                col = i;
                break;
            }
        }
        if (col < 0) {
            throw new IncompatibleDataException("No compatible data type in query input was found. Expected: " + res.toString());
        }
        // Random sampling is a bit of hack, sorry.
        // But currently, we don't (yet) have an "integer random sample" function.
        DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
        final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
        FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("kNN queries", sample.size(), LOG) : null;
        for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
            // Map the sampled id back to a row offset in the query bundle.
            int off = sids.binarySearch(iditer);
            assert (off >= 0);
            @SuppressWarnings("unchecked") O o = (O) bundle.data(off, col);
            hash = recordKNNResult(hash, knnQuery.getKNNForObject(o, k), mv, mvdist);
            LOG.incrementProcessed(prog);
        }
        LOG.ensureCompleted(prog);
    }
    reportKNNStatistics(hash, mv, mvdist);
    return null;
}

/**
 * Fold one kNN result into the running checksum and statistics.
 *
 * @param hash Running hash code
 * @param knns kNN result to record
 * @param mv Statistics of the result sizes
 * @param mvdist Statistics of the k-distances
 * @return Updated hash code
 */
private static int recordKNNResult(int hash, KNNList knns, MeanVariance mv, MeanVariance mvdist) {
    int ichecksum = 0;
    for (DBIDIter it = knns.iter(); it.valid(); it.advance()) {
        ichecksum += DBIDUtil.asInteger(it);
    }
    mv.put(knns.size());
    mvdist.put(knns.getKNNDistance());
    return Util.mixHashCodes(hash, ichecksum);
}

/**
 * Log the result checksum and summary statistics, if statistics logging is
 * enabled.
 *
 * @param hash Final hash code of all results
 * @param mv Statistics of the result sizes
 * @param mvdist Statistics of the k-distances
 */
private void reportKNNStatistics(int hash, MeanVariance mv, MeanVariance mvdist) {
    if (!LOG.isStatistics()) {
        return;
    }
    LOG.statistics("Result hashcode: " + hash);
    LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    if (mvdist.getCount() > 0) {
        LOG.statistics("Mean k-distance: " + mvdist.getMean() + " +- " + mvdist.getNaiveStddev());
    }
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) KNNList(de.lmu.ifi.dbs.elki.database.ids.KNNList) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange)

Example 5 with TypeInformation

use of de.lmu.ifi.dbs.elki.data.type.TypeInformation in project elki by elki-project.

the class RangeQueryBenchmarkAlgorithm method run.

/**
 * Run the algorithm, with a separate query set.
 *
 * Each query object is a vector with one more dimension than the relation:
 * the leading values are the query coordinates, the last value is the query
 * radius. A random sample of these queries is executed, and a checksum of
 * the results plus the mean result size are logged.
 *
 * @param database Database
 * @param relation Relation
 * @return Null result
 * @throws AbortException if no query set was configured
 * @throws IncompatibleDataException if the query set has no vector column
 *         of the required dimensionality
 */
public Result run(Database database, Relation<O> relation) {
    if (queries == null) {
        throw new AbortException("A query set is required for this 'run' method.");
    }
    // Get a distance and range query instance.
    DistanceQuery<O> distQuery = database.getDistanceQuery(relation, getDistanceFunction());
    RangeQuery<O> rangeQuery = database.getRangeQuery(distQuery);
    NumberVector.Factory<O> ofactory = RelationUtil.getNumberVectorFactory(relation);
    int dim = RelationUtil.dimensionality(relation);
    // Separate query set: vectors with one extra dimension (the radius).
    TypeInformation res = VectorFieldTypeInformation.typeRequest(NumberVector.class, dim + 1, dim + 1);
    MultipleObjectsBundle bundle = queries.loadData();
    // Use the first column matching the requested type.
    int col = -1;
    for (int i = 0; i < bundle.metaLength(); i++) {
        if (res.isAssignableFromType(bundle.meta(i))) {
            col = i;
            break;
        }
    }
    if (col < 0) {
        // Report the expected type and every type the query input offers.
        StringBuilder msg = new StringBuilder();
        msg.append("No compatible data type in query input was found. Expected: ");
        msg.append(res.toString());
        msg.append(" have: ");
        for (int i = 0; i < bundle.metaLength(); i++) {
            if (i > 0) {
                msg.append(' ');
            }
            msg.append(bundle.meta(i).toString());
        }
        throw new IncompatibleDataException(msg.toString());
    }
    // Random sampling is a bit of hack, sorry.
    // But currently, we don't (yet) have an "integer random sample" function.
    DBIDRange sids = DBIDUtil.generateStaticDBIDRange(bundle.dataLength());
    final DBIDs sample = DBIDUtil.randomSample(sids, sampling, random);
    FiniteProgress prog = LOG.isVeryVerbose() ? new FiniteProgress("Range queries", sample.size(), LOG) : null;
    int hash = 0;
    MeanVariance mv = new MeanVariance();
    // Reusable buffer for the coordinates of the query point.
    double[] coords = new double[dim];
    for (DBIDIter iditer = sample.iter(); iditer.valid(); iditer.advance()) {
        // Map the sampled id back to a row offset in the query bundle.
        int off = sids.binarySearch(iditer);
        assert (off >= 0);
        NumberVector o = (NumberVector) bundle.data(off, col);
        for (int i = 0; i < dim; i++) {
            coords[i] = o.doubleValue(i);
        }
        O v = ofactory.newNumberVector(coords);
        // The last component of the query vector is the radius.
        double r = o.doubleValue(dim);
        DoubleDBIDList rres = rangeQuery.getRangeForObject(v, r);
        int ichecksum = 0;
        for (DBIDIter it = rres.iter(); it.valid(); it.advance()) {
            ichecksum += DBIDUtil.asInteger(it);
        }
        hash = Util.mixHashCodes(hash, ichecksum);
        mv.put(rres.size());
        LOG.incrementProcessed(prog);
    }
    LOG.ensureCompleted(prog);
    if (LOG.isStatistics()) {
        LOG.statistics("Result hashcode: " + hash);
        LOG.statistics("Mean number of results: " + mv.getMean() + " +- " + mv.getNaiveStddev());
    }
    return null;
}
Also used : DBIDs(de.lmu.ifi.dbs.elki.database.ids.DBIDs) FiniteProgress(de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress) MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) TypeInformation(de.lmu.ifi.dbs.elki.data.type.TypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MeanVariance(de.lmu.ifi.dbs.elki.math.MeanVariance) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) IncompatibleDataException(de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException) DBIDRange(de.lmu.ifi.dbs.elki.database.ids.DBIDRange) DoubleDBIDList(de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

TypeInformation (de.lmu.ifi.dbs.elki.data.type.TypeInformation)6 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)3 DBIDRange (de.lmu.ifi.dbs.elki.database.ids.DBIDRange)3 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 MeanVariance (de.lmu.ifi.dbs.elki.math.MeanVariance)3 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)3 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)2 KNNList (de.lmu.ifi.dbs.elki.database.ids.KNNList)2 IncompatibleDataException (de.lmu.ifi.dbs.elki.utilities.exceptions.IncompatibleDataException)2 Clustering (de.lmu.ifi.dbs.elki.data.Clustering)1 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)1 CombinedTypeInformation (de.lmu.ifi.dbs.elki.data.type.CombinedTypeInformation)1 SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)1 Database (de.lmu.ifi.dbs.elki.database.Database)1 DoubleDBIDList (de.lmu.ifi.dbs.elki.database.ids.DoubleDBIDList)1 LinearScanQuery (de.lmu.ifi.dbs.elki.database.query.LinearScanQuery)1 BufferedReader (java.io.BufferedReader)1 IOException (java.io.IOException)1