Search in sources :

Example 1 with SimpleTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.

the class ArffParser method setupBundleHeaders.

/**
 * Setup the headers for the object bundle.
 *
 * Consecutive input columns that share the same target column are merged into
 * one output column. Attribute names are looked up by <em>input</em> column
 * index: {@code names} is expected to be parallel to {@code targ} (one entry
 * per input attribute), so the output index must not be used to address it.
 *
 * @param names Attribute names, one per input column
 * @param targ Target columns (output column index for each input column)
 * @param etyp ELKI type information, indexed by output column
 * @param dimsize Number of dimensions in the individual types, indexed by
 *        output column
 * @param bundle Output bundle
 * @param sparse Flag to create sparse vectors
 * @throws AbortException if the type of an output column is not supported
 */
private void setupBundleHeaders(ArrayList<String> names, int[] targ, TypeInformation[] etyp, int[] dimsize, MultipleObjectsBundle bundle, boolean sparse) {
    for (int in = 0, out = 0; in < targ.length; out++) {
        // Find the first input column that belongs to a different output column.
        int nin = in + 1;
        for (; nin < targ.length; nin++) {
            if (targ[nin] != targ[in]) {
                break;
            }
        }
        if (TypeUtil.NUMBER_VECTOR_FIELD.equals(etyp[out])) {
            String[] labels = new String[dimsize[out]];
            // Collect labels, starting at the first input column of this group:
            for (int i = 0; i < dimsize[out]; i++) {
                labels[i] = names.get(in + i);
            }
            if (!sparse) {
                VectorFieldTypeInformation<DoubleVector> type = new VectorFieldTypeInformation<>(DoubleVector.FACTORY, dimsize[out], labels);
                bundle.appendColumn(type, new ArrayList<DoubleVector>());
            } else {
                VectorFieldTypeInformation<SparseDoubleVector> type = new VectorFieldTypeInformation<>(SparseDoubleVector.FACTORY, dimsize[out], labels);
                bundle.appendColumn(type, new ArrayList<SparseDoubleVector>());
            }
        } else if (TypeUtil.LABELLIST.equals(etyp[out])) {
            // Join the names of all merged input columns with spaces.
            StringBuilder label = new StringBuilder(names.get(in));
            for (int i = 1; i < dimsize[out]; i++) {
                label.append(' ').append(names.get(in + i));
            }
            bundle.appendColumn(new SimpleTypeInformation<>(LabelList.class, label.toString()), new ArrayList<LabelList>());
        } else if (TypeUtil.EXTERNALID.equals(etyp[out])) {
            bundle.appendColumn(new SimpleTypeInformation<>(ExternalID.class, names.get(in)), new ArrayList<ExternalID>());
        } else if (TypeUtil.CLASSLABEL.equals(etyp[out])) {
            bundle.appendColumn(new SimpleTypeInformation<>(ClassLabel.class, names.get(in)), new ArrayList<ClassLabel>());
        } else {
            throw new AbortException("Unsupported type for column " + in + "->" + out + ": " + ((etyp[out] != null) ? etyp[out].toString() : "null"));
        }
        // Every iteration must have appended exactly one output column.
        assert (out == bundle.metaLength() - 1);
        in = nin;
    }
}
Also used : ExternalID(de.lmu.ifi.dbs.elki.data.ExternalID) ArrayList(java.util.ArrayList) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) SparseDoubleVector(de.lmu.ifi.dbs.elki.data.SparseDoubleVector) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) DoubleVector(de.lmu.ifi.dbs.elki.data.DoubleVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Example 2 with SimpleTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.

the class CASH method buildDB.

/**
 * Projects every parameterization function of the given relation into the
 * specified subspace and collects the results in a new relation, which is
 * registered with a fresh proxy database over the same ids.
 *
 * @param dim the dimensionality of the database (only used for the debug
 *        message, which reports dim-1)
 * @param basis the basis defining the subspace
 * @param ids the ids for the new database
 * @param relation the database storing the parameterization functions
 * @return a relation holding the projected parameterization functions
 */
private MaterializedRelation<ParameterizationFunction> buildDB(int dim, double[][] basis, DBIDs ids, Relation<ParameterizationFunction> relation) {
    final ProxyDatabase database = new ProxyDatabase(ids);
    final SimpleTypeInformation<ParameterizationFunction> typeInfo = new SimpleTypeInformation<>(ParameterizationFunction.class);
    final WritableDataStore<ParameterizationFunction> projected = DataStoreUtil.makeStorage(ids, DataStoreFactory.HINT_HOT, ParameterizationFunction.class);

    // Project each stored function into the subspace.
    DBIDIter it = ids.iter();
    while (it.valid()) {
        projected.put(it, project(basis, relation.get(it)));
        it.advance();
    }

    if (LOG.isDebugging()) {
        final int reducedDim = dim - 1;
        LOG.debugFine("db fuer dim " + reducedDim + ": " + ids.size());
    }

    final MaterializedRelation<ParameterizationFunction> result = new MaterializedRelation<>(typeInfo, ids, null, projected);
    database.addRelation(result);
    return result;
}
Also used : ProxyDatabase(de.lmu.ifi.dbs.elki.database.ProxyDatabase) ParameterizationFunction(de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) DBIDIter(de.lmu.ifi.dbs.elki.database.ids.DBIDIter) MaterializedRelation(de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)

Example 3 with SimpleTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.

the class AttributeWiseBetaNormalization method filter.

/**
 * Normalizes every number vector field column of the bundle in place: for
 * each dimension the best fitting distribution is determined, and each value
 * is then mapped through that distribution's CDF and the quantile function of
 * a symmetric beta distribution.
 *
 * @param objects bundle to process
 * @return the same bundle instance, with matching columns rewritten
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    // Nothing to do for a bundle without data.
    if (objects.dataLength() == 0) {
        return objects;
    }
    for (int col = 0; col < objects.metaLength(); col++) {
        final SimpleTypeInformation<?> meta = (SimpleTypeInformation<?>) objects.meta(col);
        final List<?> rawColumn = (List<?>) objects.getColumn(col);
        // Columns that are not number vector fields are left untouched.
        if (!TypeUtil.NUMBER_VECTOR_FIELD.isAssignableFromType(meta)) {
            continue;
        }
        @SuppressWarnings("unchecked")
        final List<V> vectors = (List<V>) rawColumn;
        @SuppressWarnings("unchecked")
        final VectorFieldTypeInformation<V> vectorMeta = (VectorFieldTypeInformation<V>) meta;
        factory = FilterUtil.guessFactory(vectorMeta);

        final int dim = vectorMeta.getDimensionality();
        dists = new ArrayList<>(dim);
        // Scratch buffer handed to the fitting routine, one slot per object.
        final double[] scratch = new double[vectors.size()];
        // The adapter is re-pointed to the current dimension before each fit.
        final Adapter adapter = new Adapter();
        for (int d = 0; d < dim; d++) {
            adapter.dim = d;
            final Distribution best = findBestFit(vectors, adapter, d, scratch);
            if (LOG.isVerbose()) {
                LOG.verbose("Best fit for dimension " + d + ": " + best.toString());
            }
            dists.add(best);
        }

        // Symmetric beta distribution used for the projection.
        final double shape = FastMath.pow(alpha, -1 / FastMath.sqrt(dim));
        final BetaDistribution beta = new BetaDistribution(shape, shape);

        // Second pass: replace each vector by its normalized version.
        final double[] normalized = new double[dim];
        for (int row = 0; row < objects.dataLength(); row++) {
            final V vec = vectors.get(row);
            for (int d = 0; d < dim; d++) {
                // TODO: when available, use logspace for better numerical precision!
                final double value = vec.doubleValue(d);
                normalized[d] = beta.quantile(dists.get(d).cdf(value));
            }
            vectors.set(row, factory.newNumberVector(normalized));
        }
    }
    return objects;
}
Also used : SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) BetaDistribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.BetaDistribution) VectorFieldTypeInformation(de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation) Distribution(de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution) ArrayList(java.util.ArrayList) List(java.util.List)

Example 4 with SimpleTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.

the class ClassicMultidimensionalScalingTransform method filter.

/**
 * Builds a new bundle in which every column accepted by the distance
 * function's input type restriction is replaced by {@code tdim}-dimensional
 * vectors computed from a singular value decomposition of the double-centered
 * squared distance matrix. Other columns are passed through unchanged.
 *
 * Note that the replaced column shares its list with the input bundle; the
 * entries are overwritten in place.
 *
 * @param objects bundle to process
 * @return the input bundle if it is empty, otherwise the newly built bundle
 */
@Override
public MultipleObjectsBundle filter(MultipleObjectsBundle objects) {
    final int size = objects.dataLength();
    if (size == 0) {
        return objects;
    }
    final MultipleObjectsBundle result = new MultipleObjectsBundle();
    for (int col = 0; col < objects.metaLength(); col++) {
        @SuppressWarnings("unchecked")
        final SimpleTypeInformation<Object> meta = (SimpleTypeInformation<Object>) objects.meta(col);
        @SuppressWarnings("unchecked")
        final List<Object> values = (List<Object>) objects.getColumn(col);
        // Columns the distance function cannot handle are copied over as-is.
        if (!dist.getInputTypeRestriction().isAssignableFromType(meta)) {
            result.appendColumn(meta, values);
            continue;
        }
        // Register the column under its replacement (vector field) type.
        @SuppressWarnings("unchecked")
        final List<I> inputs = (List<I>) values;
        result.appendColumn(new VectorFieldTypeInformation<>(factory, tdim), inputs);

        StepProgress prog = null;
        if (LOG.isVerbose()) {
            prog = new StepProgress("Classic MDS", 2);
        }

        // Step 1: squared distance matrix, double centered.
        LOG.beginStep(prog, 1, "Computing distance matrix");
        final double[][] mat = computeSquaredDistanceMatrix(inputs, dist);
        doubleCenterSymmetric(mat);

        // Step 2: decomposition to obtain the coordinates.
        LOG.beginStep(prog, 2, "Computing singular value decomposition");
        final SingularValueDecomposition svd = new SingularValueDecomposition(mat);
        final double[][] u = svd.getU();
        final double[] lambda = svd.getSingularValues();
        // Undo squared, unless we were given a squared distance function:
        if (!dist.isSquared()) {
            for (int k = 0; k < tdim; k++) {
                lambda[k] = FastMath.sqrt(Math.abs(lambda[k]));
            }
        }
        // Scale the first tdim components of each row and store the new vector.
        final double[] coords = new double[tdim];
        for (int row = 0; row < size; row++) {
            final double[] urow = u[row];
            for (int k = 0; k < coords.length; k++) {
                coords[k] = lambda[k] * urow[k];
            }
            values.set(row, factory.newNumberVector(coords));
        }
        LOG.setCompleted(prog);
    }
    return result;
}
Also used : MultipleObjectsBundle(de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) StepProgress(de.lmu.ifi.dbs.elki.logging.progress.StepProgress) List(java.util.List) SingularValueDecomposition(de.lmu.ifi.dbs.elki.math.linearalgebra.SingularValueDecomposition)

Example 5 with SimpleTypeInformation

use of de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation in project elki by elki-project.

the class EvaluatePrecomputedOutlierScores method run.

/**
 * Streams the input file through the parser and hands every object to
 * {@code processRow}, writing to the output file opened in append mode. The
 * output channel is locked before writing; a header is written only when the
 * channel position is 0.
 *
 * Exactly one vector column and one label column are accepted; any other
 * column layout aborts. I/O errors are rethrown as {@link AbortException}.
 */
@Override
public void run() {
    try (FileInputStream rawIn = new FileInputStream(infile);
        InputStream in = new BufferedInputStream(FileUtil.tryGzipInput(rawIn));
        FileOutputStream rawOut = new FileOutputStream(outfile, true);
        PrintStream out = new PrintStream(rawOut);
        FileChannel channel = rawOut.getChannel()) {
        // Setup the input stream.
        parser.initStream(in);
        // Lock the output file:
        channel.lock();
        if (channel.position() != 0L) {
            LOG.info("Appending to existing output " + outfile);
        } else {
            writeHeader(out);
        }
        // Indexes of the label and vector columns; -1 while unknown.
        int labelCol = -1, vectorCol = -1;
        BundleStreamSource.Event event;
        while ((event = parser.nextEvent()) != BundleStreamSource.Event.END_OF_STREAM) {
            switch(event) {
                case META_CHANGED:
                    // Re-detect both columns from the new meta data.
                    final BundleMeta meta = parser.getMeta();
                    labelCol = -1;
                    vectorCol = -1;
                    for (int i = 0; i < meta.size(); i++) {
                        final SimpleTypeInformation<?> colType = meta.get(i);
                        if (TypeUtil.NUMBER_VECTOR_VARIABLE_LENGTH.isAssignableFromType(colType)) {
                            if (vectorCol >= 0) {
                                throw new AbortException("More than one vector column.");
                            }
                            vectorCol = i;
                            continue;
                        }
                        if (TypeUtil.GUESSED_LABEL.isAssignableFromType(colType)) {
                            if (labelCol >= 0) {
                                throw new AbortException("More than one label column.");
                            }
                            labelCol = i;
                            continue;
                        }
                        throw new AbortException("Unexpected data column type: " + colType);
                    }
                    break;
                case NEXT_OBJECT:
                    if (labelCol < 0) {
                        throw new AbortException("No label column available.");
                    }
                    if (vectorCol < 0) {
                        throw new AbortException("No vector column available.");
                    }
                    processRow(out, (NumberVector) parser.data(vectorCol), parser.data(labelCol).toString());
                    break;
                default:
                    // Other events require no action.
                    break;
            }
        }
    } catch (IOException e) {
        throw new AbortException("IO error.", e);
    }
}
Also used : BundleMeta(de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta) FileChannel(java.nio.channels.FileChannel) SimpleTypeInformation(de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation) NumberVector(de.lmu.ifi.dbs.elki.data.NumberVector) AbortException(de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)

Aggregations

SimpleTypeInformation (de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation)15 List (java.util.List)6 VectorFieldTypeInformation (de.lmu.ifi.dbs.elki.data.type.VectorFieldTypeInformation)5 MaterializedRelation (de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation)4 MultipleObjectsBundle (de.lmu.ifi.dbs.elki.datasource.bundle.MultipleObjectsBundle)4 AbortException (de.lmu.ifi.dbs.elki.utilities.exceptions.AbortException)4 ArrayList (java.util.ArrayList)4 DBIDIter (de.lmu.ifi.dbs.elki.database.ids.DBIDIter)3 DBIDs (de.lmu.ifi.dbs.elki.database.ids.DBIDs)3 FiniteProgress (de.lmu.ifi.dbs.elki.logging.progress.FiniteProgress)3 ParameterizationFunction (de.lmu.ifi.dbs.elki.algorithm.clustering.correlation.cash.ParameterizationFunction)2 NumberVector (de.lmu.ifi.dbs.elki.data.NumberVector)2 ModifiableDBIDs (de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs)2 Relation (de.lmu.ifi.dbs.elki.database.relation.Relation)2 BundleMeta (de.lmu.ifi.dbs.elki.datasource.bundle.BundleMeta)2 Index (de.lmu.ifi.dbs.elki.index.Index)2 IndexFactory (de.lmu.ifi.dbs.elki.index.IndexFactory)2 Distribution (de.lmu.ifi.dbs.elki.math.statistics.distribution.Distribution)2 DoubleVector (de.lmu.ifi.dbs.elki.data.DoubleVector)1 ExternalID (de.lmu.ifi.dbs.elki.data.ExternalID)1